cxx11_tensor_striding_sycl.cpp (7074B)
1 // This file is part of Eigen, a lightweight C++ template library 2 // for linear algebra. 3 // 4 // Copyright (C) 2016 5 // Mehdi Goli Codeplay Software Ltd. 6 // Ralph Potter Codeplay Software Ltd. 7 // Luke Iwanski Codeplay Software Ltd. 8 // Contact: <eigen@codeplay.com> 9 // 10 // This Source Code Form is subject to the terms of the Mozilla 11 // Public License v. 2.0. If a copy of the MPL was not distributed 12 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 13 14 #define EIGEN_TEST_NO_LONGDOUBLE 15 #define EIGEN_TEST_NO_COMPLEX 16 17 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t 18 #define EIGEN_USE_SYCL 19 20 #include <iostream> 21 #include <chrono> 22 #include <ctime> 23 24 #include "main.h" 25 #include <unsupported/Eigen/CXX11/Tensor> 26 27 using Eigen::array; 28 using Eigen::SyclDevice; 29 using Eigen::Tensor; 30 using Eigen::TensorMap; 31 32 33 template <typename DataType, int DataLayout, typename IndexType> 34 static void test_simple_striding(const Eigen::SyclDevice& sycl_device) 35 { 36 37 Eigen::array<IndexType, 4> tensor_dims = {{2,3,5,7}}; 38 Eigen::array<IndexType, 4> stride_dims = {{1,1,3,3}}; 39 40 41 Tensor<DataType, 4, DataLayout, IndexType> tensor(tensor_dims); 42 Tensor<DataType, 4, DataLayout,IndexType> no_stride(tensor_dims); 43 Tensor<DataType, 4, DataLayout,IndexType> stride(stride_dims); 44 45 46 std::size_t tensor_bytes = tensor.size() * sizeof(DataType); 47 std::size_t no_stride_bytes = no_stride.size() * sizeof(DataType); 48 std::size_t stride_bytes = stride.size() * sizeof(DataType); 49 DataType * d_tensor = static_cast<DataType*>(sycl_device.allocate(tensor_bytes)); 50 DataType * d_no_stride = static_cast<DataType*>(sycl_device.allocate(no_stride_bytes)); 51 DataType * d_stride = static_cast<DataType*>(sycl_device.allocate(stride_bytes)); 52 53 Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_tensor(d_tensor, tensor_dims); 54 Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_no_stride(d_no_stride, tensor_dims); 55 Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_stride(d_stride, stride_dims); 56 57 58 tensor.setRandom(); 59 array<IndexType, 4> strides; 60 strides[0] = 1; 61 strides[1] = 1; 62 strides[2] = 1; 63 strides[3] = 1; 64 sycl_device.memcpyHostToDevice(d_tensor, tensor.data(), tensor_bytes); 65 gpu_no_stride.device(sycl_device)=gpu_tensor.stride(strides); 66 sycl_device.memcpyDeviceToHost(no_stride.data(), d_no_stride, no_stride_bytes); 67 68 //no_stride = tensor.stride(strides); 69 70 VERIFY_IS_EQUAL(no_stride.dimension(0), 2); 71 VERIFY_IS_EQUAL(no_stride.dimension(1), 3); 72 VERIFY_IS_EQUAL(no_stride.dimension(2), 5); 73 VERIFY_IS_EQUAL(no_stride.dimension(3), 7); 74 75 for (IndexType i = 0; i < 2; ++i) { 76 for (IndexType j = 0; j < 3; ++j) { 77 for (IndexType k = 0; k < 5; ++k) { 78 for (IndexType l = 0; l < 7; ++l) { 79 VERIFY_IS_EQUAL(tensor(i,j,k,l), no_stride(i,j,k,l)); 80 } 81 } 82 } 83 } 84 85 strides[0] = 2; 86 strides[1] = 4; 87 strides[2] = 2; 88 strides[3] = 3; 89 //Tensor<float, 4, DataLayout> stride; 90 // stride = tensor.stride(strides); 91 92 gpu_stride.device(sycl_device)=gpu_tensor.stride(strides); 93 sycl_device.memcpyDeviceToHost(stride.data(), d_stride, stride_bytes); 94 95 VERIFY_IS_EQUAL(stride.dimension(0), 1); 96 VERIFY_IS_EQUAL(stride.dimension(1), 1); 97 VERIFY_IS_EQUAL(stride.dimension(2), 3); 98 VERIFY_IS_EQUAL(stride.dimension(3), 3); 99 100 for (IndexType i = 0; i < 1; ++i) { 101 for (IndexType j = 0; j < 1; ++j) { 102 for (IndexType k = 0; k < 3; ++k) { 103 for (IndexType l = 0; l < 3; ++l) { 104 VERIFY_IS_EQUAL(tensor(2*i,4*j,2*k,3*l), stride(i,j,k,l)); 105 } 106 } 107 } 108 } 109 110 sycl_device.deallocate(d_tensor); 111 sycl_device.deallocate(d_no_stride); 112 sycl_device.deallocate(d_stride); 113 } 114 115 template <typename DataType, int DataLayout, typename IndexType> 116 static void test_striding_as_lvalue(const Eigen::SyclDevice& sycl_device) 117 { 118 119 Eigen::array<IndexType, 4> tensor_dims = {{2,3,5,7}}; 120 Eigen::array<IndexType, 4> stride_dims = {{3,12,10,21}}; 121 122 123 Tensor<DataType, 4, DataLayout, IndexType> tensor(tensor_dims); 124 Tensor<DataType, 4, DataLayout,IndexType> no_stride(stride_dims); 125 Tensor<DataType, 4, DataLayout,IndexType> stride(stride_dims); 126 127 128 std::size_t tensor_bytes = tensor.size() * sizeof(DataType); 129 std::size_t no_stride_bytes = no_stride.size() * sizeof(DataType); 130 std::size_t stride_bytes = stride.size() * sizeof(DataType); 131 132 DataType * d_tensor = static_cast<DataType*>(sycl_device.allocate(tensor_bytes)); 133 DataType * d_no_stride = static_cast<DataType*>(sycl_device.allocate(no_stride_bytes)); 134 DataType * d_stride = static_cast<DataType*>(sycl_device.allocate(stride_bytes)); 135 136 Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_tensor(d_tensor, tensor_dims); 137 Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_no_stride(d_no_stride, stride_dims); 138 Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_stride(d_stride, stride_dims); 139 140 //Tensor<float, 4, DataLayout> tensor(2,3,5,7); 141 tensor.setRandom(); 142 array<IndexType, 4> strides; 143 strides[0] = 2; 144 strides[1] = 4; 145 strides[2] = 2; 146 strides[3] = 3; 147 148 // Tensor<float, 4, DataLayout> result(3, 12, 10, 21); 149 // result.stride(strides) = tensor; 150 sycl_device.memcpyHostToDevice(d_tensor, tensor.data(), tensor_bytes); 151 gpu_stride.stride(strides).device(sycl_device)=gpu_tensor; 152 sycl_device.memcpyDeviceToHost(stride.data(), d_stride, stride_bytes); 153 154 for (IndexType i = 0; i < 2; ++i) { 155 for (IndexType j = 0; j < 3; ++j) { 156 for (IndexType k = 0; k < 5; ++k) { 157 for (IndexType l = 0; l < 7; ++l) { 158 VERIFY_IS_EQUAL(tensor(i,j,k,l), stride(2*i,4*j,2*k,3*l)); 159 } 160 } 161 } 162 } 163 164 array<IndexType, 4> no_strides; 165 no_strides[0] = 1; 166 no_strides[1] = 1; 167 no_strides[2] = 1; 168 no_strides[3] = 1; 169 // Tensor<float, 4, DataLayout> result2(3, 12, 10, 21); 170 // result2.stride(strides) = tensor.stride(no_strides); 171 172 gpu_no_stride.stride(strides).device(sycl_device)=gpu_tensor.stride(no_strides); 173 sycl_device.memcpyDeviceToHost(no_stride.data(), d_no_stride, no_stride_bytes); 174 175 for (IndexType i = 0; i < 2; ++i) { 176 for (IndexType j = 0; j < 3; ++j) { 177 for (IndexType k = 0; k < 5; ++k) { 178 for (IndexType l = 0; l < 7; ++l) { 179 VERIFY_IS_EQUAL(tensor(i,j,k,l), no_stride(2*i,4*j,2*k,3*l)); 180 } 181 } 182 } 183 } 184 sycl_device.deallocate(d_tensor); 185 sycl_device.deallocate(d_no_stride); 186 sycl_device.deallocate(d_stride); 187 } 188 189 190 template <typename Dev_selector> void tensorStridingPerDevice(Dev_selector& s){ 191 QueueInterface queueInterface(s); 192 auto sycl_device=Eigen::SyclDevice(&queueInterface); 193 test_simple_striding<float, ColMajor, int64_t>(sycl_device); 194 test_simple_striding<float, RowMajor, int64_t>(sycl_device); 195 test_striding_as_lvalue<float, ColMajor, int64_t>(sycl_device); 196 test_striding_as_lvalue<float, RowMajor, int64_t>(sycl_device); 197 } 198 199 EIGEN_DECLARE_TEST(cxx11_tensor_striding_sycl) { 200 for (const auto& device :Eigen::get_sycl_supported_devices()) { 201 CALL_SUBTEST(tensorStridingPerDevice(device)); 202 } 203 }