cxx11_tensor_reverse_sycl.cpp (9283B)
1 // This file is part of Eigen, a lightweight C++ template library 2 // for linear algebra. 3 // 4 // Copyright (C) 2015 5 // Mehdi Goli Codeplay Software Ltd. 6 // Ralph Potter Codeplay Software Ltd. 7 // Luke Iwanski Codeplay Software Ltd. 8 // Contact: <eigen@codeplay.com> 9 // 10 // This Source Code Form is subject to the terms of the Mozilla 11 // Public License v. 2.0. If a copy of the MPL was not distributed 12 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 13 14 #define EIGEN_TEST_NO_LONGDOUBLE 15 #define EIGEN_TEST_NO_COMPLEX 16 17 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t 18 #define EIGEN_USE_SYCL 19 20 #include "main.h" 21 #include <unsupported/Eigen/CXX11/Tensor> 22 23 template <typename DataType, int DataLayout, typename IndexType> 24 static void test_simple_reverse(const Eigen::SyclDevice& sycl_device) { 25 IndexType dim1 = 2; 26 IndexType dim2 = 3; 27 IndexType dim3 = 5; 28 IndexType dim4 = 7; 29 30 array<IndexType, 4> tensorRange = {{dim1, dim2, dim3, dim4}}; 31 Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange); 32 Tensor<DataType, 4, DataLayout, IndexType> reversed_tensor(tensorRange); 33 tensor.setRandom(); 34 35 array<bool, 4> dim_rev; 36 dim_rev[0] = false; 37 dim_rev[1] = true; 38 dim_rev[2] = true; 39 dim_rev[3] = false; 40 41 DataType* gpu_in_data = static_cast<DataType*>( 42 sycl_device.allocate(tensor.dimensions().TotalSize() * sizeof(DataType))); 43 DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate( 44 reversed_tensor.dimensions().TotalSize() * sizeof(DataType))); 45 46 TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > in_gpu(gpu_in_data, 47 tensorRange); 48 TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu(gpu_out_data, 49 tensorRange); 50 51 sycl_device.memcpyHostToDevice( 52 gpu_in_data, tensor.data(), 53 (tensor.dimensions().TotalSize()) * sizeof(DataType)); 54 out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev); 55 sycl_device.memcpyDeviceToHost( 56 reversed_tensor.data(), gpu_out_data, 57 reversed_tensor.dimensions().TotalSize() * sizeof(DataType)); 58 // Check that the CPU and GPU reductions return the same result. 59 for (IndexType i = 0; i < 2; ++i) { 60 for (IndexType j = 0; j < 3; ++j) { 61 for (IndexType k = 0; k < 5; ++k) { 62 for (IndexType l = 0; l < 7; ++l) { 63 VERIFY_IS_EQUAL(tensor(i, j, k, l), 64 reversed_tensor(i, 2 - j, 4 - k, l)); 65 } 66 } 67 } 68 } 69 dim_rev[0] = true; 70 dim_rev[1] = false; 71 dim_rev[2] = false; 72 dim_rev[3] = false; 73 74 out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev); 75 sycl_device.memcpyDeviceToHost( 76 reversed_tensor.data(), gpu_out_data, 77 reversed_tensor.dimensions().TotalSize() * sizeof(DataType)); 78 79 for (IndexType i = 0; i < 2; ++i) { 80 for (IndexType j = 0; j < 3; ++j) { 81 for (IndexType k = 0; k < 5; ++k) { 82 for (IndexType l = 0; l < 7; ++l) { 83 VERIFY_IS_EQUAL(tensor(i, j, k, l), reversed_tensor(1 - i, j, k, l)); 84 } 85 } 86 } 87 } 88 89 dim_rev[0] = true; 90 dim_rev[1] = false; 91 dim_rev[2] = false; 92 dim_rev[3] = true; 93 out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev); 94 sycl_device.memcpyDeviceToHost( 95 reversed_tensor.data(), gpu_out_data, 96 reversed_tensor.dimensions().TotalSize() * sizeof(DataType)); 97 98 for (IndexType i = 0; i < 2; ++i) { 99 for (IndexType j = 0; j < 3; ++j) { 100 for (IndexType k = 0; k < 5; ++k) { 101 for (IndexType l = 0; l < 7; ++l) { 102 VERIFY_IS_EQUAL(tensor(i, j, k, l), 103 reversed_tensor(1 - i, j, k, 6 - l)); 104 } 105 } 106 } 107 } 108 109 sycl_device.deallocate(gpu_in_data); 110 sycl_device.deallocate(gpu_out_data); 111 } 112 113 template <typename DataType, int DataLayout, typename IndexType> 114 static void test_expr_reverse(const Eigen::SyclDevice& sycl_device, 115 bool LValue) { 116 IndexType dim1 = 2; 117 IndexType dim2 = 3; 118 IndexType dim3 = 5; 119 IndexType dim4 = 7; 120 121 array<IndexType, 4> tensorRange = {{dim1, dim2, dim3, dim4}}; 122 Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange); 123 Tensor<DataType, 4, DataLayout, IndexType> expected(tensorRange); 124 Tensor<DataType, 4, DataLayout, IndexType> result(tensorRange); 125 tensor.setRandom(); 126 127 array<bool, 4> dim_rev; 128 dim_rev[0] = false; 129 dim_rev[1] = true; 130 dim_rev[2] = false; 131 dim_rev[3] = true; 132 133 DataType* gpu_in_data = static_cast<DataType*>( 134 sycl_device.allocate(tensor.dimensions().TotalSize() * sizeof(DataType))); 135 DataType* gpu_out_data_expected = static_cast<DataType*>(sycl_device.allocate( 136 expected.dimensions().TotalSize() * sizeof(DataType))); 137 DataType* gpu_out_data_result = static_cast<DataType*>( 138 sycl_device.allocate(result.dimensions().TotalSize() * sizeof(DataType))); 139 140 TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > in_gpu(gpu_in_data, 141 tensorRange); 142 TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu_expected( 143 gpu_out_data_expected, tensorRange); 144 TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu_result( 145 gpu_out_data_result, tensorRange); 146 147 sycl_device.memcpyHostToDevice( 148 gpu_in_data, tensor.data(), 149 (tensor.dimensions().TotalSize()) * sizeof(DataType)); 150 151 if (LValue) { 152 out_gpu_expected.reverse(dim_rev).device(sycl_device) = in_gpu; 153 } else { 154 out_gpu_expected.device(sycl_device) = in_gpu.reverse(dim_rev); 155 } 156 sycl_device.memcpyDeviceToHost( 157 expected.data(), gpu_out_data_expected, 158 expected.dimensions().TotalSize() * sizeof(DataType)); 159 160 array<IndexType, 4> src_slice_dim; 161 src_slice_dim[0] = 2; 162 src_slice_dim[1] = 3; 163 src_slice_dim[2] = 1; 164 src_slice_dim[3] = 7; 165 array<IndexType, 4> src_slice_start; 166 src_slice_start[0] = 0; 167 src_slice_start[1] = 0; 168 src_slice_start[2] = 0; 169 src_slice_start[3] = 0; 170 array<IndexType, 4> dst_slice_dim = src_slice_dim; 171 array<IndexType, 4> dst_slice_start = src_slice_start; 172 173 for (IndexType i = 0; i < 5; ++i) { 174 if (LValue) { 175 out_gpu_result.slice(dst_slice_start, dst_slice_dim) 176 .reverse(dim_rev) 177 .device(sycl_device) = in_gpu.slice(src_slice_start, src_slice_dim); 178 } else { 179 out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) = 180 in_gpu.slice(src_slice_start, src_slice_dim).reverse(dim_rev); 181 } 182 src_slice_start[2] += 1; 183 dst_slice_start[2] += 1; 184 } 185 sycl_device.memcpyDeviceToHost( 186 result.data(), gpu_out_data_result, 187 result.dimensions().TotalSize() * sizeof(DataType)); 188 189 for (IndexType i = 0; i < expected.dimension(0); ++i) { 190 for (IndexType j = 0; j < expected.dimension(1); ++j) { 191 for (IndexType k = 0; k < expected.dimension(2); ++k) { 192 for (IndexType l = 0; l < expected.dimension(3); ++l) { 193 VERIFY_IS_EQUAL(result(i, j, k, l), expected(i, j, k, l)); 194 } 195 } 196 } 197 } 198 199 dst_slice_start[2] = 0; 200 result.setRandom(); 201 sycl_device.memcpyHostToDevice( 202 gpu_out_data_result, result.data(), 203 (result.dimensions().TotalSize()) * sizeof(DataType)); 204 for (IndexType i = 0; i < 5; ++i) { 205 if (LValue) { 206 out_gpu_result.slice(dst_slice_start, dst_slice_dim) 207 .reverse(dim_rev) 208 .device(sycl_device) = in_gpu.slice(dst_slice_start, dst_slice_dim); 209 } else { 210 out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) = 211 in_gpu.reverse(dim_rev).slice(dst_slice_start, dst_slice_dim); 212 } 213 dst_slice_start[2] += 1; 214 } 215 sycl_device.memcpyDeviceToHost( 216 result.data(), gpu_out_data_result, 217 result.dimensions().TotalSize() * sizeof(DataType)); 218 219 for (IndexType i = 0; i < expected.dimension(0); ++i) { 220 for (IndexType j = 0; j < expected.dimension(1); ++j) { 221 for (IndexType k = 0; k < expected.dimension(2); ++k) { 222 for (IndexType l = 0; l < expected.dimension(3); ++l) { 223 VERIFY_IS_EQUAL(result(i, j, k, l), expected(i, j, k, l)); 224 } 225 } 226 } 227 } 228 } 229 230 template <typename DataType> 231 void sycl_reverse_test_per_device(const cl::sycl::device& d) { 232 QueueInterface queueInterface(d); 233 auto sycl_device = Eigen::SyclDevice(&queueInterface); 234 test_simple_reverse<DataType, RowMajor, int64_t>(sycl_device); 235 test_simple_reverse<DataType, ColMajor, int64_t>(sycl_device); 236 test_expr_reverse<DataType, RowMajor, int64_t>(sycl_device, false); 237 test_expr_reverse<DataType, ColMajor, int64_t>(sycl_device, false); 238 test_expr_reverse<DataType, RowMajor, int64_t>(sycl_device, true); 239 test_expr_reverse<DataType, ColMajor, int64_t>(sycl_device, true); 240 } 241 EIGEN_DECLARE_TEST(cxx11_tensor_reverse_sycl) { 242 for (const auto& device : Eigen::get_sycl_supported_devices()) { 243 std::cout << "Running on " 244 << device.get_info<cl::sycl::info::device::name>() << std::endl; 245 CALL_SUBTEST_1(sycl_reverse_test_per_device<short>(device)); 246 CALL_SUBTEST_2(sycl_reverse_test_per_device<int>(device)); 247 CALL_SUBTEST_3(sycl_reverse_test_per_device<unsigned int>(device)); 248 #ifdef EIGEN_SYCL_DOUBLE_SUPPORT 249 CALL_SUBTEST_4(sycl_reverse_test_per_device<double>(device)); 250 #endif 251 CALL_SUBTEST_5(sycl_reverse_test_per_device<float>(device)); 252 } 253 }