cxx11_tensor_convolution_sycl.cpp (20033B)
1 // This file is part of Eigen, a lightweight C++ template library 2 // for linear algebra. 3 // 4 // Copyright (C) 2016 5 // Mehdi Goli Codeplay Software Ltd. 6 // Ralph Potter Codeplay Software Ltd. 7 // Luke Iwanski Codeplay Software Ltd. 8 // Contact: <eigen@codeplay.com> 9 // 10 // This Source Code Form is subject to the terms of the Mozilla 11 // Public License v. 2.0. If a copy of the MPL was not distributed 12 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 13 14 #define EIGEN_TEST_NO_LONGDOUBLE 15 #define EIGEN_TEST_NO_COMPLEX 16 17 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t 18 #define EIGEN_USE_SYCL 19 20 #include <iostream> 21 #include <chrono> 22 #include <ctime> 23 24 #include "main.h" 25 #include <unsupported/Eigen/CXX11/Tensor> 26 #include <iomanip> 27 28 using Eigen::array; 29 using Eigen::SyclDevice; 30 using Eigen::Tensor; 31 using Eigen::TensorMap; 32 static const float error_threshold =1e-4f; 33 34 35 template <typename DataType, int DataLayout, typename IndexType> 36 static void test_larg_expr1D(const Eigen::SyclDevice& sycl_device) 37 { 38 IndexType indim0 =53; 39 IndexType indim1= 55; 40 IndexType indim2= 51; 41 IndexType outdim0=50; 42 IndexType outdim1=55; 43 IndexType outdim2=51; 44 Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}}; 45 Eigen::array<IndexType, 1> kernel_dims = {{4}}; 46 Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}}; 47 48 Tensor<DataType, 3, DataLayout, IndexType> input(input_dims); 49 Tensor<DataType, 1, DataLayout,IndexType> kernel(kernel_dims); 50 Tensor<DataType, 3, DataLayout,IndexType> result(result_dims); 51 Tensor<DataType, 3, DataLayout,IndexType> result_host(result_dims); 52 53 Eigen::array<IndexType, 1> dims3{{0}}; 54 55 input.setRandom(); 56 kernel.setRandom(); 57 result.setZero(); 58 result_host.setZero(); 59 60 std::size_t input_bytes = input.size() * sizeof(DataType); 61 std::size_t kernel_bytes = kernel.size() * sizeof(DataType); 62 std::size_t result_bytes = result.size() * sizeof(DataType); 63 64 DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); 65 DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); 66 DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes)); 67 68 Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims); 69 Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims); 70 Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims); 71 sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); 72 sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); 73 74 gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3); 75 sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); 76 77 result_host=input.convolve(kernel, dims3); 78 79 for(IndexType i=0; i< outdim0; i++ ){ 80 for(IndexType j=0; j< outdim1; j++ ){ 81 for(IndexType k=0; k< outdim2; k++ ){ 82 if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) { 83 std::cout <<std::setprecision(16)<< "mismatch detected at index ( "<< i << " , " << j << ", " << k << " ) " << " \t " << result(i,j,k) << " vs "<< result_host(i,j,k) << std::endl; 84 assert(false); 85 } 86 } 87 } 88 } 89 sycl_device.deallocate(d_input); 90 sycl_device.deallocate(d_kernel); 91 sycl_device.deallocate(d_result); 92 93 } 94 95 96 template <typename DataType, int DataLayout, typename IndexType> 97 static void test_larg_expr2D(const Eigen::SyclDevice& sycl_device) 98 { 99 IndexType indim0 =53; 100 IndexType indim1= 55; 101 IndexType indim2= 51; 102 IndexType outdim0=50; 103 IndexType outdim1=51; 104 IndexType outdim2=51; 105 Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}}; 106 Eigen::array<IndexType, 2> kernel_dims = {{4,5}}; 107 Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}}; 108 109 Tensor<DataType, 3, DataLayout, IndexType> input(input_dims); 110 Tensor<DataType, 2, DataLayout,IndexType> kernel(kernel_dims); 111 Tensor<DataType, 3, DataLayout,IndexType> result(result_dims); 112 Tensor<DataType, 3, DataLayout,IndexType> result_host(result_dims); 113 114 Eigen::array<IndexType, 2> dims3{{0,1}}; 115 116 input.setRandom(); 117 kernel.setRandom(); 118 result.setZero(); 119 result_host.setZero(); 120 121 std::size_t input_bytes = input.size() * sizeof(DataType); 122 std::size_t kernel_bytes = kernel.size() * sizeof(DataType); 123 std::size_t result_bytes = result.size() * sizeof(DataType); 124 125 DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); 126 DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); 127 DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes)); 128 129 Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims); 130 Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims); 131 Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims); 132 sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); 133 sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); 134 135 gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3); 136 sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); 137 138 result_host=input.convolve(kernel, dims3); 139 140 for(IndexType i=0; i< outdim0; i++ ){ 141 for(IndexType j=0; j< outdim1; j++ ){ 142 for(IndexType k=0; k< outdim2; k++ ){ 143 if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) { 144 std::cout <<std::setprecision(16)<< "mismatch detected at index ( "<< i << " , " << j << ", " << k << " ) " << " \t " << result(i,j,k) << " vs "<< result_host(i,j,k) << std::endl; 145 assert(false); 146 } 147 } 148 } 149 } 150 sycl_device.deallocate(d_input); 151 sycl_device.deallocate(d_kernel); 152 sycl_device.deallocate(d_result); 153 154 } 155 156 157 template <typename DataType, int DataLayout, typename IndexType> 158 static void test_larg_expr3D(const Eigen::SyclDevice& sycl_device) 159 { 160 IndexType indim0 =53; 161 IndexType indim1= 55; 162 IndexType indim2= 51; 163 IndexType outdim0=50; 164 IndexType outdim1=51; 165 IndexType outdim2=49; 166 Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}}; 167 Eigen::array<IndexType, 3> kernel_dims = {{4,5,3}}; 168 Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}}; 169 170 Tensor<DataType, 3, DataLayout, IndexType> input(input_dims); 171 Tensor<DataType, 3, DataLayout,IndexType> kernel(kernel_dims); 172 Tensor<DataType, 3, DataLayout,IndexType> result(result_dims); 173 Tensor<DataType, 3, DataLayout,IndexType> result_host(result_dims); 174 175 Eigen::array<IndexType, 3> dims3{{0,1,2}}; 176 177 input.setRandom(); 178 kernel.setRandom(); 179 result.setZero(); 180 result_host.setZero(); 181 182 std::size_t input_bytes = input.size() * sizeof(DataType); 183 std::size_t kernel_bytes = kernel.size() * sizeof(DataType); 184 std::size_t result_bytes = result.size() * sizeof(DataType); 185 186 DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); 187 DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); 188 DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes)); 189 190 Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims); 191 Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims); 192 Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims); 193 sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); 194 sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); 195 196 gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3); 197 sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); 198 199 result_host=input.convolve(kernel, dims3); 200 201 for(IndexType i=0; i< outdim0; i++ ){ 202 for(IndexType j=0; j< outdim1; j++ ){ 203 for(IndexType k=0; k< outdim2; k++ ){ 204 if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) { 205 std::cout <<std::setprecision(16)<< "mismatch detected at index ( "<< i << " , " << j << ", " << k << " ) " << " \t " << result(i,j,k) << " vs "<< result_host(i,j,k) << std::endl; 206 assert(false); 207 } 208 } 209 } 210 } 211 sycl_device.deallocate(d_input); 212 sycl_device.deallocate(d_kernel); 213 sycl_device.deallocate(d_result); 214 215 } 216 217 218 template <typename DataType, int DataLayout, typename IndexType> 219 static void test_evals(const Eigen::SyclDevice& sycl_device) 220 { 221 Eigen::array<IndexType, 2> input_dims = {{3, 3}}; 222 Eigen::array<IndexType, 1> kernel_dims = {{2}}; 223 Eigen::array<IndexType, 2> result_dims = {{2, 3}}; 224 225 Tensor<DataType, 2, DataLayout, IndexType> input(input_dims); 226 Tensor<DataType, 1, DataLayout,IndexType> kernel(kernel_dims); 227 Tensor<DataType, 2, DataLayout,IndexType> result(result_dims); 228 229 Eigen::array<IndexType, 1> dims3{{0}}; 230 231 input.setRandom(); 232 kernel.setRandom(); 233 result.setZero(); 234 235 std::size_t input_bytes = input.size() * sizeof(DataType); 236 std::size_t kernel_bytes = kernel.size() * sizeof(DataType); 237 std::size_t result_bytes = result.size() * sizeof(DataType); 238 239 DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); 240 DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); 241 DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes)); 242 243 Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_input(d_input, input_dims); 244 Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims); 245 Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_result(d_result, result_dims); 246 sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); 247 sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); 248 249 gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3); 250 sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); 251 252 VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0) + input(1,0)*kernel(1)); // index 0 253 VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0) + input(1,1)*kernel(1)); // index 2 254 VERIFY_IS_APPROX(result(0,2), input(0,2)*kernel(0) + input(1,2)*kernel(1)); // index 4 255 VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0) + input(2,0)*kernel(1)); // index 1 256 VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0) + input(2,1)*kernel(1)); // index 3 257 VERIFY_IS_APPROX(result(1,2), input(1,2)*kernel(0) + input(2,2)*kernel(1)); // index 5 258 259 sycl_device.deallocate(d_input); 260 sycl_device.deallocate(d_kernel); 261 sycl_device.deallocate(d_result); 262 } 263 264 template <typename DataType, int DataLayout, typename IndexType> 265 static void test_expr(const Eigen::SyclDevice& sycl_device) 266 { 267 Eigen::array<IndexType, 2> input_dims = {{3, 3}}; 268 Eigen::array<IndexType, 2> kernel_dims = {{2, 2}}; 269 Eigen::array<IndexType, 2> result_dims = {{2, 2}}; 270 271 Tensor<DataType, 2, DataLayout, IndexType> input(input_dims); 272 Tensor<DataType, 2, DataLayout, IndexType> kernel(kernel_dims); 273 Tensor<DataType, 2, DataLayout, IndexType> result(result_dims); 274 275 input.setRandom(); 276 kernel.setRandom(); 277 Eigen::array<IndexType, 2> dims; 278 dims[0] = 0; 279 dims[1] = 1; 280 281 std::size_t input_bytes = input.size() * sizeof(DataType); 282 std::size_t kernel_bytes = kernel.size() * sizeof(DataType); 283 std::size_t result_bytes = result.size() * sizeof(DataType); 284 285 DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); 286 DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); 287 DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes)); 288 289 Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout,IndexType> > gpu_input(d_input, input_dims); 290 Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims); 291 Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout,IndexType> > gpu_result(d_result, result_dims); 292 sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); 293 sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); 294 295 gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims); 296 sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); 297 298 VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0,0) + input(0,1)*kernel(0,1) + 299 input(1,0)*kernel(1,0) + input(1,1)*kernel(1,1)); 300 VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0,0) + input(0,2)*kernel(0,1) + 301 input(1,1)*kernel(1,0) + input(1,2)*kernel(1,1)); 302 VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0,0) + input(1,1)*kernel(0,1) + 303 input(2,0)*kernel(1,0) + input(2,1)*kernel(1,1)); 304 VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0,0) + input(1,2)*kernel(0,1) + 305 input(2,1)*kernel(1,0) + input(2,2)*kernel(1,1)); 306 307 sycl_device.deallocate(d_input); 308 sycl_device.deallocate(d_kernel); 309 sycl_device.deallocate(d_result); 310 } 311 312 313 template <typename DataType, int DataLayout, typename IndexType> 314 static void test_modes(const Eigen::SyclDevice& sycl_device){ 315 316 Eigen::array<IndexType, 1> input_dims = {{3}}; 317 Eigen::array<IndexType, 1> kernel_dims = {{3}}; 318 319 Tensor<DataType, 1, DataLayout, IndexType> input(input_dims); 320 Tensor<DataType, 1, DataLayout, IndexType> kernel(kernel_dims); 321 322 input.setRandom(); 323 kernel.setRandom(); 324 Eigen::array<IndexType, 1> dims; 325 dims[0] = 0; 326 327 input(0) = 1.0f; 328 input(1) = 2.0f; 329 input(2) = 3.0f; 330 kernel(0) = 0.5f; 331 kernel(1) = 1.0f; 332 kernel(2) = 0.0f; 333 334 Eigen::array<std::pair<IndexType, IndexType>, 1> padding; 335 336 // Emulate VALID mode (as defined in 337 // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html). 338 padding[0] = std::make_pair(0, 0); 339 Tensor<DataType, 1, DataLayout, IndexType> valid(1); 340 341 std::size_t input_bytes = input.size() * sizeof(DataType); 342 std::size_t kernel_bytes = kernel.size() * sizeof(DataType); 343 std::size_t valid_bytes = valid.size() * sizeof(DataType); 344 345 DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); 346 DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); 347 DataType * d_valid = static_cast<DataType*>(sycl_device.allocate(valid_bytes)); 348 349 Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_input(d_input, input_dims); 350 Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims); 351 Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_valid(d_valid, valid.dimensions()); 352 sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); 353 sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); 354 355 gpu_valid.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims); 356 sycl_device.memcpyDeviceToHost(valid.data(), d_valid, valid_bytes); 357 358 VERIFY_IS_EQUAL(valid.dimension(0), 1); 359 VERIFY_IS_APPROX(valid(0), 2.5f); 360 361 // Emulate SAME mode (as defined in 362 // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html). 363 padding[0] = std::make_pair(1, 1); 364 Tensor<DataType, 1, DataLayout, IndexType> same(3); 365 std::size_t same_bytes = same.size() * sizeof(DataType); 366 DataType * d_same = static_cast<DataType*>(sycl_device.allocate(same_bytes)); 367 Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_same(d_same, same.dimensions()); 368 gpu_same.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims); 369 sycl_device.memcpyDeviceToHost(same.data(), d_same, same_bytes); 370 371 VERIFY_IS_EQUAL(same.dimension(0), 3); 372 VERIFY_IS_APPROX(same(0), 1.0f); 373 VERIFY_IS_APPROX(same(1), 2.5f); 374 VERIFY_IS_APPROX(same(2), 4.0f); 375 376 // Emulate FULL mode (as defined in 377 // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html). 378 padding[0] = std::make_pair(2, 2); 379 380 Tensor<DataType, 1, DataLayout, IndexType> full(5); 381 std::size_t full_bytes = full.size() * sizeof(DataType); 382 DataType * d_full = static_cast<DataType*>(sycl_device.allocate(full_bytes)); 383 Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_full(d_full, full.dimensions()); 384 gpu_full.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims); 385 sycl_device.memcpyDeviceToHost(full.data(), d_full, full_bytes); 386 387 VERIFY_IS_EQUAL(full.dimension(0), 5); 388 VERIFY_IS_APPROX(full(0), 0.0f); 389 VERIFY_IS_APPROX(full(1), 1.0f); 390 VERIFY_IS_APPROX(full(2), 2.5f); 391 VERIFY_IS_APPROX(full(3), 4.0f); 392 VERIFY_IS_APPROX(full(4), 1.5f); 393 394 sycl_device.deallocate(d_input); 395 sycl_device.deallocate(d_kernel); 396 sycl_device.deallocate(d_valid); 397 sycl_device.deallocate(d_same); 398 sycl_device.deallocate(d_full); 399 400 } 401 402 template <typename DataType, int DataLayout, typename IndexType> 403 static void test_strides(const Eigen::SyclDevice& sycl_device){ 404 405 Eigen::array<IndexType, 1> input_dims = {{13}}; 406 Eigen::array<IndexType, 1> kernel_dims = {{3}}; 407 408 Tensor<DataType, 1, DataLayout, IndexType> input(input_dims); 409 Tensor<DataType, 1, DataLayout, IndexType> kernel(kernel_dims); 410 Tensor<DataType, 1, DataLayout, IndexType> result(2); 411 412 input.setRandom(); 413 kernel.setRandom(); 414 Eigen::array<IndexType, 1> dims; 415 dims[0] = 0; 416 417 Eigen::array<IndexType, 1> stride_of_3; 418 stride_of_3[0] = 3; 419 Eigen::array<IndexType, 1> stride_of_2; 420 stride_of_2[0] = 2; 421 422 std::size_t input_bytes = input.size() * sizeof(DataType); 423 std::size_t kernel_bytes = kernel.size() * sizeof(DataType); 424 std::size_t result_bytes = result.size() * sizeof(DataType); 425 426 DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); 427 DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); 428 DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes)); 429 430 Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_input(d_input, input_dims); 431 Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims); 432 Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_result(d_result, result.dimensions()); 433 sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); 434 sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); 435 436 gpu_result.device(sycl_device)=gpu_input.stride(stride_of_3).convolve(gpu_kernel, dims).stride(stride_of_2); 437 sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); 438 439 VERIFY_IS_EQUAL(result.dimension(0), 2); 440 VERIFY_IS_APPROX(result(0), (input(0)*kernel(0) + input(3)*kernel(1) + 441 input(6)*kernel(2))); 442 VERIFY_IS_APPROX(result(1), (input(6)*kernel(0) + input(9)*kernel(1) + 443 input(12)*kernel(2))); 444 } 445 446 template <typename Dev_selector> void tensorConvolutionPerDevice(Dev_selector& s){ 447 QueueInterface queueInterface(s); 448 auto sycl_device=Eigen::SyclDevice(&queueInterface); 449 test_larg_expr1D<float, RowMajor, int64_t>(sycl_device); 450 test_larg_expr1D<float, ColMajor, int64_t>(sycl_device); 451 test_larg_expr2D<float, RowMajor, int64_t>(sycl_device); 452 test_larg_expr2D<float, ColMajor, int64_t>(sycl_device); 453 test_larg_expr3D<float, RowMajor, int64_t>(sycl_device); 454 test_larg_expr3D<float, ColMajor, int64_t>(sycl_device); 455 test_evals<float, ColMajor, int64_t>(sycl_device); 456 test_evals<float, RowMajor, int64_t>(sycl_device); 457 test_expr<float, ColMajor, int64_t>(sycl_device); 458 test_expr<float, RowMajor, int64_t>(sycl_device); 459 test_modes<float, ColMajor, int64_t>(sycl_device); 460 test_modes<float, RowMajor, int64_t>(sycl_device); 461 test_strides<float, ColMajor, int64_t>(sycl_device); 462 test_strides<float, RowMajor, int64_t>(sycl_device); 463 } 464 465 EIGEN_DECLARE_TEST(cxx11_tensor_convolution_sycl) { 466 for (const auto& device :Eigen::get_sycl_supported_devices()) { 467 CALL_SUBTEST(tensorConvolutionPerDevice(device)); 468 } 469 }