cart-elc

Source code for CART-ELC
git clone git://git.laack.co/cart-elc.git
Log | Files | Refs | README | LICENSE

cxx11_tensor_image_patch_sycl.cpp (62111B)


      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2016
      5 // Mehdi Goli    Codeplay Software Ltd.
      6 // Ralph Potter  Codeplay Software Ltd.
      7 // Luke Iwanski  Codeplay Software Ltd.
      8 // Contact: <eigen@codeplay.com>
      9 //
     10 // This Source Code Form is subject to the terms of the Mozilla
     11 // Public License v. 2.0. If a copy of the MPL was not distributed
     12 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
     13 
     14 #define EIGEN_TEST_NO_LONGDOUBLE
     15 #define EIGEN_TEST_NO_COMPLEX
     16 
     17 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
     18 #define EIGEN_USE_SYCL
     19 
     20 #include "main.h"
     21 #include <unsupported/Eigen/CXX11/Tensor>
     22 
     23 using Eigen::Tensor;
     24 static const int DataLayout = ColMajor;
     25 
     26 template <typename DataType, typename IndexType>
     27 static void test_simple_image_patch_sycl(const Eigen::SyclDevice& sycl_device)
     28 {
     29   IndexType sizeDim1 = 2;
     30   IndexType sizeDim2 = 3;
     31   IndexType sizeDim3 = 5;
     32   IndexType sizeDim4 = 7;
     33   array<IndexType, 4> tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
     34   array<IndexType, 4> tensorRowMajorRange = {{sizeDim4, sizeDim3, sizeDim2, sizeDim1}};
     35   Tensor<DataType, 4, DataLayout,IndexType> tensor_col_major(tensorColMajorRange);
     36   Tensor<DataType, 4, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange);
     37   tensor_col_major.setRandom();
     38 
     39   DataType* gpu_data_col_major  = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType)));
     40   DataType* gpu_data_row_major  = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType)));
     41   TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
     42   TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);
     43 
     44   sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(),(tensor_col_major.size())*sizeof(DataType));
     45   gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout();
     46   sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_col_major.size())*sizeof(DataType));
     47 
     48   VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(3));
     49   VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(2));
     50   VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(1));
     51   VERIFY_IS_EQUAL(tensor_col_major.dimension(3), tensor_row_major.dimension(0));
     52 
     53   // Single pixel patch: ColMajor
     54   array<IndexType, 5> patchColMajorTensorRange={{sizeDim1, 1, 1, sizeDim2*sizeDim3, sizeDim4}};
     55   Tensor<DataType, 5, DataLayout,IndexType> single_patch_col_major(patchColMajorTensorRange);
     56   size_t patchTensorBuffSize =single_patch_col_major.size()*sizeof(DataType);
     57   DataType* gpu_data_single_patch_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
     58   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_single_patch_col_major(gpu_data_single_patch_col_major, patchColMajorTensorRange);
     59   gpu_single_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(1, 1);
     60   sycl_device.memcpyDeviceToHost(single_patch_col_major.data(), gpu_data_single_patch_col_major, patchTensorBuffSize);
     61 
     62   VERIFY_IS_EQUAL(single_patch_col_major.dimension(0), 2);
     63   VERIFY_IS_EQUAL(single_patch_col_major.dimension(1), 1);
     64   VERIFY_IS_EQUAL(single_patch_col_major.dimension(2), 1);
     65   VERIFY_IS_EQUAL(single_patch_col_major.dimension(3), 3*5);
     66   VERIFY_IS_EQUAL(single_patch_col_major.dimension(4), 7);
     67 
     68   // Single pixel patch: RowMajor
     69   array<IndexType, 5> patchRowMajorTensorRange={{sizeDim4, sizeDim2*sizeDim3, 1, 1, sizeDim1}};
     70   Tensor<DataType, 5, RowMajor,IndexType> single_patch_row_major(patchRowMajorTensorRange);
     71   patchTensorBuffSize =single_patch_row_major.size()*sizeof(DataType);
     72   DataType* gpu_data_single_patch_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
     73   TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_single_patch_row_major(gpu_data_single_patch_row_major, patchRowMajorTensorRange);
     74   gpu_single_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(1, 1);
     75   sycl_device.memcpyDeviceToHost(single_patch_row_major.data(), gpu_data_single_patch_row_major, patchTensorBuffSize);
     76 
     77   VERIFY_IS_EQUAL(single_patch_row_major.dimension(0), 7);
     78   VERIFY_IS_EQUAL(single_patch_row_major.dimension(1), 3*5);
     79   VERIFY_IS_EQUAL(single_patch_row_major.dimension(2), 1);
     80   VERIFY_IS_EQUAL(single_patch_row_major.dimension(3), 1);
     81   VERIFY_IS_EQUAL(single_patch_row_major.dimension(4), 2);
     82 
     83   for (IndexType i = 0; i < tensor_col_major.size(); ++i) {
     84     // ColMajor
     85     if (tensor_col_major.data()[i] != single_patch_col_major.data()[i]) {
     86       std::cout << "Mismatch detected at index colmajor " << i << " : "
     87            << tensor_col_major.data()[i] << " vs " << single_patch_col_major.data()[i]
     88            << std::endl;
     89     }
     90     VERIFY_IS_EQUAL(single_patch_col_major.data()[i], tensor_col_major.data()[i]);
     91     // RowMajor
     92     if (tensor_row_major.data()[i] != single_patch_row_major.data()[i]) {
     93       std::cout << "Mismatch detected at index row major" << i << " : "
     94            << tensor_row_major.data()[i] << " vs "
     95            << single_patch_row_major.data()[i] << std::endl;
     96     }
     97     VERIFY_IS_EQUAL(single_patch_row_major.data()[i],
     98                     tensor_row_major.data()[i]);
     99     VERIFY_IS_EQUAL(tensor_col_major.data()[i], tensor_row_major.data()[i]);
    100     VERIFY_IS_EQUAL(single_patch_col_major.data()[i],
    101                     single_patch_row_major.data()[i]);
    102   }
    103 
    104 
    105   // Entire image patch: ColMajor
    106   patchColMajorTensorRange={{sizeDim1, sizeDim2, sizeDim3, sizeDim2*sizeDim3, sizeDim4}};
    107   Tensor<DataType, 5, DataLayout,IndexType> entire_image_patch_col_major(patchColMajorTensorRange);
    108   patchTensorBuffSize =entire_image_patch_col_major.size()*sizeof(DataType);
    109   DataType* gpu_data_entire_image_patch_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
    110   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_entire_image_patch_col_major(gpu_data_entire_image_patch_col_major, patchColMajorTensorRange);
    111   gpu_entire_image_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(3, 5);
    112   sycl_device.memcpyDeviceToHost(entire_image_patch_col_major.data(), gpu_data_entire_image_patch_col_major, patchTensorBuffSize);
    113 
    114   VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(0), 2);
    115   VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(1), 3);
    116   VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(2), 5);
    117   VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(3), 3*5);
    118   VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(4), 7);
    119 
    120   // Entire image patch: RowMajor
    121   patchRowMajorTensorRange={{sizeDim4, sizeDim2*sizeDim3, sizeDim3, sizeDim2, sizeDim1}};
    122   Tensor<DataType, 5, RowMajor,IndexType> entire_image_patch_row_major(patchRowMajorTensorRange);
    123   patchTensorBuffSize =entire_image_patch_row_major.size()*sizeof(DataType);
    124   DataType* gpu_data_entire_image_patch_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
    125   TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_entire_image_patch_row_major(gpu_data_entire_image_patch_row_major, patchRowMajorTensorRange);
    126   gpu_entire_image_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(3, 5);
    127   sycl_device.memcpyDeviceToHost(entire_image_patch_row_major.data(), gpu_data_entire_image_patch_row_major, patchTensorBuffSize);
    128 
    129   VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(0), 7);
    130   VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(1), 3*5);
    131   VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(2), 5);
    132   VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(3), 3);
    133   VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(4), 2);
    134 
    135   for (IndexType i = 0; i < 3; ++i) {
    136     for (IndexType j = 0; j < 5; ++j) {
    137       IndexType patchId = i+3*j;
    138       for (IndexType r = 0; r < 3; ++r) {
    139         for (IndexType c = 0; c < 5; ++c) {
    140           for (IndexType d = 0; d < 2; ++d) {
    141             for (IndexType b = 0; b < 7; ++b) {
    142               DataType expected_col_major = 0.0f;
    143               DataType expected_row_major = 0.0f;
    144               if (r-1+i >= 0 && c-2+j >= 0 && r-1+i < 3 && c-2+j < 5) {
    145                 expected_col_major = tensor_col_major(d, r-1+i, c-2+j, b);
    146                 expected_row_major = tensor_row_major(b, c-2+j, r-1+i, d);
    147               }
    148               // ColMajor
    149               if (entire_image_patch_col_major(d, r, c, patchId, b) != expected_col_major) {
    150                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
    151               }
    152               VERIFY_IS_EQUAL(entire_image_patch_col_major(d, r, c, patchId, b), expected_col_major);
    153               // RowMajor
    154               if (entire_image_patch_row_major(b, patchId, c, r, d) !=
    155                   expected_row_major) {
    156                 std::cout << "Mismatch detected at index i=" << i << " j=" << j
    157                      << " r=" << r << " c=" << c << " d=" << d << " b=" << b
    158                      << std::endl;
    159               }
    160               VERIFY_IS_EQUAL(entire_image_patch_row_major(b, patchId, c, r, d),
    161                               expected_row_major);
    162               // Check that ColMajor and RowMajor agree.
    163               VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
    164             }
    165           }
    166         }
    167       }
    168     }
    169   }
    170 
    171   // 2D patch: ColMajor
    172   patchColMajorTensorRange={{sizeDim1, 2, 2, sizeDim2*sizeDim3, sizeDim4}};
    173   Tensor<DataType, 5, DataLayout,IndexType> twod_patch_col_major(patchColMajorTensorRange);
    174   patchTensorBuffSize =twod_patch_col_major.size()*sizeof(DataType);
    175   DataType* gpu_data_twod_patch_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
    176   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_twod_patch_col_major(gpu_data_twod_patch_col_major, patchColMajorTensorRange);
    177   gpu_twod_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(2, 2);
    178   sycl_device.memcpyDeviceToHost(twod_patch_col_major.data(), gpu_data_twod_patch_col_major, patchTensorBuffSize);
    179 
    180   VERIFY_IS_EQUAL(twod_patch_col_major.dimension(0), 2);
    181   VERIFY_IS_EQUAL(twod_patch_col_major.dimension(1), 2);
    182   VERIFY_IS_EQUAL(twod_patch_col_major.dimension(2), 2);
    183   VERIFY_IS_EQUAL(twod_patch_col_major.dimension(3), 3*5);
    184   VERIFY_IS_EQUAL(twod_patch_col_major.dimension(4), 7);
    185 
    186   // 2D patch: RowMajor
    187   patchRowMajorTensorRange={{sizeDim4, sizeDim2*sizeDim3, 2, 2, sizeDim1}};
    188   Tensor<DataType, 5, RowMajor,IndexType> twod_patch_row_major(patchRowMajorTensorRange);
    189   patchTensorBuffSize =twod_patch_row_major.size()*sizeof(DataType);
    190   DataType* gpu_data_twod_patch_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
    191   TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_twod_patch_row_major(gpu_data_twod_patch_row_major, patchRowMajorTensorRange);
    192   gpu_twod_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(2, 2);
    193   sycl_device.memcpyDeviceToHost(twod_patch_row_major.data(), gpu_data_twod_patch_row_major, patchTensorBuffSize);
    194 
    195   VERIFY_IS_EQUAL(twod_patch_row_major.dimension(0), 7);
    196   VERIFY_IS_EQUAL(twod_patch_row_major.dimension(1), 3*5);
    197   VERIFY_IS_EQUAL(twod_patch_row_major.dimension(2), 2);
    198   VERIFY_IS_EQUAL(twod_patch_row_major.dimension(3), 2);
    199   VERIFY_IS_EQUAL(twod_patch_row_major.dimension(4), 2);
    200 
    201 
    202   // Based on the calculation described in TensorTraits.h, padding happens to be 0.
    203   IndexType row_padding = 0;
    204   IndexType col_padding = 0;
    205   IndexType stride = 1;
    206 
    207   for (IndexType i = 0; i < 3; ++i) {
    208     for (IndexType j = 0; j < 5; ++j) {
    209       IndexType patchId = i+3*j;
    210       for (IndexType r = 0; r < 2; ++r) {
    211         for (IndexType c = 0; c < 2; ++c) {
    212           for (IndexType d = 0; d < 2; ++d) {
    213             for (IndexType b = 0; b < 7; ++b) {
    214               DataType expected_col_major = 0.0f;
    215               DataType expected_row_major = 0.0f;
    216               IndexType row_offset = r*stride + i - row_padding;
    217               IndexType col_offset = c*stride + j - col_padding;
    218               // ColMajor
    219               if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_col_major.dimension(1) && col_offset < tensor_col_major.dimension(2)) {
    220                 expected_col_major = tensor_col_major(d, row_offset, col_offset, b);
    221               }
    222               if (twod_patch_col_major(d, r, c, patchId, b) != expected_col_major) {
    223                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
    224               }
    225               VERIFY_IS_EQUAL(twod_patch_col_major(d, r, c, patchId, b), expected_col_major);
    226 
    227               // RowMajor
    228               if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_row_major.dimension(2) && col_offset < tensor_row_major.dimension(1)) {
    229                 expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
    230 
    231               }
    232               if (twod_patch_row_major(b, patchId, c, r, d) != expected_row_major) {
    233                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
    234               }
    235               VERIFY_IS_EQUAL(twod_patch_row_major(b, patchId, c, r, d), expected_row_major);
    236               // Check that ColMajor and RowMajor agree.
    237               VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
    238             }
    239           }
    240         }
    241       }
    242     }
    243   }
    244 
    245   sycl_device.deallocate(gpu_data_col_major);
    246   sycl_device.deallocate(gpu_data_row_major);
    247   sycl_device.deallocate(gpu_data_single_patch_col_major);
    248   sycl_device.deallocate(gpu_data_single_patch_row_major);
    249   sycl_device.deallocate(gpu_data_entire_image_patch_col_major);
    250   sycl_device.deallocate(gpu_data_entire_image_patch_row_major);
    251   sycl_device.deallocate(gpu_data_twod_patch_col_major);
    252   sycl_device.deallocate(gpu_data_twod_patch_row_major);
    253 
    254 }
    255 
    256 
    257 // Verifies VALID padding (no padding) with incrementing values.
    258 template <typename DataType, typename IndexType>
    259 static void test_patch_padding_valid_sycl(const Eigen::SyclDevice& sycl_device){
    260   IndexType input_depth = 3;
    261   IndexType input_rows = 3;
    262   IndexType input_cols = 3;
    263   IndexType input_batches = 1;
    264   IndexType ksize = 2;  // Corresponds to the Rows and Cols for tensor.extract_image_patches<>.
    265   IndexType stride = 2;  // Only same stride is supported.
    266 
    267   array<IndexType, 4> tensorColMajorRange = {{input_depth, input_rows, input_cols, input_batches}};
    268   array<IndexType, 4> tensorRowMajorRange = {{input_batches, input_cols, input_rows, input_depth}};
    269   Tensor<DataType, 4, DataLayout,IndexType> tensor_col_major(tensorColMajorRange);
    270   Tensor<DataType, 4, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange);
    271 
    272   DataType* gpu_data_col_major  = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType)));
    273   DataType* gpu_data_row_major  = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType)));
    274   TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
    275   TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);
    276 
    277   sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(),(tensor_col_major.size())*sizeof(DataType));
    278   gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout();
    279   sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_col_major.size())*sizeof(DataType));
    280 
    281   VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(3));
    282   VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(2));
    283   VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(1));
    284   VERIFY_IS_EQUAL(tensor_col_major.dimension(3), tensor_row_major.dimension(0));
    285 
    286   // Initializes tensor with incrementing numbers.
    287   for (IndexType i = 0; i < tensor_col_major.size(); ++i) {
    288     tensor_col_major.data()[i] = i + 1;
    289   }
    290   // ColMajor
    291   array<IndexType, 5> patchColMajorTensorRange={{input_depth, ksize, ksize, 1, input_batches}};
    292   Tensor<DataType, 5, DataLayout,IndexType> result_col_major(patchColMajorTensorRange);
    293   size_t patchTensorBuffSize =result_col_major.size()*sizeof(DataType);
    294   DataType* gpu_data_result_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
    295   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_result_col_major(gpu_data_result_col_major, patchColMajorTensorRange);
    296   gpu_result_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
    297   sycl_device.memcpyDeviceToHost(result_col_major.data(), gpu_data_result_col_major, patchTensorBuffSize);
    298 
    299   VERIFY_IS_EQUAL(result_col_major.dimension(0), input_depth);  // depth
    300   VERIFY_IS_EQUAL(result_col_major.dimension(1), ksize);  // kernel rows
    301   VERIFY_IS_EQUAL(result_col_major.dimension(2), ksize);  // kernel cols
    302   VERIFY_IS_EQUAL(result_col_major.dimension(3), 1);  // number of patches
    303   VERIFY_IS_EQUAL(result_col_major.dimension(4), input_batches);  // number of batches
    304 
    305   // RowMajor
    306   array<IndexType, 5> patchRowMajorTensorRange={{input_batches, 1, ksize, ksize, input_depth }};
    307   Tensor<DataType, 5, RowMajor,IndexType> result_row_major(patchRowMajorTensorRange);
    308   patchTensorBuffSize =result_row_major.size()*sizeof(DataType);
    309   DataType* gpu_data_result_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
    310   TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_result_row_major(gpu_data_result_row_major, patchRowMajorTensorRange);
    311   gpu_result_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
    312   sycl_device.memcpyDeviceToHost(result_row_major.data(), gpu_data_result_row_major, patchTensorBuffSize);
    313 
    314   VERIFY_IS_EQUAL(result_col_major.dimension(0), result_row_major.dimension(4));
    315   VERIFY_IS_EQUAL(result_col_major.dimension(1), result_row_major.dimension(3));
    316   VERIFY_IS_EQUAL(result_col_major.dimension(2), result_row_major.dimension(2));
    317   VERIFY_IS_EQUAL(result_col_major.dimension(3), result_row_major.dimension(1));
    318   VERIFY_IS_EQUAL(result_col_major.dimension(4), result_row_major.dimension(0));
    319 
    320   // No padding is carried out.
    321   IndexType row_padding = 0;
    322   IndexType col_padding = 0;
    323 
    324   for (IndexType i = 0; (i+stride+ksize-1) < input_rows; i += stride) {  // input rows
    325     for (IndexType j = 0; (j+stride+ksize-1) < input_cols; j += stride) {  // input cols
    326       IndexType patchId = i+input_rows*j;
    327       for (IndexType r = 0; r < ksize; ++r) {  // patch rows
    328         for (IndexType c = 0; c < ksize; ++c) {  // patch cols
    329           for (IndexType d = 0; d < input_depth; ++d) {  // depth
    330             for (IndexType b = 0; b < input_batches; ++b) {  // batch
    331               DataType expected_col_major = 0.0f;
    332               DataType expected_row_major = 0.0f;
    333               IndexType row_offset = r + i - row_padding;
    334               IndexType col_offset = c + j - col_padding;
    335               if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) {
    336                 expected_col_major = tensor_col_major(d, row_offset, col_offset, b);
    337                 expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
    338               }
    339               // ColMajor
    340               if (result_col_major(d, r, c, patchId, b) != expected_col_major) {
    341                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
    342               }
    343               VERIFY_IS_EQUAL(result_col_major(d, r, c, patchId, b), expected_col_major);
    344               // RowMajor
    345               if (result_row_major(b, patchId, c, r, d) != expected_row_major) {
    346                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
    347               }
    348               VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major);
    349               // Check that ColMajor and RowMajor agree.
    350               VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
    351             }
    352           }
    353         }
    354       }
    355     }
    356   }
    357   sycl_device.deallocate(gpu_data_col_major);
    358   sycl_device.deallocate(gpu_data_row_major);
    359   sycl_device.deallocate(gpu_data_result_col_major);
    360   sycl_device.deallocate(gpu_data_result_row_major);
    361 }
    362 
    363 // Verifies VALID padding (no padding) with the same value.
    364 template <typename DataType, typename IndexType>
    365 static void test_patch_padding_valid_same_value_sycl(const Eigen::SyclDevice& sycl_device){
    366   IndexType input_depth = 1;
    367   IndexType input_rows = 5;
    368   IndexType input_cols = 5;
    369   IndexType input_batches = 2;
    370   IndexType ksize = 3;  // Corresponds to the Rows and Cols for tensor.extract_image_patches<>.
    371   IndexType stride = 2;  // Only same stride is supported.
    372   // ColMajor
    373 
    374   array<IndexType, 4> tensorColMajorRange = {{input_depth, input_rows, input_cols, input_batches}};
    375   array<IndexType, 4> tensorRowMajorRange = {{input_batches, input_cols, input_rows, input_depth}};
    376   Tensor<DataType, 4, DataLayout,IndexType> tensor_col_major(tensorColMajorRange);
    377   Tensor<DataType, 4, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange);
    378 
    379   DataType* gpu_data_col_major  = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType)));
    380   DataType* gpu_data_row_major  = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType)));
    381   TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
    382   TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);
    383   gpu_col_major.device(sycl_device)=gpu_col_major.constant(11.0f);
    384   gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout();
    385   sycl_device.memcpyDeviceToHost(tensor_col_major.data(), gpu_data_col_major, (tensor_col_major.size())*sizeof(DataType));
    386   sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_row_major.size())*sizeof(DataType));
    387   VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(3));
    388   VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(2));
    389   VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(1));
    390   VERIFY_IS_EQUAL(tensor_col_major.dimension(3), tensor_row_major.dimension(0));
    391 
    392   array<IndexType, 5> patchColMajorTensorRange={{input_depth, ksize, ksize, 4, input_batches}};
    393   Tensor<DataType, 5, DataLayout,IndexType> result_col_major(patchColMajorTensorRange);
    394   size_t patchTensorBuffSize =result_col_major.size()*sizeof(DataType);
    395   DataType* gpu_data_result_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
    396   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_result_col_major(gpu_data_result_col_major, patchColMajorTensorRange);
    397   gpu_result_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
    398   sycl_device.memcpyDeviceToHost(result_col_major.data(), gpu_data_result_col_major, patchTensorBuffSize);
    399 
    400   VERIFY_IS_EQUAL(result_col_major.dimension(0), input_depth);  // depth
    401   VERIFY_IS_EQUAL(result_col_major.dimension(1), ksize);  // kernel rows
    402   VERIFY_IS_EQUAL(result_col_major.dimension(2), ksize);  // kernel cols
    403   VERIFY_IS_EQUAL(result_col_major.dimension(3), 4);  // number of patches
    404   VERIFY_IS_EQUAL(result_col_major.dimension(4), input_batches);  // number of batches
    405 
    406   // RowMajor
    407   array<IndexType, 5> patchRowMajorTensorRange={{input_batches, 4, ksize, ksize, input_depth }};
    408   Tensor<DataType, 5, RowMajor,IndexType> result_row_major(patchRowMajorTensorRange);
    409   patchTensorBuffSize =result_row_major.size()*sizeof(DataType);
    410   DataType* gpu_data_result_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
    411   TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_result_row_major(gpu_data_result_row_major, patchRowMajorTensorRange);
    412   gpu_result_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
    413   sycl_device.memcpyDeviceToHost(result_row_major.data(), gpu_data_result_row_major, patchTensorBuffSize);
    414 
    415   VERIFY_IS_EQUAL(result_col_major.dimension(0), result_row_major.dimension(4));
    416   VERIFY_IS_EQUAL(result_col_major.dimension(1), result_row_major.dimension(3));
    417   VERIFY_IS_EQUAL(result_col_major.dimension(2), result_row_major.dimension(2));
    418   VERIFY_IS_EQUAL(result_col_major.dimension(3), result_row_major.dimension(1));
    419   VERIFY_IS_EQUAL(result_col_major.dimension(4), result_row_major.dimension(0));
    420 
    421   // No padding is carried out.
    422   IndexType row_padding = 0;
    423   IndexType col_padding = 0;
    424 
    425   for (IndexType i = 0; (i+stride+ksize-1) <= input_rows; i += stride) {  // input rows
    426     for (IndexType j = 0; (j+stride+ksize-1) <= input_cols; j += stride) {  // input cols
    427       IndexType patchId = i+input_rows*j;
    428       for (IndexType r = 0; r < ksize; ++r) {  // patch rows
    429         for (IndexType c = 0; c < ksize; ++c) {  // patch cols
    430           for (IndexType d = 0; d < input_depth; ++d) {  // depth
    431             for (IndexType b = 0; b < input_batches; ++b) {  // batch
    432               DataType expected_col_major = 0.0f;
    433               DataType expected_row_major = 0.0f;
    434               IndexType row_offset = r + i - row_padding;
    435               IndexType col_offset = c + j - col_padding;
    436               if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) {
    437                 expected_col_major = tensor_col_major(d, row_offset, col_offset, b);
    438                 expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
    439               }
    440               // ColMajor
    441               if (result_col_major(d, r, c, patchId, b) != expected_col_major) {
    442                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
    443               }
    444               VERIFY_IS_EQUAL(result_col_major(d, r, c, patchId, b), expected_col_major);
    445               // RowMajor
    446               if (result_row_major(b, patchId, c, r, d) != expected_row_major) {
    447                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
    448               }
    449               VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major);
    450               // Check that ColMajor and RowMajor agree.
    451               VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
    452             }
    453           }
    454         }
    455       }
    456     }
    457   }
    458 }
    459 
    460 // Verifies SAME padding.
    461 template <typename DataType, typename IndexType>
    462 static void test_patch_padding_same_sycl(const Eigen::SyclDevice& sycl_device){
    463   IndexType input_depth = 3;
    464   IndexType input_rows = 4;
    465   IndexType input_cols = 2;
    466   IndexType input_batches = 1;
    467   IndexType ksize = 2;  // Corresponds to the Rows and Cols for tensor.extract_image_patches<>.
    468   IndexType stride = 2;  // Only same stride is supported.
    469 
    470   // ColMajor
    471   array<IndexType, 4> tensorColMajorRange = {{input_depth, input_rows, input_cols, input_batches}};
    472   array<IndexType, 4> tensorRowMajorRange = {{input_batches, input_cols, input_rows, input_depth}};
    473   Tensor<DataType, 4, DataLayout,IndexType> tensor_col_major(tensorColMajorRange);
    474   Tensor<DataType, 4, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange);
    475 
    476   DataType* gpu_data_col_major  = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType)));
    477   DataType* gpu_data_row_major  = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType)));
    478   TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
    479   TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);
    480 
    481   sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(),(tensor_col_major.size())*sizeof(DataType));
    482   gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout();
    483   sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_col_major.size())*sizeof(DataType));
    484 
    485   VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(3));
    486   VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(2));
    487   VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(1));
    488   VERIFY_IS_EQUAL(tensor_col_major.dimension(3), tensor_row_major.dimension(0));
    489 
    490   // Initializes tensor with incrementing numbers.
    491   for (IndexType i = 0; i < tensor_col_major.size(); ++i) {
    492     tensor_col_major.data()[i] = i + 1;
    493   }
    494 
    495 array<IndexType, 5> patchColMajorTensorRange={{input_depth, ksize, ksize, 2, input_batches}};
    496 Tensor<DataType, 5, DataLayout,IndexType> result_col_major(patchColMajorTensorRange);
    497 size_t patchTensorBuffSize =result_col_major.size()*sizeof(DataType);
    498 DataType* gpu_data_result_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
    499 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_result_col_major(gpu_data_result_col_major, patchColMajorTensorRange);
    500 gpu_result_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(ksize, ksize, stride, stride, PADDING_SAME);
    501 sycl_device.memcpyDeviceToHost(result_col_major.data(), gpu_data_result_col_major, patchTensorBuffSize);
    502 
    503 
    504   VERIFY_IS_EQUAL(result_col_major.dimension(0), input_depth);  // depth
    505   VERIFY_IS_EQUAL(result_col_major.dimension(1), ksize);  // kernel rows
    506   VERIFY_IS_EQUAL(result_col_major.dimension(2), ksize);  // kernel cols
    507   VERIFY_IS_EQUAL(result_col_major.dimension(3), 2);  // number of patches
    508   VERIFY_IS_EQUAL(result_col_major.dimension(4), input_batches);  // number of batches
    509 
    510   // RowMajor
    511 
    512   array<IndexType, 5> patchRowMajorTensorRange={{input_batches, 2, ksize, ksize, input_depth }};
    513   Tensor<DataType, 5, RowMajor,IndexType> result_row_major(patchRowMajorTensorRange);
    514   patchTensorBuffSize =result_row_major.size()*sizeof(DataType);
    515   DataType* gpu_data_result_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
    516   TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_result_row_major(gpu_data_result_row_major, patchRowMajorTensorRange);
    517   gpu_result_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(ksize, ksize, stride, stride, PADDING_SAME);
    518   sycl_device.memcpyDeviceToHost(result_row_major.data(), gpu_data_result_row_major, patchTensorBuffSize);
    519 
    520   VERIFY_IS_EQUAL(result_col_major.dimension(0), result_row_major.dimension(4));
    521   VERIFY_IS_EQUAL(result_col_major.dimension(1), result_row_major.dimension(3));
    522   VERIFY_IS_EQUAL(result_col_major.dimension(2), result_row_major.dimension(2));
    523   VERIFY_IS_EQUAL(result_col_major.dimension(3), result_row_major.dimension(1));
    524   VERIFY_IS_EQUAL(result_col_major.dimension(4), result_row_major.dimension(0));
    525 
    526   // Based on the calculation described in TensorTraits.h, padding happens to be 0.
    527   IndexType row_padding = 0;
    528   IndexType col_padding = 0;
    529 
    530   for (IndexType i = 0; (i+stride+ksize-1) <= input_rows; i += stride) {  // input rows
    531     for (IndexType j = 0; (j+stride+ksize-1) <= input_cols; j += stride) {  // input cols
    532       IndexType patchId = i+input_rows*j;
    533       for (IndexType r = 0; r < ksize; ++r) {  // patch rows
    534         for (IndexType c = 0; c < ksize; ++c) {  // patch cols
    535           for (IndexType d = 0; d < input_depth; ++d) {  // depth
    536             for (IndexType b = 0; b < input_batches; ++b) {  // batch
    537               DataType expected_col_major = 0.0f;
    538               DataType expected_row_major = 0.0f;
    539               IndexType row_offset = r*stride + i - row_padding;
    540               IndexType col_offset = c*stride + j - col_padding;
    541               if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) {
    542                 expected_col_major = tensor_col_major(d, row_offset, col_offset, b);
    543                 expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
    544               }
    545               // ColMajor
    546               if (result_col_major(d, r, c, patchId, b) != expected_col_major) {
    547                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
    548               }
    549               VERIFY_IS_EQUAL(result_col_major(d, r, c, patchId, b), expected_col_major);
    550               // RowMajor
    551               if (result_row_major(b, patchId, c, r, d) != expected_row_major) {
    552                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
    553               }
    554               VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major);
    555               // Check that ColMajor and RowMajor agree.
    556               VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
    557             }
    558           }
    559         }
    560       }
    561     }
    562   }
    563 }
    564 
    565 
    566 template <typename DataType, typename IndexType>
    567 static void test_patch_no_extra_dim_sycl(const Eigen::SyclDevice& sycl_device){
    568 
    569   IndexType sizeDim1 = 2;
    570   IndexType sizeDim2 = 3;
    571   IndexType sizeDim3 = 5;
    572 
    573   // ColMajor
    574   array<IndexType, 3> tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3}};
    575   array<IndexType, 3> tensorRowMajorRange = {{sizeDim3, sizeDim2, sizeDim1}};
    576   Tensor<DataType, 3, DataLayout,IndexType> tensor_col_major(tensorColMajorRange);
    577   tensor_col_major.setRandom();
    578   Tensor<DataType, 3, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange);
    579 
    580   DataType* gpu_data_col_major  = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType)));
    581   DataType* gpu_data_row_major  = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType)));
    582   TensorMap<Tensor<DataType, 3, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
    583   TensorMap<Tensor<DataType, 3, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);
    584 
    585   sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(),(tensor_col_major.size())*sizeof(DataType));
    586   gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout();
    587   sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_row_major.size())*sizeof(DataType));
    588 
    589   VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(2));
    590   VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(1));
    591   VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(0));
    592 
    593 
    594   // Single pixel patch: ColMajor
    595   array<IndexType, 4> patchColMajorTensorRange={{sizeDim1, 1, 1, sizeDim2*sizeDim3}};
    596   Tensor<DataType, 4, DataLayout,IndexType> single_patch_col_major(patchColMajorTensorRange);
    597   size_t patchTensorBuffSize =single_patch_col_major.size()*sizeof(DataType);
    598   DataType* gpu_data_single_patch_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
    599   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_single_patch_col_major(gpu_data_single_patch_col_major, patchColMajorTensorRange);
    600   gpu_single_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(1, 1);
    601   sycl_device.memcpyDeviceToHost(single_patch_col_major.data(), gpu_data_single_patch_col_major, patchTensorBuffSize);
    602 
    603   VERIFY_IS_EQUAL(single_patch_col_major.dimension(0), sizeDim1);
    604   VERIFY_IS_EQUAL(single_patch_col_major.dimension(1), 1);
    605   VERIFY_IS_EQUAL(single_patch_col_major.dimension(2), 1);
    606   VERIFY_IS_EQUAL(single_patch_col_major.dimension(3), sizeDim2*sizeDim3);
    607 
    608   // Single pixel patch: RowMajor
    609   array<IndexType, 4> patchRowMajorTensorRange={{sizeDim2*sizeDim3, 1, 1, sizeDim1}};
    610   Tensor<DataType, 4, RowMajor,IndexType> single_patch_row_major(patchRowMajorTensorRange);
    611   patchTensorBuffSize =single_patch_row_major.size()*sizeof(DataType);
    612   DataType* gpu_data_single_patch_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
    613   TensorMap<Tensor<DataType, 4, RowMajor,IndexType>> gpu_single_patch_row_major(gpu_data_single_patch_row_major, patchRowMajorTensorRange);
    614   gpu_single_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(1, 1);
    615   sycl_device.memcpyDeviceToHost(single_patch_row_major.data(), gpu_data_single_patch_row_major, patchTensorBuffSize);
    616 
    617   VERIFY_IS_EQUAL(single_patch_row_major.dimension(0), sizeDim2*sizeDim3);
    618   VERIFY_IS_EQUAL(single_patch_row_major.dimension(1), 1);
    619   VERIFY_IS_EQUAL(single_patch_row_major.dimension(2), 1);
    620   VERIFY_IS_EQUAL(single_patch_row_major.dimension(3), sizeDim1);
    621 
    622   for (IndexType i = 0; i < tensor_col_major.size(); ++i) {
    623     // ColMajor
    624     if (tensor_col_major.data()[i] != single_patch_col_major.data()[i]) {
    625       std::cout << "Mismatch detected at index " << i << " : " << tensor_col_major.data()[i] << " vs " << single_patch_col_major.data()[i] << std::endl;
    626     }
    627     VERIFY_IS_EQUAL(single_patch_col_major.data()[i], tensor_col_major.data()[i]);
    628     // RowMajor
    629     if (tensor_row_major.data()[i] != single_patch_row_major.data()[i]) {
    630       std::cout << "Mismatch detected at index " << i << " : "
    631            << tensor_col_major.data()[i] << " vs "
    632            << single_patch_row_major.data()[i] << std::endl;
    633     }
    634     VERIFY_IS_EQUAL(single_patch_row_major.data()[i],
    635                     tensor_row_major.data()[i]);
    636     VERIFY_IS_EQUAL(tensor_col_major.data()[i], tensor_row_major.data()[i]);
    637     VERIFY_IS_EQUAL(single_patch_col_major.data()[i],
    638                     single_patch_row_major.data()[i]);
    639   }
    640 
    641   // Entire image patch: ColMajor
    642   patchColMajorTensorRange={{sizeDim1, sizeDim2, sizeDim3, sizeDim2*sizeDim3}};
    643   Tensor<DataType, 4, DataLayout,IndexType> entire_image_patch_col_major(patchColMajorTensorRange);
    644   patchTensorBuffSize =entire_image_patch_col_major.size()*sizeof(DataType);
    645   DataType* gpu_data_entire_image_patch_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
    646   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_entire_image_patch_col_major(gpu_data_entire_image_patch_col_major, patchColMajorTensorRange);
    647   gpu_entire_image_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(3, 5);
    648   sycl_device.memcpyDeviceToHost(entire_image_patch_col_major.data(), gpu_data_entire_image_patch_col_major, patchTensorBuffSize);
    649 
    650   VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(0), 2);
    651   VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(1), 3);
    652   VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(2), 5);
    653   VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(3), 3*5);
    654 
    655   // Entire image patch: RowMajor
    656 patchRowMajorTensorRange={{sizeDim2*sizeDim3, sizeDim3, sizeDim2, sizeDim1}};
    657 Tensor<DataType, 4, RowMajor,IndexType> entire_image_patch_row_major(patchRowMajorTensorRange);
    658 patchTensorBuffSize =entire_image_patch_row_major.size()*sizeof(DataType);
    659 DataType* gpu_data_entire_image_patch_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
    660 TensorMap<Tensor<DataType, 4, RowMajor,IndexType>> gpu_entire_image_patch_row_major(gpu_data_entire_image_patch_row_major, patchRowMajorTensorRange);
    661 gpu_entire_image_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(3, 5);
    662 sycl_device.memcpyDeviceToHost(entire_image_patch_row_major.data(), gpu_data_entire_image_patch_row_major, patchTensorBuffSize);
    663   VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(0), 3*5);
    664   VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(1), 5);
    665   VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(2), 3);
    666   VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(3), 2);
    667 
    668   for (IndexType i = 0; i < 3; ++i) {
    669     for (IndexType j = 0; j < 5; ++j) {
    670       IndexType patchId = i+3*j;
    671       for (IndexType r = 0; r < 3; ++r) {
    672         for (IndexType c = 0; c < 5; ++c) {
    673           for (IndexType d = 0; d < 2; ++d) {
    674             DataType expected_col_major = 0.0f;
    675             DataType expected_row_major = 0.0f;
    676             if (r-1+i >= 0 && c-2+j >= 0 && r-1+i < 3 && c-2+j < 5) {
    677               expected_col_major = tensor_col_major(d, r-1+i, c-2+j);
    678               expected_row_major = tensor_row_major(c-2+j, r-1+i, d);
    679             }
    680             // ColMajor
    681             if (entire_image_patch_col_major(d, r, c, patchId) != expected_col_major) {
    682               std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl;
    683             }
    684             VERIFY_IS_EQUAL(entire_image_patch_col_major(d, r, c, patchId), expected_col_major);
    685             // RowMajor
    686             if (entire_image_patch_row_major(patchId, c, r, d) !=
    687                 expected_row_major) {
    688               std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl;
    689             }
    690             VERIFY_IS_EQUAL(entire_image_patch_row_major(patchId, c, r, d),
    691                             expected_row_major);
    692             // Check that ColMajor and RowMajor agree.
    693             VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
    694           }
    695         }
    696       }
    697     }
    698   }
    699 
    700   // 2D patch: ColMajor
    701   patchColMajorTensorRange={{sizeDim1, 2, 2, sizeDim2*sizeDim3}};
    702   Tensor<DataType, 4, DataLayout,IndexType> twod_patch_col_major(patchColMajorTensorRange);
    703   patchTensorBuffSize =twod_patch_col_major.size()*sizeof(DataType);
    704   DataType* gpu_data_twod_patch_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
    705   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_twod_patch_col_major(gpu_data_twod_patch_col_major, patchColMajorTensorRange);
    706   gpu_twod_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(2, 2);
    707   sycl_device.memcpyDeviceToHost(twod_patch_col_major.data(), gpu_data_twod_patch_col_major, patchTensorBuffSize);
    708 
    709   VERIFY_IS_EQUAL(twod_patch_col_major.dimension(0), 2);
    710   VERIFY_IS_EQUAL(twod_patch_col_major.dimension(1), 2);
    711   VERIFY_IS_EQUAL(twod_patch_col_major.dimension(2), 2);
    712   VERIFY_IS_EQUAL(twod_patch_col_major.dimension(3), 3*5);
    713 
    714   // 2D patch: RowMajor
    715   patchRowMajorTensorRange={{sizeDim2*sizeDim3, 2, 2, sizeDim1}};
    716   Tensor<DataType, 4, RowMajor,IndexType> twod_patch_row_major(patchRowMajorTensorRange);
    717   patchTensorBuffSize =twod_patch_row_major.size()*sizeof(DataType);
    718   DataType* gpu_data_twod_patch_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
    719   TensorMap<Tensor<DataType, 4, RowMajor,IndexType>> gpu_twod_patch_row_major(gpu_data_twod_patch_row_major, patchRowMajorTensorRange);
    720   gpu_twod_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(2, 2);
    721   sycl_device.memcpyDeviceToHost(twod_patch_row_major.data(), gpu_data_twod_patch_row_major, patchTensorBuffSize);
    722   VERIFY_IS_EQUAL(twod_patch_row_major.dimension(0), 3*5);
    723   VERIFY_IS_EQUAL(twod_patch_row_major.dimension(1), 2);
    724   VERIFY_IS_EQUAL(twod_patch_row_major.dimension(2), 2);
    725   VERIFY_IS_EQUAL(twod_patch_row_major.dimension(3), 2);
    726 
    727   // Based on the calculation described in TensorTraits.h, padding happens to be 0.
    728   IndexType row_padding = 0;
    729   IndexType col_padding = 0;
    730   IndexType stride = 1;
    731 
    732   for (IndexType i = 0; i < 3; ++i) {
    733     for (IndexType j = 0; j < 5; ++j) {
    734       IndexType patchId = i+3*j;
    735       for (IndexType r = 0; r < 2; ++r) {
    736         for (IndexType c = 0; c < 2; ++c) {
    737           for (IndexType d = 0; d < 2; ++d) {
    738             DataType expected_col_major = 0.0f;
    739             DataType expected_row_major = 0.0f;
    740             IndexType row_offset = r*stride + i - row_padding;
    741             IndexType col_offset = c*stride + j - col_padding;
    742             // ColMajor
    743             if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_col_major.dimension(1) && col_offset < tensor_col_major.dimension(2)) {
    744               expected_col_major = tensor_col_major(d, row_offset, col_offset);
    745             }
    746             if (twod_patch_col_major(d, r, c, patchId) != expected_col_major) {
    747               std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl;
    748             }
    749             VERIFY_IS_EQUAL(twod_patch_col_major(d, r, c, patchId), expected_col_major);
    750             // RowMajor
    751             if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_row_major.dimension(1) && col_offset < tensor_row_major.dimension(0)) {
    752               expected_row_major = tensor_row_major(col_offset, row_offset, d);
    753             }
    754             if (twod_patch_row_major(patchId, c, r, d) != expected_row_major) {
    755               std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl;
    756             }
    757             VERIFY_IS_EQUAL(twod_patch_row_major(patchId, c, r, d), expected_row_major);
    758             // Check that ColMajor and RowMajor agree.
    759             VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
    760           }
    761         }
    762       }
    763     }
    764   }
    765 
    766   sycl_device.deallocate(gpu_data_col_major);
    767   sycl_device.deallocate(gpu_data_row_major);
    768   sycl_device.deallocate(gpu_data_single_patch_col_major);
    769   sycl_device.deallocate(gpu_data_single_patch_row_major);
    770   sycl_device.deallocate(gpu_data_entire_image_patch_col_major);
    771   sycl_device.deallocate(gpu_data_entire_image_patch_row_major);
    772   sycl_device.deallocate(gpu_data_twod_patch_col_major);
    773   sycl_device.deallocate(gpu_data_twod_patch_row_major);
    774 }
    775 
    776 template <typename DataType, typename IndexType>
    777 static void test_imagenet_patches_sycl(const Eigen::SyclDevice& sycl_device)
    778 {
    779   // Test the code on typical configurations used by the 'imagenet' benchmarks at
    780   // https://github.com/soumith/convnet-benchmarks
    781   // ColMajor
    782   IndexType sizeDim1 = 3;
    783   IndexType sizeDim2 = 128;
    784   IndexType sizeDim3 = 128;
    785   IndexType sizeDim4 = 16;
    786   array<IndexType, 4> tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
    787   Tensor<DataType, 4, DataLayout,IndexType> l_in_col_major(tensorColMajorRange);
    788   l_in_col_major.setRandom();
    789 
    790   DataType* gpu_data_l_in_col_major  = static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size()*sizeof(DataType)));
    791   TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_l_in_col_major(gpu_data_l_in_col_major, tensorColMajorRange);
    792 
    793   sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(),(l_in_col_major.size())*sizeof(DataType));
    794 
    795   array<IndexType, 5> patchTensorRange={{sizeDim1, 11, 11, sizeDim2*sizeDim3, sizeDim4}};
    796   Tensor<DataType, 5, DataLayout,IndexType> l_out_col_major(patchTensorRange);
    797   size_t patchTensorBuffSize =l_out_col_major.size()*sizeof(DataType);
    798   DataType* gpu_data_l_out_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
    799   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_l_out_col_major(gpu_data_l_out_col_major, patchTensorRange);
    800   gpu_l_out_col_major.device(sycl_device)=gpu_l_in_col_major.extract_image_patches(11, 11);
    801   sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize);
    802 
    803   VERIFY_IS_EQUAL(l_out_col_major.dimension(0), sizeDim1);
    804   VERIFY_IS_EQUAL(l_out_col_major.dimension(1), 11);
    805   VERIFY_IS_EQUAL(l_out_col_major.dimension(2), 11);
    806   VERIFY_IS_EQUAL(l_out_col_major.dimension(3), sizeDim2*sizeDim3);
    807   VERIFY_IS_EQUAL(l_out_col_major.dimension(4), sizeDim4);
    808 
    809   // RowMajor
    810   patchTensorRange={{sizeDim4, sizeDim2*sizeDim3, 11, 11, sizeDim1}};
    811   Tensor<DataType, 5, RowMajor,IndexType> l_out_row_major(patchTensorRange);
    812   patchTensorBuffSize =l_out_row_major.size()*sizeof(DataType);
    813   DataType* gpu_data_l_out_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
    814   TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_l_out_row_major(gpu_data_l_out_row_major, patchTensorRange);
    815   gpu_l_out_row_major.device(sycl_device)=gpu_l_in_col_major.swap_layout().extract_image_patches(11, 11);
    816   sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize);
    817 
    818   VERIFY_IS_EQUAL(l_out_row_major.dimension(0), sizeDim4);
    819   VERIFY_IS_EQUAL(l_out_row_major.dimension(1), sizeDim2*sizeDim3);
    820   VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 11);
    821   VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 11);
    822   VERIFY_IS_EQUAL(l_out_row_major.dimension(4), sizeDim1);
    823 
    824   for (IndexType b = 0; b < 16; ++b) {
    825     for (IndexType i = 0; i < 128; ++i) {
    826       for (IndexType j = 0; j < 128; ++j) {
    827         IndexType patchId = i+128*j;
    828         for (IndexType c = 0; c < 11; ++c) {
    829           for (IndexType r = 0; r < 11; ++r) {
    830             for (IndexType d = 0; d < 3; ++d) {
    831               DataType expected = 0.0f;
    832               if (r-5+i >= 0 && c-5+j >= 0 && r-5+i < 128 && c-5+j < 128) {
    833                 expected = l_in_col_major(d, r-5+i, c-5+j, b);
    834               }
    835               // ColMajor
    836               if (l_out_col_major(d, r, c, patchId, b) != expected) {
    837                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
    838               }
    839               VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected);
    840               // RowMajor
    841               if (l_out_row_major(b, patchId, c, r, d) !=
    842                   expected) {
    843                 std::cout << "Mismatch detected at index i=" << i << " j=" << j
    844                      << " r=" << r << " c=" << c << " d=" << d << " b=" << b
    845                      << std::endl;
    846               }
    847               VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d),
    848                               expected);
    849             }
    850           }
    851         }
    852       }
    853     }
    854   }
    855 
    856   // ColMajor
    857   sycl_device.deallocate(gpu_data_l_in_col_major);
    858   sycl_device.deallocate(gpu_data_l_out_col_major);
    859   sizeDim1 = 16;
    860   sizeDim2 = 64;
    861   sizeDim3 = 64;
    862   sizeDim4 = 32;
    863   tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
    864   l_in_col_major.resize(tensorColMajorRange);
    865   l_in_col_major.setRandom();
    866   gpu_data_l_in_col_major  = static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size()*sizeof(DataType)));
    867   TensorMap<Tensor<DataType, 4, ColMajor, IndexType>>gpu_l_in_col_major_resize1(gpu_data_l_in_col_major, tensorColMajorRange);
    868 
    869   patchTensorRange={{sizeDim1, 9, 9, sizeDim2*sizeDim3, sizeDim4}};
    870   l_out_col_major.resize(patchTensorRange);
    871   patchTensorBuffSize =l_out_col_major.size()*sizeof(DataType);
    872   gpu_data_l_out_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
    873   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>>gpu_l_out_col_major_resize1(gpu_data_l_out_col_major, patchTensorRange);
    874   sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(),(l_in_col_major.size())*sizeof(DataType));
    875   gpu_l_out_col_major_resize1.device(sycl_device)=gpu_l_in_col_major_resize1.extract_image_patches(9, 9);
    876   sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize);
    877   VERIFY_IS_EQUAL(l_out_col_major.dimension(0), 16);
    878   VERIFY_IS_EQUAL(l_out_col_major.dimension(1), 9);
    879   VERIFY_IS_EQUAL(l_out_col_major.dimension(2), 9);
    880   VERIFY_IS_EQUAL(l_out_col_major.dimension(3), 64*64);
    881   VERIFY_IS_EQUAL(l_out_col_major.dimension(4), 32);
    882 
    883 // RowMajor
    884   sycl_device.deallocate(gpu_data_l_out_row_major);
    885   patchTensorRange={{sizeDim4, sizeDim2*sizeDim3, 9, 9 ,sizeDim1}};
    886   l_out_row_major.resize(patchTensorRange);
    887   patchTensorBuffSize =l_out_row_major.size()*sizeof(DataType);
    888   gpu_data_l_out_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
    889   TensorMap<Tensor<DataType, 5, RowMajor,IndexType>>gpu_l_out_row_major_resize1(gpu_data_l_out_row_major, patchTensorRange);
    890   gpu_l_out_row_major_resize1.device(sycl_device)=gpu_l_in_col_major_resize1.swap_layout().extract_image_patches(9, 9);
    891   sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize);
    892 
    893   VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32);
    894   VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 64*64);
    895   VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 9);
    896   VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 9);
    897   VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 16);
    898 
    899   for (IndexType b = 0; b < 32; ++b) {
    900     for (IndexType i = 0; i < 64; ++i) {
    901       for (IndexType j = 0; j < 64; ++j) {
    902         IndexType patchId = i+64*j;
    903         for (IndexType c = 0; c < 9; ++c) {
    904           for (IndexType r = 0; r < 9; ++r) {
    905             for (IndexType d = 0; d < 16; ++d) {
    906               DataType expected = 0.0f;
    907               if (r-4+i >= 0 && c-4+j >= 0 && r-4+i < 64 && c-4+j < 64) {
    908                 expected = l_in_col_major(d, r-4+i, c-4+j, b);
    909               }
    910               // ColMajor
    911               if (l_out_col_major(d, r, c, patchId, b) != expected) {
    912                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
    913               }
    914               VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected);
    915               // RowMajor
    916               if (l_out_row_major(b, patchId, c, r, d) != expected) {
    917                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
    918               }
    919               VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
    920             }
    921           }
    922         }
    923       }
    924     }
    925   }
    926 
    927   // ColMajor
    928 
    929   sycl_device.deallocate(gpu_data_l_in_col_major);
    930   sycl_device.deallocate(gpu_data_l_out_col_major);
    931   sizeDim1 = 32;
    932   sizeDim2 = 16;
    933   sizeDim3 = 16;
    934   sizeDim4 = 32;
    935   tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
    936   l_in_col_major.resize(tensorColMajorRange);
    937   l_in_col_major.setRandom();
    938   gpu_data_l_in_col_major  = static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size()*sizeof(DataType)));
    939   TensorMap<Tensor<DataType, 4, ColMajor, IndexType>>gpu_l_in_col_major_resize2(gpu_data_l_in_col_major, tensorColMajorRange);
    940 
    941   patchTensorRange={{sizeDim1, 7, 7, sizeDim2*sizeDim3, sizeDim4}};
    942   l_out_col_major.resize(patchTensorRange);
    943   patchTensorBuffSize =l_out_col_major.size()*sizeof(DataType);
    944   gpu_data_l_out_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
    945   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>>gpu_l_out_col_major_resize2(gpu_data_l_out_col_major, patchTensorRange);
    946   sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(),(l_in_col_major.size())*sizeof(DataType));
    947   gpu_l_out_col_major_resize2.device(sycl_device)=gpu_l_in_col_major_resize2.extract_image_patches(7, 7);
    948   sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize);
    949 
    950   VERIFY_IS_EQUAL(l_out_col_major.dimension(0), 32);
    951   VERIFY_IS_EQUAL(l_out_col_major.dimension(1), 7);
    952   VERIFY_IS_EQUAL(l_out_col_major.dimension(2), 7);
    953   VERIFY_IS_EQUAL(l_out_col_major.dimension(3), 16*16);
    954   VERIFY_IS_EQUAL(l_out_col_major.dimension(4), 32);
    955 
    956   // RowMajor
    957   sycl_device.deallocate(gpu_data_l_out_row_major);
    958   patchTensorRange={{sizeDim4, sizeDim2*sizeDim3, 7, 7 ,sizeDim1}};
    959   l_out_row_major.resize(patchTensorRange);
    960   patchTensorBuffSize =l_out_row_major.size()*sizeof(DataType);
    961   gpu_data_l_out_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
    962   TensorMap<Tensor<DataType, 5, RowMajor,IndexType>>gpu_l_out_row_major_resize2(gpu_data_l_out_row_major, patchTensorRange);
    963   gpu_l_out_row_major_resize2.device(sycl_device)=gpu_l_in_col_major_resize2.swap_layout().extract_image_patches(7, 7);
    964   sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize);
    965 
    966   VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32);
    967   VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 16*16);
    968   VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 7);
    969   VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 7);
    970   VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 32);
    971 
    972   for (IndexType b = 0; b < 32; ++b) {
    973     for (IndexType i = 0; i < 16; ++i) {
    974       for (IndexType j = 0; j < 16; ++j) {
    975         IndexType patchId = i+16*j;
    976         for (IndexType c = 0; c < 7; ++c) {
    977           for (IndexType r = 0; r < 7; ++r) {
    978             for (IndexType d = 0; d < 32; ++d) {
    979               DataType expected = 0.0f;
    980               if (r-3+i >= 0 && c-3+j >= 0 && r-3+i < 16 && c-3+j < 16) {
    981                 expected = l_in_col_major(d, r-3+i, c-3+j, b);
    982               }
    983               // ColMajor
    984               if (l_out_col_major(d, r, c, patchId, b) != expected) {
    985                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
    986               }
    987               VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected);
    988               // RowMajor
    989               if (l_out_row_major(b, patchId, c, r, d) != expected) {
    990                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
    991               }
    992               VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
    993             }
    994           }
    995         }
    996       }
    997     }
    998   }
    999 
   1000   // ColMajor
   1001   sycl_device.deallocate(gpu_data_l_in_col_major);
   1002   sycl_device.deallocate(gpu_data_l_out_col_major);
   1003   sizeDim1 = 64;
   1004   sizeDim2 = 13;
   1005   sizeDim3 = 13;
   1006   sizeDim4 = 32;
   1007   tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
   1008   l_in_col_major.resize(tensorColMajorRange);
   1009   l_in_col_major.setRandom();
   1010   gpu_data_l_in_col_major  = static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size()*sizeof(DataType)));
   1011   TensorMap<Tensor<DataType, 4, ColMajor, IndexType>>gpu_l_in_col_major_resize3(gpu_data_l_in_col_major, tensorColMajorRange);
   1012 
   1013   patchTensorRange={{sizeDim1, 3, 3, sizeDim2*sizeDim3, sizeDim4}};
   1014   l_out_col_major.resize(patchTensorRange);
   1015   patchTensorBuffSize =l_out_col_major.size()*sizeof(DataType);
   1016   gpu_data_l_out_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
   1017   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>>gpu_l_out_col_major_resize3(gpu_data_l_out_col_major, patchTensorRange);
   1018   sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(),(l_in_col_major.size())*sizeof(DataType));
   1019   gpu_l_out_col_major_resize3.device(sycl_device)=gpu_l_in_col_major_resize3.extract_image_patches(3, 3);
   1020   sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize);
   1021 
   1022   VERIFY_IS_EQUAL(l_out_col_major.dimension(0), 64);
   1023   VERIFY_IS_EQUAL(l_out_col_major.dimension(1), 3);
   1024   VERIFY_IS_EQUAL(l_out_col_major.dimension(2), 3);
   1025   VERIFY_IS_EQUAL(l_out_col_major.dimension(3), 13*13);
   1026   VERIFY_IS_EQUAL(l_out_col_major.dimension(4), 32);
   1027 
   1028   // RowMajor
   1029   sycl_device.deallocate(gpu_data_l_out_row_major);
   1030   patchTensorRange={{sizeDim4, sizeDim2*sizeDim3, 3, 3 ,sizeDim1}};
   1031   l_out_row_major.resize(patchTensorRange);
   1032   patchTensorBuffSize =l_out_row_major.size()*sizeof(DataType);
   1033   gpu_data_l_out_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
   1034   TensorMap<Tensor<DataType, 5, RowMajor,IndexType>>gpu_l_out_row_major_resize3(gpu_data_l_out_row_major, patchTensorRange);
   1035   gpu_l_out_row_major_resize3.device(sycl_device)=gpu_l_in_col_major_resize3.swap_layout().extract_image_patches(3, 3);
   1036   sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize);
   1037 
   1038   VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32);
   1039   VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 13*13);
   1040   VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 3);
   1041   VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 3);
   1042   VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 64);
   1043 
   1044   for (IndexType b = 0; b < 32; ++b) {
   1045     for (IndexType i = 0; i < 13; ++i) {
   1046       for (IndexType j = 0; j < 13; ++j) {
   1047         IndexType patchId = i+13*j;
   1048         for (IndexType c = 0; c < 3; ++c) {
   1049           for (IndexType r = 0; r < 3; ++r) {
   1050             for (IndexType d = 0; d < 64; ++d) {
   1051               DataType expected = 0.0f;
   1052               if (r-1+i >= 0 && c-1+j >= 0 && r-1+i < 13 && c-1+j < 13) {
   1053                 expected = l_in_col_major(d, r-1+i, c-1+j, b);
   1054               }
   1055               // ColMajor
   1056               if (l_out_col_major(d, r, c, patchId, b) != expected) {
   1057                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
   1058               }
   1059               VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected);
   1060               // RowMajor
   1061               if (l_out_row_major(b, patchId, c, r, d) != expected) {
   1062                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
   1063               }
   1064               VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
   1065             }
   1066           }
   1067         }
   1068       }
   1069     }
   1070   }
   1071   sycl_device.deallocate(gpu_data_l_in_col_major);
   1072   sycl_device.deallocate(gpu_data_l_out_col_major);
   1073   sycl_device.deallocate(gpu_data_l_out_row_major);
   1074 }
   1075 
   1076 
   1077 template<typename DataType, typename dev_Selector> void sycl_tensor_image_patch_test_per_device(dev_Selector s){
   1078 QueueInterface queueInterface(s);
   1079 auto sycl_device = Eigen::SyclDevice(&queueInterface);
   1080 test_simple_image_patch_sycl<DataType, int64_t>(sycl_device);
   1081 test_patch_padding_valid_sycl<DataType, int64_t>(sycl_device);
   1082 test_patch_padding_valid_same_value_sycl<DataType, int64_t>(sycl_device);
   1083 test_patch_padding_same_sycl<DataType, int64_t>(sycl_device);
   1084 test_patch_no_extra_dim_sycl<DataType, int64_t>(sycl_device);
   1085 test_imagenet_patches_sycl<DataType, int64_t>(sycl_device);
   1086 }
   1087 EIGEN_DECLARE_TEST(cxx11_tensor_image_patch_sycl)
   1088 {
   1089 for (const auto& device :Eigen::get_sycl_supported_devices()) {
   1090   CALL_SUBTEST(sycl_tensor_image_patch_test_per_device<float>(device));
   1091 }
   1092 }