cart-elc

Source code for CART-ELC
git clone git://git.laack.co/cart-elc.git
Log | Files | Refs | README | LICENSE

cxx11_tensor_chipping_sycl.cpp (26158B)


      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2016
      5 // Mehdi Goli    Codeplay Software Ltd.
      6 // Ralph Potter  Codeplay Software Ltd.
      7 // Luke Iwanski  Codeplay Software Ltd.
      8 // Contact: <eigen@codeplay.com>
      9 // Benoit Steiner <benoit.steiner.goog@gmail.com>
     10 //
     11 // This Source Code Form is subject to the terms of the Mozilla
     12 // Public License v. 2.0. If a copy of the MPL was not distributed
     13 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
     14 
     15 
     16 #define EIGEN_TEST_NO_LONGDOUBLE
     17 #define EIGEN_TEST_NO_COMPLEX
     18 
     19 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
     20 #define EIGEN_USE_SYCL
     21 
     22 #include "main.h"
     23 
     24 #include <Eigen/CXX11/Tensor>
     25 
     26 using Eigen::Tensor;
     27 
     28 template <typename DataType, int DataLayout, typename IndexType>
     29 static void test_static_chip_sycl(const Eigen::SyclDevice& sycl_device)
     30 {
     31   IndexType sizeDim1 = 2;
     32   IndexType sizeDim2 = 3;
     33   IndexType sizeDim3 = 5;
     34   IndexType sizeDim4 = 7;
     35   IndexType sizeDim5 = 11;
     36 
     37   array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
     38   array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
     39 
     40   Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
     41   Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange);
     42 
     43   tensor.setRandom();
     44 
     45   const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
     46   const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType);
     47   DataType* gpu_data_tensor  = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
     48   DataType* gpu_data_chip1  = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
     49 
     50   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
     51   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange);
     52 
     53   sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
     54   gpu_chip1.device(sycl_device)=gpu_tensor.template chip<0l>(1l);
     55   sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize);
     56 
     57   VERIFY_IS_EQUAL(chip1.dimension(0), sizeDim2);
     58   VERIFY_IS_EQUAL(chip1.dimension(1), sizeDim3);
     59   VERIFY_IS_EQUAL(chip1.dimension(2), sizeDim4);
     60   VERIFY_IS_EQUAL(chip1.dimension(3), sizeDim5);
     61 
     62   for (IndexType i = 0; i < sizeDim2; ++i) {
     63     for (IndexType j = 0; j < sizeDim3; ++j) {
     64       for (IndexType k = 0; k < sizeDim4; ++k) {
     65         for (IndexType l = 0; l < sizeDim5; ++l) {
     66           VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1l,i,j,k,l));
     67         }
     68       }
     69     }
     70   }
     71 
     72   array<IndexType, 4> chip2TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}};
     73   Tensor<DataType, 4, DataLayout,IndexType> chip2(chip2TensorRange);
     74   const size_t chip2TensorBuffSize =chip2.size()*sizeof(DataType);
     75   DataType* gpu_data_chip2  = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
     76   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange);
     77 
     78   gpu_chip2.device(sycl_device)=gpu_tensor.template chip<1l>(1l);
     79   sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize);
     80 
     81   VERIFY_IS_EQUAL(chip2.dimension(0), sizeDim1);
     82   VERIFY_IS_EQUAL(chip2.dimension(1), sizeDim3);
     83   VERIFY_IS_EQUAL(chip2.dimension(2), sizeDim4);
     84   VERIFY_IS_EQUAL(chip2.dimension(3), sizeDim5);
     85 
     86   for (IndexType i = 0; i < sizeDim1; ++i) {
     87     for (IndexType j = 0; j < sizeDim3; ++j) {
     88       for (IndexType k = 0; k < sizeDim4; ++k) {
     89         for (IndexType l = 0; l < sizeDim5; ++l) {
     90           VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1l,j,k,l));
     91         }
     92       }
     93     }
     94   }
     95 
     96   array<IndexType, 4> chip3TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}};
     97   Tensor<DataType, 4, DataLayout,IndexType> chip3(chip3TensorRange);
     98   const size_t chip3TensorBuffSize =chip3.size()*sizeof(DataType);
     99   DataType* gpu_data_chip3  = static_cast<DataType*>(sycl_device.allocate(chip3TensorBuffSize));
    100   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip3(gpu_data_chip3, chip3TensorRange);
    101 
    102   gpu_chip3.device(sycl_device)=gpu_tensor.template chip<2l>(2l);
    103   sycl_device.memcpyDeviceToHost(chip3.data(), gpu_data_chip3, chip3TensorBuffSize);
    104 
    105   VERIFY_IS_EQUAL(chip3.dimension(0), sizeDim1);
    106   VERIFY_IS_EQUAL(chip3.dimension(1), sizeDim2);
    107   VERIFY_IS_EQUAL(chip3.dimension(2), sizeDim4);
    108   VERIFY_IS_EQUAL(chip3.dimension(3), sizeDim5);
    109 
    110   for (IndexType i = 0; i < sizeDim1; ++i) {
    111     for (IndexType j = 0; j < sizeDim2; ++j) {
    112       for (IndexType k = 0; k < sizeDim4; ++k) {
    113         for (IndexType l = 0; l < sizeDim5; ++l) {
    114           VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2l,k,l));
    115         }
    116       }
    117     }
    118   }
    119 
    120   array<IndexType, 4> chip4TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}};
    121   Tensor<DataType, 4, DataLayout,IndexType> chip4(chip4TensorRange);
    122   const size_t chip4TensorBuffSize =chip4.size()*sizeof(DataType);
    123   DataType* gpu_data_chip4  = static_cast<DataType*>(sycl_device.allocate(chip4TensorBuffSize));
    124   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip4(gpu_data_chip4, chip4TensorRange);
    125 
    126   gpu_chip4.device(sycl_device)=gpu_tensor.template chip<3l>(5l);
    127   sycl_device.memcpyDeviceToHost(chip4.data(), gpu_data_chip4, chip4TensorBuffSize);
    128 
    129   VERIFY_IS_EQUAL(chip4.dimension(0), sizeDim1);
    130   VERIFY_IS_EQUAL(chip4.dimension(1), sizeDim2);
    131   VERIFY_IS_EQUAL(chip4.dimension(2), sizeDim3);
    132   VERIFY_IS_EQUAL(chip4.dimension(3), sizeDim5);
    133 
    134   for (IndexType i = 0; i < sizeDim1; ++i) {
    135     for (IndexType j = 0; j < sizeDim2; ++j) {
    136       for (IndexType k = 0; k < sizeDim3; ++k) {
    137         for (IndexType l = 0; l < sizeDim5; ++l) {
    138           VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5l,l));
    139         }
    140       }
    141     }
    142   }
    143 
    144 
    145   array<IndexType, 4> chip5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
    146   Tensor<DataType, 4, DataLayout,IndexType> chip5(chip5TensorRange);
    147   const size_t chip5TensorBuffSize =chip5.size()*sizeof(DataType);
    148   DataType* gpu_data_chip5  = static_cast<DataType*>(sycl_device.allocate(chip5TensorBuffSize));
    149   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip5(gpu_data_chip5, chip5TensorRange);
    150 
    151   gpu_chip5.device(sycl_device)=gpu_tensor.template chip<4l>(7l);
    152   sycl_device.memcpyDeviceToHost(chip5.data(), gpu_data_chip5, chip5TensorBuffSize);
    153 
    154   VERIFY_IS_EQUAL(chip5.dimension(0), sizeDim1);
    155   VERIFY_IS_EQUAL(chip5.dimension(1), sizeDim2);
    156   VERIFY_IS_EQUAL(chip5.dimension(2), sizeDim3);
    157   VERIFY_IS_EQUAL(chip5.dimension(3), sizeDim4);
    158 
    159   for (IndexType i = 0; i < sizeDim1; ++i) {
    160     for (IndexType j = 0; j < sizeDim2; ++j) {
    161       for (IndexType k = 0; k < sizeDim3; ++k) {
    162         for (IndexType l = 0; l < sizeDim4; ++l) {
    163           VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7l));
    164         }
    165       }
    166     }
    167   }
    168 
    169   sycl_device.deallocate(gpu_data_tensor);
    170   sycl_device.deallocate(gpu_data_chip1);
    171   sycl_device.deallocate(gpu_data_chip2);
    172   sycl_device.deallocate(gpu_data_chip3);
    173   sycl_device.deallocate(gpu_data_chip4);
    174   sycl_device.deallocate(gpu_data_chip5);
    175 }
    176 
    177 template <typename DataType, int DataLayout, typename IndexType>
    178 static void test_dynamic_chip_sycl(const Eigen::SyclDevice& sycl_device)
    179 {
    180   IndexType sizeDim1 = 2;
    181   IndexType sizeDim2 = 3;
    182   IndexType sizeDim3 = 5;
    183   IndexType sizeDim4 = 7;
    184   IndexType sizeDim5 = 11;
    185 
    186   array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
    187   array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
    188 
    189   Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
    190   Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange);
    191 
    192   tensor.setRandom();
    193 
    194   const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
    195   const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType);
    196   DataType* gpu_data_tensor  = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
    197   DataType* gpu_data_chip1  = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
    198 
    199   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
    200   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange);
    201 
    202   sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
    203   gpu_chip1.device(sycl_device)=gpu_tensor.chip(1l,0l);
    204   sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize);
    205 
    206   VERIFY_IS_EQUAL(chip1.dimension(0), sizeDim2);
    207   VERIFY_IS_EQUAL(chip1.dimension(1), sizeDim3);
    208   VERIFY_IS_EQUAL(chip1.dimension(2), sizeDim4);
    209   VERIFY_IS_EQUAL(chip1.dimension(3), sizeDim5);
    210 
    211   for (IndexType i = 0; i < sizeDim2; ++i) {
    212     for (IndexType j = 0; j < sizeDim3; ++j) {
    213       for (IndexType k = 0; k < sizeDim4; ++k) {
    214         for (IndexType l = 0; l < sizeDim5; ++l) {
    215           VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1l,i,j,k,l));
    216         }
    217       }
    218     }
    219   }
    220 
    221   array<IndexType, 4> chip2TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}};
    222   Tensor<DataType, 4, DataLayout,IndexType> chip2(chip2TensorRange);
    223   const size_t chip2TensorBuffSize =chip2.size()*sizeof(DataType);
    224   DataType* gpu_data_chip2  = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
    225   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange);
    226 
    227   gpu_chip2.device(sycl_device)=gpu_tensor.chip(1l,1l);
    228   sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize);
    229 
    230   VERIFY_IS_EQUAL(chip2.dimension(0), sizeDim1);
    231   VERIFY_IS_EQUAL(chip2.dimension(1), sizeDim3);
    232   VERIFY_IS_EQUAL(chip2.dimension(2), sizeDim4);
    233   VERIFY_IS_EQUAL(chip2.dimension(3), sizeDim5);
    234 
    235   for (IndexType i = 0; i < sizeDim1; ++i) {
    236     for (IndexType j = 0; j < sizeDim3; ++j) {
    237       for (IndexType k = 0; k < sizeDim4; ++k) {
    238         for (IndexType l = 0; l < sizeDim5; ++l) {
    239           VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1l,j,k,l));
    240         }
    241       }
    242     }
    243   }
    244 
    245   array<IndexType, 4> chip3TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}};
    246   Tensor<DataType, 4, DataLayout,IndexType> chip3(chip3TensorRange);
    247   const size_t chip3TensorBuffSize =chip3.size()*sizeof(DataType);
    248   DataType* gpu_data_chip3  = static_cast<DataType*>(sycl_device.allocate(chip3TensorBuffSize));
    249   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip3(gpu_data_chip3, chip3TensorRange);
    250 
    251   gpu_chip3.device(sycl_device)=gpu_tensor.chip(2l,2l);
    252   sycl_device.memcpyDeviceToHost(chip3.data(), gpu_data_chip3, chip3TensorBuffSize);
    253 
    254   VERIFY_IS_EQUAL(chip3.dimension(0), sizeDim1);
    255   VERIFY_IS_EQUAL(chip3.dimension(1), sizeDim2);
    256   VERIFY_IS_EQUAL(chip3.dimension(2), sizeDim4);
    257   VERIFY_IS_EQUAL(chip3.dimension(3), sizeDim5);
    258 
    259   for (IndexType i = 0; i < sizeDim1; ++i) {
    260     for (IndexType j = 0; j < sizeDim2; ++j) {
    261       for (IndexType k = 0; k < sizeDim4; ++k) {
    262         for (IndexType l = 0; l < sizeDim5; ++l) {
    263           VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2l,k,l));
    264         }
    265       }
    266     }
    267   }
    268 
    269   array<IndexType, 4> chip4TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}};
    270   Tensor<DataType, 4, DataLayout,IndexType> chip4(chip4TensorRange);
    271   const size_t chip4TensorBuffSize =chip4.size()*sizeof(DataType);
    272   DataType* gpu_data_chip4  = static_cast<DataType*>(sycl_device.allocate(chip4TensorBuffSize));
    273   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip4(gpu_data_chip4, chip4TensorRange);
    274 
    275   gpu_chip4.device(sycl_device)=gpu_tensor.chip(5l,3l);
    276   sycl_device.memcpyDeviceToHost(chip4.data(), gpu_data_chip4, chip4TensorBuffSize);
    277 
    278   VERIFY_IS_EQUAL(chip4.dimension(0), sizeDim1);
    279   VERIFY_IS_EQUAL(chip4.dimension(1), sizeDim2);
    280   VERIFY_IS_EQUAL(chip4.dimension(2), sizeDim3);
    281   VERIFY_IS_EQUAL(chip4.dimension(3), sizeDim5);
    282 
    283   for (IndexType i = 0; i < sizeDim1; ++i) {
    284     for (IndexType j = 0; j < sizeDim2; ++j) {
    285       for (IndexType k = 0; k < sizeDim3; ++k) {
    286         for (IndexType l = 0; l < sizeDim5; ++l) {
    287           VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5l,l));
    288         }
    289       }
    290     }
    291   }
    292 
    293 
    294   array<IndexType, 4> chip5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
    295   Tensor<DataType, 4, DataLayout,IndexType> chip5(chip5TensorRange);
    296   const size_t chip5TensorBuffSize =chip5.size()*sizeof(DataType);
    297   DataType* gpu_data_chip5  = static_cast<DataType*>(sycl_device.allocate(chip5TensorBuffSize));
    298   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip5(gpu_data_chip5, chip5TensorRange);
    299 
    300   gpu_chip5.device(sycl_device)=gpu_tensor.chip(7l,4l);
    301   sycl_device.memcpyDeviceToHost(chip5.data(), gpu_data_chip5, chip5TensorBuffSize);
    302 
    303   VERIFY_IS_EQUAL(chip5.dimension(0), sizeDim1);
    304   VERIFY_IS_EQUAL(chip5.dimension(1), sizeDim2);
    305   VERIFY_IS_EQUAL(chip5.dimension(2), sizeDim3);
    306   VERIFY_IS_EQUAL(chip5.dimension(3), sizeDim4);
    307 
    308   for (IndexType i = 0; i < sizeDim1; ++i) {
    309     for (IndexType j = 0; j < sizeDim2; ++j) {
    310       for (IndexType k = 0; k < sizeDim3; ++k) {
    311         for (IndexType l = 0; l < sizeDim4; ++l) {
    312           VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7l));
    313         }
    314       }
    315     }
    316   }
    317   sycl_device.deallocate(gpu_data_tensor);
    318   sycl_device.deallocate(gpu_data_chip1);
    319   sycl_device.deallocate(gpu_data_chip2);
    320   sycl_device.deallocate(gpu_data_chip3);
    321   sycl_device.deallocate(gpu_data_chip4);
    322   sycl_device.deallocate(gpu_data_chip5);
    323 }
    324 
    325 template <typename DataType, int DataLayout, typename IndexType>
    326 static void test_chip_in_expr(const Eigen::SyclDevice& sycl_device) {
    327 
    328   IndexType sizeDim1 = 2;
    329   IndexType sizeDim2 = 3;
    330   IndexType sizeDim3 = 5;
    331   IndexType sizeDim4 = 7;
    332   IndexType sizeDim5 = 11;
    333 
    334   array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
    335   array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
    336 
    337   Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
    338 
    339   Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange);
    340   Tensor<DataType, 4, DataLayout,IndexType> tensor1(chip1TensorRange);
    341   tensor.setRandom();
    342   tensor1.setRandom();
    343 
    344   const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
    345   const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType);
    346   DataType* gpu_data_tensor  = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
    347   DataType* gpu_data_chip1  = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
    348   DataType* gpu_data_tensor1  = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
    349 
    350   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
    351   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange);
    352   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_tensor1(gpu_data_tensor1, chip1TensorRange);
    353 
    354 
    355   sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
    356   sycl_device.memcpyHostToDevice(gpu_data_tensor1, tensor1.data(), chip1TensorBuffSize);
    357   gpu_chip1.device(sycl_device)=gpu_tensor.template chip<0l>(0l) + gpu_tensor1;
    358   sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize);
    359 
    360   for (int i = 0; i < sizeDim2; ++i) {
    361     for (int j = 0; j < sizeDim3; ++j) {
    362       for (int k = 0; k < sizeDim4; ++k) {
    363         for (int l = 0; l < sizeDim5; ++l) {
    364           float expected = tensor(0l,i,j,k,l) + tensor1(i,j,k,l);
    365           VERIFY_IS_EQUAL(chip1(i,j,k,l), expected);
    366         }
    367       }
    368     }
    369   }
    370 
    371   array<IndexType, 3> chip2TensorRange = {{sizeDim2, sizeDim4, sizeDim5}};
    372   Tensor<DataType, 3, DataLayout,IndexType> tensor2(chip2TensorRange);
    373   Tensor<DataType, 3, DataLayout,IndexType> chip2(chip2TensorRange);
    374   tensor2.setRandom();
    375   const size_t chip2TensorBuffSize =tensor2.size()*sizeof(DataType);
    376   DataType* gpu_data_tensor2  = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
    377   DataType* gpu_data_chip2  = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
    378   TensorMap<Tensor<DataType, 3, DataLayout,IndexType>> gpu_tensor2(gpu_data_tensor2, chip2TensorRange);
    379   TensorMap<Tensor<DataType, 3, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange);
    380 
    381   sycl_device.memcpyHostToDevice(gpu_data_tensor2, tensor2.data(), chip2TensorBuffSize);
    382   gpu_chip2.device(sycl_device)=gpu_tensor.template chip<0l>(0l).template chip<1l>(2l) + gpu_tensor2;
    383   sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize);
    384 
    385   for (int i = 0; i < sizeDim2; ++i) {
    386     for (int j = 0; j < sizeDim4; ++j) {
    387       for (int k = 0; k < sizeDim5; ++k) {
    388         float expected = tensor(0l,i,2l,j,k) + tensor2(i,j,k);
    389         VERIFY_IS_EQUAL(chip2(i,j,k), expected);
    390       }
    391     }
    392   }
    393   sycl_device.deallocate(gpu_data_tensor);
    394   sycl_device.deallocate(gpu_data_tensor1);
    395   sycl_device.deallocate(gpu_data_chip1);
    396   sycl_device.deallocate(gpu_data_tensor2);
    397   sycl_device.deallocate(gpu_data_chip2);
    398 }
    399 
    400 template <typename DataType, int DataLayout, typename IndexType>
    401 static void test_chip_as_lvalue_sycl(const Eigen::SyclDevice& sycl_device)
    402 {
    403 
    404   IndexType sizeDim1 = 2;
    405   IndexType sizeDim2 = 3;
    406   IndexType sizeDim3 = 5;
    407   IndexType sizeDim4 = 7;
    408   IndexType sizeDim5 = 11;
    409 
    410   array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
    411   array<IndexType, 4> input2TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
    412 
    413   Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
    414   Tensor<DataType, 5, DataLayout,IndexType> input1(tensorRange);
    415   Tensor<DataType, 4, DataLayout,IndexType> input2(input2TensorRange);
    416   input1.setRandom();
    417   input2.setRandom();
    418 
    419 
    420   const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
    421   const size_t input2TensorBuffSize =input2.size()*sizeof(DataType);
    422   std::cout << tensorBuffSize << " , "<<  input2TensorBuffSize << std::endl;
    423   DataType* gpu_data_tensor  = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
    424   DataType* gpu_data_input1  = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
    425   DataType* gpu_data_input2  = static_cast<DataType*>(sycl_device.allocate(input2TensorBuffSize));
    426 
    427   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
    428   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_input1(gpu_data_input1, tensorRange);
    429   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input2(gpu_data_input2, input2TensorRange);
    430 
    431   sycl_device.memcpyHostToDevice(gpu_data_input1, input1.data(), tensorBuffSize);
    432   gpu_tensor.device(sycl_device)=gpu_input1;
    433   sycl_device.memcpyHostToDevice(gpu_data_input2, input2.data(), input2TensorBuffSize);
    434   gpu_tensor.template chip<0l>(1l).device(sycl_device)=gpu_input2;
    435   sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
    436 
    437   for (int i = 0; i < sizeDim1; ++i) {
    438     for (int j = 0; j < sizeDim2; ++j) {
    439       for (int k = 0; k < sizeDim3; ++k) {
    440         for (int l = 0; l < sizeDim4; ++l) {
    441           for (int m = 0; m < sizeDim5; ++m) {
    442             if (i != 1) {
    443               VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
    444             } else {
    445               VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input2(j,k,l,m));
    446             }
    447           }
    448         }
    449       }
    450     }
    451   }
    452 
    453   gpu_tensor.device(sycl_device)=gpu_input1;
    454   array<IndexType, 4> input3TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}};
    455   Tensor<DataType, 4, DataLayout,IndexType> input3(input3TensorRange);
    456   input3.setRandom();
    457 
    458   const size_t input3TensorBuffSize =input3.size()*sizeof(DataType);
    459   DataType* gpu_data_input3  = static_cast<DataType*>(sycl_device.allocate(input3TensorBuffSize));
    460   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input3(gpu_data_input3, input3TensorRange);
    461 
    462   sycl_device.memcpyHostToDevice(gpu_data_input3, input3.data(), input3TensorBuffSize);
    463   gpu_tensor.template chip<1l>(1l).device(sycl_device)=gpu_input3;
    464   sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
    465 
    466   for (int i = 0; i < sizeDim1; ++i) {
    467     for (int j = 0; j < sizeDim2; ++j) {
    468       for (int k = 0; k <sizeDim3; ++k) {
    469         for (int l = 0; l < sizeDim4; ++l) {
    470           for (int m = 0; m < sizeDim5; ++m) {
    471             if (j != 1) {
    472               VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
    473             } else {
    474               VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input3(i,k,l,m));
    475             }
    476           }
    477         }
    478       }
    479     }
    480   }
    481 
    482   gpu_tensor.device(sycl_device)=gpu_input1;
    483   array<IndexType, 4> input4TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}};
    484   Tensor<DataType, 4, DataLayout,IndexType> input4(input4TensorRange);
    485   input4.setRandom();
    486 
    487   const size_t input4TensorBuffSize =input4.size()*sizeof(DataType);
    488   DataType* gpu_data_input4  = static_cast<DataType*>(sycl_device.allocate(input4TensorBuffSize));
    489   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input4(gpu_data_input4, input4TensorRange);
    490 
    491   sycl_device.memcpyHostToDevice(gpu_data_input4, input4.data(), input4TensorBuffSize);
    492   gpu_tensor.template chip<2l>(3l).device(sycl_device)=gpu_input4;
    493   sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
    494 
    495   for (int i = 0; i < sizeDim1; ++i) {
    496     for (int j = 0; j < sizeDim2; ++j) {
    497       for (int k = 0; k <sizeDim3; ++k) {
    498         for (int l = 0; l < sizeDim4; ++l) {
    499           for (int m = 0; m < sizeDim5; ++m) {
    500             if (k != 3) {
    501               VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
    502             } else {
    503               VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input4(i,j,l,m));
    504             }
    505           }
    506         }
    507       }
    508     }
    509   }
    510 
    511   gpu_tensor.device(sycl_device)=gpu_input1;
    512   array<IndexType, 4> input5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}};
    513   Tensor<DataType, 4, DataLayout,IndexType> input5(input5TensorRange);
    514   input5.setRandom();
    515 
    516   const size_t input5TensorBuffSize =input5.size()*sizeof(DataType);
    517   DataType* gpu_data_input5  = static_cast<DataType*>(sycl_device.allocate(input5TensorBuffSize));
    518   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input5(gpu_data_input5, input5TensorRange);
    519 
    520   sycl_device.memcpyHostToDevice(gpu_data_input5, input5.data(), input5TensorBuffSize);
    521   gpu_tensor.template chip<3l>(4l).device(sycl_device)=gpu_input5;
    522   sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
    523 
    524   for (int i = 0; i < sizeDim1; ++i) {
    525     for (int j = 0; j < sizeDim2; ++j) {
    526       for (int k = 0; k <sizeDim3; ++k) {
    527         for (int l = 0; l < sizeDim4; ++l) {
    528           for (int m = 0; m < sizeDim5; ++m) {
    529             if (l != 4) {
    530               VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
    531             } else {
    532               VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input5(i,j,k,m));
    533             }
    534           }
    535         }
    536       }
    537     }
    538   }
    539   gpu_tensor.device(sycl_device)=gpu_input1;
    540   array<IndexType, 4> input6TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
    541   Tensor<DataType, 4, DataLayout,IndexType> input6(input6TensorRange);
    542   input6.setRandom();
    543 
    544   const size_t input6TensorBuffSize =input6.size()*sizeof(DataType);
    545   DataType* gpu_data_input6  = static_cast<DataType*>(sycl_device.allocate(input6TensorBuffSize));
    546   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input6(gpu_data_input6, input6TensorRange);
    547 
    548   sycl_device.memcpyHostToDevice(gpu_data_input6, input6.data(), input6TensorBuffSize);
    549   gpu_tensor.template chip<4l>(5l).device(sycl_device)=gpu_input6;
    550   sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
    551 
    552   for (int i = 0; i < sizeDim1; ++i) {
    553     for (int j = 0; j < sizeDim2; ++j) {
    554       for (int k = 0; k <sizeDim3; ++k) {
    555         for (int l = 0; l < sizeDim4; ++l) {
    556           for (int m = 0; m < sizeDim5; ++m) {
    557             if (m != 5) {
    558               VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
    559             } else {
    560               VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input6(i,j,k,l));
    561             }
    562           }
    563         }
    564       }
    565     }
    566   }
    567 
    568 
    569   gpu_tensor.device(sycl_device)=gpu_input1;
    570   Tensor<DataType, 5, DataLayout,IndexType> input7(tensorRange);
    571   input7.setRandom();
    572 
    573   DataType* gpu_data_input7  = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
    574   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_input7(gpu_data_input7, tensorRange);
    575 
    576   sycl_device.memcpyHostToDevice(gpu_data_input7, input7.data(), tensorBuffSize);
    577   gpu_tensor.chip(0l,0l).device(sycl_device)=gpu_input7.chip(0l,0l);
    578   sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
    579 
    580   for (int i = 0; i < sizeDim1; ++i) {
    581     for (int j = 0; j < sizeDim2; ++j) {
    582       for (int k = 0; k <sizeDim3; ++k) {
    583         for (int l = 0; l < sizeDim4; ++l) {
    584           for (int m = 0; m < sizeDim5; ++m) {
    585             if (i != 0) {
    586               VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
    587             } else {
    588               VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input7(i,j,k,l,m));
    589             }
    590           }
    591         }
    592       }
    593     }
    594   }
    595   sycl_device.deallocate(gpu_data_tensor);
    596   sycl_device.deallocate(gpu_data_input1);
    597   sycl_device.deallocate(gpu_data_input2);
    598   sycl_device.deallocate(gpu_data_input3);
    599   sycl_device.deallocate(gpu_data_input4);
    600   sycl_device.deallocate(gpu_data_input5);
    601   sycl_device.deallocate(gpu_data_input6);
    602   sycl_device.deallocate(gpu_data_input7);
    603 
    604 }
    605 
    606 template<typename DataType, typename dev_Selector> void sycl_chipping_test_per_device(dev_Selector s){
    607   QueueInterface queueInterface(s);
    608   auto sycl_device = Eigen::SyclDevice(&queueInterface);
    609  /* test_static_chip_sycl<DataType, RowMajor, int64_t>(sycl_device);
    610   test_static_chip_sycl<DataType, ColMajor, int64_t>(sycl_device);
    611   test_dynamic_chip_sycl<DataType, RowMajor, int64_t>(sycl_device);
    612   test_dynamic_chip_sycl<DataType, ColMajor, int64_t>(sycl_device);
    613   test_chip_in_expr<DataType, RowMajor, int64_t>(sycl_device);
    614   test_chip_in_expr<DataType, ColMajor, int64_t>(sycl_device);*/
    615   test_chip_as_lvalue_sycl<DataType, RowMajor, int64_t>(sycl_device);
    616  // test_chip_as_lvalue_sycl<DataType, ColMajor, int64_t>(sycl_device);
    617 }
    618 EIGEN_DECLARE_TEST(cxx11_tensor_chipping_sycl)
    619 {
    620   for (const auto& device :Eigen::get_sycl_supported_devices()) {
    621     CALL_SUBTEST(sycl_chipping_test_per_device<float>(device));
    622   }
    623 }