cart-elc

Source code for CART-ELC
git clone git://git.laack.co/cart-elc.git
Log | Files | Refs | README | LICENSE

cxx11_tensor_concatenation_sycl.cpp (8411B)


      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2016
      5 // Mehdi Goli    Codeplay Software Ltd.
      6 // Ralph Potter  Codeplay Software Ltd.
      7 // Luke Iwanski  Codeplay Software Ltd.
      8 // Contact: <eigen@codeplay.com>
      9 //
     10 // This Source Code Form is subject to the terms of the Mozilla
     11 // Public License v. 2.0. If a copy of the MPL was not distributed
     12 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
     13 
     14 #define EIGEN_TEST_NO_LONGDOUBLE
     15 #define EIGEN_TEST_NO_COMPLEX
     16 
     17 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
     18 #define EIGEN_USE_SYCL
     19 
     20 #include "main.h"
     21 #include <unsupported/Eigen/CXX11/Tensor>
     22 
     23 using Eigen::Tensor;
     24 
     25 template<typename DataType, int DataLayout, typename IndexType>
     26 static void test_simple_concatenation(const Eigen::SyclDevice& sycl_device)
     27 {
     28   IndexType leftDim1 = 2;
     29   IndexType leftDim2 = 3;
     30   IndexType leftDim3 = 1;
     31   Eigen::array<IndexType, 3> leftRange = {{leftDim1, leftDim2, leftDim3}};
     32   IndexType rightDim1 = 2;
     33   IndexType rightDim2 = 3;
     34   IndexType rightDim3 = 1;
     35   Eigen::array<IndexType, 3> rightRange = {{rightDim1, rightDim2, rightDim3}};
     36 
     37   //IndexType concatDim1 = 3;
     38 //	IndexType concatDim2 = 3;
     39 //	IndexType concatDim3 = 1;
     40   //Eigen::array<IndexType, 3> concatRange = {{concatDim1, concatDim2, concatDim3}};
     41 
     42   Tensor<DataType, 3, DataLayout, IndexType> left(leftRange);
     43   Tensor<DataType, 3, DataLayout, IndexType> right(rightRange);
     44   left.setRandom();
     45   right.setRandom();
     46 
     47   DataType * gpu_in1_data  = static_cast<DataType*>(sycl_device.allocate(left.dimensions().TotalSize()*sizeof(DataType)));
     48   DataType * gpu_in2_data  = static_cast<DataType*>(sycl_device.allocate(right.dimensions().TotalSize()*sizeof(DataType)));
     49 
     50   Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_in1(gpu_in1_data, leftRange);
     51   Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_in2(gpu_in2_data, rightRange);
     52   sycl_device.memcpyHostToDevice(gpu_in1_data, left.data(),(left.dimensions().TotalSize())*sizeof(DataType));
     53   sycl_device.memcpyHostToDevice(gpu_in2_data, right.data(),(right.dimensions().TotalSize())*sizeof(DataType));
     54   ///
     55   Tensor<DataType, 3, DataLayout, IndexType> concatenation1(leftDim1+rightDim1, leftDim2, leftDim3);
     56   DataType * gpu_out_data1 =  static_cast<DataType*>(sycl_device.allocate(concatenation1.dimensions().TotalSize()*sizeof(DataType)));
     57   Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_out1(gpu_out_data1, concatenation1.dimensions());
     58 
     59   //concatenation = left.concatenate(right, 0);
     60   gpu_out1.device(sycl_device) =gpu_in1.concatenate(gpu_in2, 0);
     61   sycl_device.memcpyDeviceToHost(concatenation1.data(), gpu_out_data1,(concatenation1.dimensions().TotalSize())*sizeof(DataType));
     62 
     63   VERIFY_IS_EQUAL(concatenation1.dimension(0), 4);
     64   VERIFY_IS_EQUAL(concatenation1.dimension(1), 3);
     65   VERIFY_IS_EQUAL(concatenation1.dimension(2), 1);
     66   for (IndexType j = 0; j < 3; ++j) {
     67     for (IndexType i = 0; i < 2; ++i) {
     68       VERIFY_IS_EQUAL(concatenation1(i, j, 0), left(i, j, 0));
     69     }
     70     for (IndexType i = 2; i < 4; ++i) {
     71       VERIFY_IS_EQUAL(concatenation1(i, j, 0), right(i - 2, j, 0));
     72     }
     73   }
     74 
     75   sycl_device.deallocate(gpu_out_data1);
     76   Tensor<DataType, 3, DataLayout, IndexType> concatenation2(leftDim1, leftDim2 +rightDim2, leftDim3);
     77   DataType * gpu_out_data2 =  static_cast<DataType*>(sycl_device.allocate(concatenation2.dimensions().TotalSize()*sizeof(DataType)));
     78   Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_out2(gpu_out_data2, concatenation2.dimensions());
     79   gpu_out2.device(sycl_device) =gpu_in1.concatenate(gpu_in2, 1);
     80   sycl_device.memcpyDeviceToHost(concatenation2.data(), gpu_out_data2,(concatenation2.dimensions().TotalSize())*sizeof(DataType));
     81 
     82   //concatenation = left.concatenate(right, 1);
     83   VERIFY_IS_EQUAL(concatenation2.dimension(0), 2);
     84   VERIFY_IS_EQUAL(concatenation2.dimension(1), 6);
     85   VERIFY_IS_EQUAL(concatenation2.dimension(2), 1);
     86   for (IndexType i = 0; i < 2; ++i) {
     87     for (IndexType j = 0; j < 3; ++j) {
     88       VERIFY_IS_EQUAL(concatenation2(i, j, 0), left(i, j, 0));
     89     }
     90     for (IndexType j = 3; j < 6; ++j) {
     91       VERIFY_IS_EQUAL(concatenation2(i, j, 0), right(i, j - 3, 0));
     92     }
     93   }
     94   sycl_device.deallocate(gpu_out_data2);
     95   Tensor<DataType, 3, DataLayout, IndexType> concatenation3(leftDim1, leftDim2, leftDim3+rightDim3);
     96   DataType * gpu_out_data3 =  static_cast<DataType*>(sycl_device.allocate(concatenation3.dimensions().TotalSize()*sizeof(DataType)));
     97   Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_out3(gpu_out_data3, concatenation3.dimensions());
     98   gpu_out3.device(sycl_device) =gpu_in1.concatenate(gpu_in2, 2);
     99   sycl_device.memcpyDeviceToHost(concatenation3.data(), gpu_out_data3,(concatenation3.dimensions().TotalSize())*sizeof(DataType));
    100 
    101   //concatenation = left.concatenate(right, 2);
    102   VERIFY_IS_EQUAL(concatenation3.dimension(0), 2);
    103   VERIFY_IS_EQUAL(concatenation3.dimension(1), 3);
    104   VERIFY_IS_EQUAL(concatenation3.dimension(2), 2);
    105   for (IndexType i = 0; i < 2; ++i) {
    106     for (IndexType j = 0; j < 3; ++j) {
    107       VERIFY_IS_EQUAL(concatenation3(i, j, 0), left(i, j, 0));
    108       VERIFY_IS_EQUAL(concatenation3(i, j, 1), right(i, j, 0));
    109     }
    110   }
    111   sycl_device.deallocate(gpu_out_data3);
    112   sycl_device.deallocate(gpu_in1_data);
    113   sycl_device.deallocate(gpu_in2_data);
    114 }
    115 template<typename DataType, int DataLayout, typename IndexType>
    116 static void test_concatenation_as_lvalue(const Eigen::SyclDevice& sycl_device)
    117 {
    118 
    119   IndexType leftDim1 = 2;
    120   IndexType leftDim2 = 3;
    121   Eigen::array<IndexType, 2> leftRange = {{leftDim1, leftDim2}};
    122 
    123   IndexType rightDim1 = 2;
    124   IndexType rightDim2 = 3;
    125   Eigen::array<IndexType, 2> rightRange = {{rightDim1, rightDim2}};
    126 
    127   IndexType concatDim1 = 4;
    128   IndexType concatDim2 = 3;
    129   Eigen::array<IndexType, 2> resRange = {{concatDim1, concatDim2}};
    130 
    131   Tensor<DataType, 2, DataLayout, IndexType> left(leftRange);
    132   Tensor<DataType, 2, DataLayout, IndexType> right(rightRange);
    133   Tensor<DataType, 2, DataLayout, IndexType> result(resRange);
    134 
    135   left.setRandom();
    136   right.setRandom();
    137   result.setRandom();
    138 
    139   DataType * gpu_in1_data  = static_cast<DataType*>(sycl_device.allocate(left.dimensions().TotalSize()*sizeof(DataType)));
    140   DataType * gpu_in2_data  = static_cast<DataType*>(sycl_device.allocate(right.dimensions().TotalSize()*sizeof(DataType)));
    141   DataType * gpu_out_data =  static_cast<DataType*>(sycl_device.allocate(result.dimensions().TotalSize()*sizeof(DataType)));
    142 
    143 
    144   Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_in1(gpu_in1_data, leftRange);
    145   Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_in2(gpu_in2_data, rightRange);
    146   Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_out(gpu_out_data, resRange);
    147 
    148   sycl_device.memcpyHostToDevice(gpu_in1_data, left.data(),(left.dimensions().TotalSize())*sizeof(DataType));
    149   sycl_device.memcpyHostToDevice(gpu_in2_data, right.data(),(right.dimensions().TotalSize())*sizeof(DataType));
    150   sycl_device.memcpyHostToDevice(gpu_out_data, result.data(),(result.dimensions().TotalSize())*sizeof(DataType));
    151 
    152 //  t1.concatenate(t2, 0) = result;
    153  gpu_in1.concatenate(gpu_in2, 0).device(sycl_device) =gpu_out;
    154  sycl_device.memcpyDeviceToHost(left.data(), gpu_in1_data,(left.dimensions().TotalSize())*sizeof(DataType));
    155  sycl_device.memcpyDeviceToHost(right.data(), gpu_in2_data,(right.dimensions().TotalSize())*sizeof(DataType));
    156 
    157   for (IndexType i = 0; i < 2; ++i) {
    158     for (IndexType j = 0; j < 3; ++j) {
    159       VERIFY_IS_EQUAL(left(i, j), result(i, j));
    160       VERIFY_IS_EQUAL(right(i, j), result(i+2, j));
    161     }
    162   }
    163   sycl_device.deallocate(gpu_in1_data);
    164   sycl_device.deallocate(gpu_in2_data);
    165   sycl_device.deallocate(gpu_out_data);
    166 }
    167 
    168 
    169 template <typename DataType, typename Dev_selector> void tensorConcat_perDevice(Dev_selector s){
    170   QueueInterface queueInterface(s);
    171   auto sycl_device = Eigen::SyclDevice(&queueInterface);
    172   test_simple_concatenation<DataType, RowMajor, int64_t>(sycl_device);
    173   test_simple_concatenation<DataType, ColMajor, int64_t>(sycl_device);
    174   test_concatenation_as_lvalue<DataType, ColMajor, int64_t>(sycl_device);
    175 }
    176 EIGEN_DECLARE_TEST(cxx11_tensor_concatenation_sycl) {
    177   for (const auto& device :Eigen::get_sycl_supported_devices()) {
    178     CALL_SUBTEST(tensorConcat_perDevice<float>(device));
    179   }
    180 }