cart-elc

Source code for CART-ELC
git clone git://git.laack.co/cart-elc.git
Log | Files | Refs | README | LICENSE

cxx11_tensor_complex_cwise_ops_gpu.cu (2871B)


      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
      5 //
      6 // This Source Code Form is subject to the terms of the Mozilla
      7 // Public License v. 2.0. If a copy of the MPL was not distributed
      8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
      9 
     10 #define EIGEN_TEST_NO_LONGDOUBLE
     11 
     12 #define EIGEN_USE_GPU
     13 
     14 #include "main.h"
     15 #include <unsupported/Eigen/CXX11/Tensor>
     16 
     17 using Eigen::Tensor;
     18 
     // Exercises element-wise +, -, *, / and unary minus on 1-D tensors of
     // std::complex<T> evaluated through an Eigen::GpuDevice, and compares every
     // element of the device result with the same operation computed on the host.
     //
     // Template parameter T is the real scalar type underlying std::complex<T>.
     //
     // Every CUDA runtime call is evaluated unconditionally and its status is
     // checked with VERIFY afterwards. The calls must not be placed inside
     // assert(): assert() expands to nothing when NDEBUG is defined, which would
     // drop the copy and the synchronization themselves and leave `actual`
     // unfilled.
     template<typename T>
     void test_cuda_complex_cwise_ops() {
       typedef std::complex<T> Scalar;
       typedef Eigen::TensorMap<Eigen::Tensor<Scalar, 1, 0, int>, Eigen::Aligned> DeviceVector;

       const int kNumItems = 2;
       const std::size_t complex_bytes = kNumItems * sizeof(Scalar);

       // Device buffers: two inputs and one output, kNumItems elements each.
       Scalar* d_in1 = nullptr;
       Scalar* d_in2 = nullptr;
       Scalar* d_out = nullptr;
       cudaError_t status = cudaMalloc((void**)(&d_in1), complex_bytes);
       VERIFY(status == cudaSuccess);
       status = cudaMalloc((void**)(&d_in2), complex_bytes);
       VERIFY(status == cudaSuccess);
       status = cudaMalloc((void**)(&d_out), complex_bytes);
       VERIFY(status == cudaSuccess);

       Eigen::GpuStreamDevice stream;
       Eigen::GpuDevice gpu_device(&stream);

       DeviceVector gpu_in1(d_in1, kNumItems);
       DeviceVector gpu_in2(d_in2, kNumItems);
       DeviceVector gpu_out(d_out, kNumItems);

       // Operands; every element of an input tensor holds the same value.
       const Scalar a(3.14f, 2.7f);
       const Scalar b(-10.6f, 1.4f);

       gpu_in1.device(gpu_device) = gpu_in1.constant(a);
       gpu_in2.device(gpu_device) = gpu_in2.constant(b);

       enum CwiseOp {
         Add = 0,
         Sub,
         Mul,
         Div,
         Neg,
         NbOps
       };

       // Host-side tensor that receives a copy of d_out after each operation.
       Tensor<Scalar, 1, 0, int> actual(kNumItems);
       for (int op = Add; op < NbOps; op++) {
         Scalar expected;
         switch (static_cast<CwiseOp>(op)) {
           case Add:
             gpu_out.device(gpu_device) = gpu_in1 + gpu_in2;
             expected = a + b;
             break;
           case Sub:
             gpu_out.device(gpu_device) = gpu_in1 - gpu_in2;
             expected = a - b;
             break;
           case Mul:
             gpu_out.device(gpu_device) = gpu_in1 * gpu_in2;
             expected = a * b;
             break;
           case Div:
             gpu_out.device(gpu_device) = gpu_in1 / gpu_in2;
             expected = a / b;
             break;
           case Neg:
             gpu_out.device(gpu_device) = -gpu_in1;
             expected = -a;
             break;
           case NbOps:
             // Never reached: the loop condition stops before NbOps. The label
             // only keeps the switch exhaustive over CwiseOp.
             break;
         }

         // Copy the result back on the device's stream and wait for it to finish
         // before reading `actual` on the host.
         const cudaError_t copy_status =
             cudaMemcpyAsync(actual.data(), d_out, complex_bytes, cudaMemcpyDeviceToHost,
                             gpu_device.stream());
         VERIFY(copy_status == cudaSuccess);
         const cudaError_t sync_status = cudaStreamSynchronize(gpu_device.stream());
         VERIFY(sync_status == cudaSuccess);

         for (int i = 0; i < kNumItems; ++i) {
           VERIFY_IS_APPROX(actual(i), expected);
         }
       }

       status = cudaFree(d_in1);
       VERIFY(status == cudaSuccess);
       status = cudaFree(d_in2);
       VERIFY(status == cudaSuccess);
       status = cudaFree(d_out);
       VERIFY(status == cudaSuccess);
     }
     96 
     97 
     // Test entry point: runs the complex coefficient-wise GPU checks once per
     // underlying real scalar type.
     EIGEN_DECLARE_TEST(test_cxx11_tensor_complex_cwise_ops) {
       // std::complex<float>
       CALL_SUBTEST(test_cuda_complex_cwise_ops<float>());
       // std::complex<double>
       CALL_SUBTEST(test_cuda_complex_cwise_ops<double>());
     }
    102 }