cxx11_tensor_complex_cwise_ops_gpu.cu (2871B)
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_TEST_NO_LONGDOUBLE

#define EIGEN_USE_GPU

#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

using Eigen::Tensor;

// Evaluates the coefficient-wise operators +, -, *, / and unary minus on
// rank-1 tensors of std::complex<T> through an Eigen::GpuDevice, copies each
// result back to the host and compares every coefficient against the same
// operation computed on the host with std::complex<T>.
//
// T is the underlying real scalar type (instantiated below with float and
// double).
template<typename T>
void test_cuda_complex_cwise_ops() {
  typedef std::complex<T> Scalar;
  typedef Eigen::TensorMap<Eigen::Tensor<Scalar, 1, 0, int>, Eigen::Aligned> GpuTensorMap;

  const int kNumItems = 2;
  std::size_t complex_bytes = kNumItems * sizeof(Scalar);

  Scalar* d_in1 = nullptr;
  Scalar* d_in2 = nullptr;
  Scalar* d_out = nullptr;

  // Every CUDA runtime call is issued unconditionally and its status is
  // checked afterwards, so that no call can be dropped by a macro that
  // discards its argument.
  cudaError_t status = cudaMalloc((void**)(&d_in1), complex_bytes);
  VERIFY(status == cudaSuccess);
  status = cudaMalloc((void**)(&d_in2), complex_bytes);
  VERIFY(status == cudaSuccess);
  status = cudaMalloc((void**)(&d_out), complex_bytes);
  VERIFY(status == cudaSuccess);

  Eigen::GpuStreamDevice stream;
  Eigen::GpuDevice gpu_device(&stream);

  GpuTensorMap gpu_in1(d_in1, kNumItems);
  GpuTensorMap gpu_in2(d_in2, kNumItems);
  GpuTensorMap gpu_out(d_out, kNumItems);

  const Scalar a(3.14f, 2.7f);
  const Scalar b(-10.6f, 1.4f);

  // Fill both inputs on the device: every coefficient of gpu_in1 is `a` and
  // every coefficient of gpu_in2 is `b`.
  gpu_in1.device(gpu_device) = gpu_in1.constant(a);
  gpu_in2.device(gpu_device) = gpu_in2.constant(b);

  enum CwiseOp {
    Add = 0,
    Sub,
    Mul,
    Div,
    Neg,
    NbOps  // Number of operations; used only as the loop bound.
  };

  Tensor<Scalar, 1, 0, int> actual(kNumItems);
  for (int op = Add; op < NbOps; op++) {
    Scalar expected;
    switch (static_cast<CwiseOp>(op)) {
      case Add:
        gpu_out.device(gpu_device) = gpu_in1 + gpu_in2;
        expected = a + b;
        break;
      case Sub:
        gpu_out.device(gpu_device) = gpu_in1 - gpu_in2;
        expected = a - b;
        break;
      case Mul:
        gpu_out.device(gpu_device) = gpu_in1 * gpu_in2;
        expected = a * b;
        break;
      case Div:
        gpu_out.device(gpu_device) = gpu_in1 / gpu_in2;
        expected = a / b;
        break;
      case Neg:
        gpu_out.device(gpu_device) = -gpu_in1;
        expected = -a;
        break;
      case NbOps:
        // Not reachable from the loop above; listed so the switch covers
        // every enumerator.
        break;
    }

    // The copy and the synchronization must not live inside assert():
    // assert() expands to nothing when NDEBUG is defined, which would skip
    // both calls and leave `actual` unfilled before it is compared.
    status = cudaMemcpyAsync(actual.data(), d_out, complex_bytes,
                             cudaMemcpyDeviceToHost, gpu_device.stream());
    VERIFY(status == cudaSuccess);
    status = cudaStreamSynchronize(gpu_device.stream());
    VERIFY(status == cudaSuccess);

    for (int i = 0; i < kNumItems; ++i) {
      VERIFY_IS_APPROX(actual(i), expected);
    }
  }

  cudaFree(d_in1);
  cudaFree(d_in2);
  cudaFree(d_out);
}


EIGEN_DECLARE_TEST(test_cxx11_tensor_complex_cwise_ops)
{
  CALL_SUBTEST(test_cuda_complex_cwise_ops<float>());
  CALL_SUBTEST(test_cuda_complex_cwise_ops<double>());
}