cxx11_tensor_image_patch_sycl.cpp (62111B)
1 // This file is part of Eigen, a lightweight C++ template library 2 // for linear algebra. 3 // 4 // Copyright (C) 2016 5 // Mehdi Goli Codeplay Software Ltd. 6 // Ralph Potter Codeplay Software Ltd. 7 // Luke Iwanski Codeplay Software Ltd. 8 // Contact: <eigen@codeplay.com> 9 // 10 // This Source Code Form is subject to the terms of the Mozilla 11 // Public License v. 2.0. If a copy of the MPL was not distributed 12 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 13 14 #define EIGEN_TEST_NO_LONGDOUBLE 15 #define EIGEN_TEST_NO_COMPLEX 16 17 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t 18 #define EIGEN_USE_SYCL 19 20 #include "main.h" 21 #include <unsupported/Eigen/CXX11/Tensor> 22 23 using Eigen::Tensor; 24 static const int DataLayout = ColMajor; 25 26 template <typename DataType, typename IndexType> 27 static void test_simple_image_patch_sycl(const Eigen::SyclDevice& sycl_device) 28 { 29 IndexType sizeDim1 = 2; 30 IndexType sizeDim2 = 3; 31 IndexType sizeDim3 = 5; 32 IndexType sizeDim4 = 7; 33 array<IndexType, 4> tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}}; 34 array<IndexType, 4> tensorRowMajorRange = {{sizeDim4, sizeDim3, sizeDim2, sizeDim1}}; 35 Tensor<DataType, 4, DataLayout,IndexType> tensor_col_major(tensorColMajorRange); 36 Tensor<DataType, 4, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange); 37 tensor_col_major.setRandom(); 38 39 DataType* gpu_data_col_major = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType))); 40 DataType* gpu_data_row_major = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType))); 41 TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange); 42 TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange); 43 44 sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(),(tensor_col_major.size())*sizeof(DataType)); 45 gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout(); 46 sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_col_major.size())*sizeof(DataType)); 47 48 VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(3)); 49 VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(2)); 50 VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(1)); 51 VERIFY_IS_EQUAL(tensor_col_major.dimension(3), tensor_row_major.dimension(0)); 52 53 // Single pixel patch: ColMajor 54 array<IndexType, 5> patchColMajorTensorRange={{sizeDim1, 1, 1, sizeDim2*sizeDim3, sizeDim4}}; 55 Tensor<DataType, 5, DataLayout,IndexType> single_patch_col_major(patchColMajorTensorRange); 56 size_t patchTensorBuffSize =single_patch_col_major.size()*sizeof(DataType); 57 DataType* gpu_data_single_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 58 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_single_patch_col_major(gpu_data_single_patch_col_major, patchColMajorTensorRange); 59 gpu_single_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(1, 1); 60 sycl_device.memcpyDeviceToHost(single_patch_col_major.data(), gpu_data_single_patch_col_major, patchTensorBuffSize); 61 62 VERIFY_IS_EQUAL(single_patch_col_major.dimension(0), 2); 63 VERIFY_IS_EQUAL(single_patch_col_major.dimension(1), 1); 64 VERIFY_IS_EQUAL(single_patch_col_major.dimension(2), 1); 65 VERIFY_IS_EQUAL(single_patch_col_major.dimension(3), 3*5); 66 VERIFY_IS_EQUAL(single_patch_col_major.dimension(4), 7); 67 68 // Single pixel patch: RowMajor 69 array<IndexType, 5> patchRowMajorTensorRange={{sizeDim4, sizeDim2*sizeDim3, 1, 1, sizeDim1}}; 70 Tensor<DataType, 5, RowMajor,IndexType> single_patch_row_major(patchRowMajorTensorRange); 71 patchTensorBuffSize =single_patch_row_major.size()*sizeof(DataType); 72 DataType* gpu_data_single_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 73 TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_single_patch_row_major(gpu_data_single_patch_row_major, patchRowMajorTensorRange); 74 gpu_single_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(1, 1); 75 sycl_device.memcpyDeviceToHost(single_patch_row_major.data(), gpu_data_single_patch_row_major, patchTensorBuffSize); 76 77 VERIFY_IS_EQUAL(single_patch_row_major.dimension(0), 7); 78 VERIFY_IS_EQUAL(single_patch_row_major.dimension(1), 3*5); 79 VERIFY_IS_EQUAL(single_patch_row_major.dimension(2), 1); 80 VERIFY_IS_EQUAL(single_patch_row_major.dimension(3), 1); 81 VERIFY_IS_EQUAL(single_patch_row_major.dimension(4), 2); 82 83 for (IndexType i = 0; i < tensor_col_major.size(); ++i) { 84 // ColMajor 85 if (tensor_col_major.data()[i] != single_patch_col_major.data()[i]) { 86 std::cout << "Mismatch detected at index colmajor " << i << " : " 87 << tensor_col_major.data()[i] << " vs " << single_patch_col_major.data()[i] 88 << std::endl; 89 } 90 VERIFY_IS_EQUAL(single_patch_col_major.data()[i], tensor_col_major.data()[i]); 91 // RowMajor 92 if (tensor_row_major.data()[i] != single_patch_row_major.data()[i]) { 93 std::cout << "Mismatch detected at index row major" << i << " : " 94 << tensor_row_major.data()[i] << " vs " 95 << single_patch_row_major.data()[i] << std::endl; 96 } 97 VERIFY_IS_EQUAL(single_patch_row_major.data()[i], 98 tensor_row_major.data()[i]); 99 VERIFY_IS_EQUAL(tensor_col_major.data()[i], tensor_row_major.data()[i]); 100 VERIFY_IS_EQUAL(single_patch_col_major.data()[i], 101 single_patch_row_major.data()[i]); 102 } 103 104 105 // Entire image patch: ColMajor 106 patchColMajorTensorRange={{sizeDim1, sizeDim2, sizeDim3, sizeDim2*sizeDim3, sizeDim4}}; 107 Tensor<DataType, 5, DataLayout,IndexType> entire_image_patch_col_major(patchColMajorTensorRange); 108 patchTensorBuffSize =entire_image_patch_col_major.size()*sizeof(DataType); 109 DataType* gpu_data_entire_image_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 110 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_entire_image_patch_col_major(gpu_data_entire_image_patch_col_major, patchColMajorTensorRange); 111 gpu_entire_image_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(3, 5); 112 sycl_device.memcpyDeviceToHost(entire_image_patch_col_major.data(), gpu_data_entire_image_patch_col_major, patchTensorBuffSize); 113 114 VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(0), 2); 115 VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(1), 3); 116 VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(2), 5); 117 VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(3), 3*5); 118 VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(4), 7); 119 120 // Entire image patch: RowMajor 121 patchRowMajorTensorRange={{sizeDim4, sizeDim2*sizeDim3, sizeDim3, sizeDim2, sizeDim1}}; 122 Tensor<DataType, 5, RowMajor,IndexType> entire_image_patch_row_major(patchRowMajorTensorRange); 123 patchTensorBuffSize =entire_image_patch_row_major.size()*sizeof(DataType); 124 DataType* gpu_data_entire_image_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 125 TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_entire_image_patch_row_major(gpu_data_entire_image_patch_row_major, patchRowMajorTensorRange); 126 gpu_entire_image_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(3, 5); 127 sycl_device.memcpyDeviceToHost(entire_image_patch_row_major.data(), gpu_data_entire_image_patch_row_major, patchTensorBuffSize); 128 129 VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(0), 7); 130 VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(1), 3*5); 131 VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(2), 5); 132 VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(3), 3); 133 VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(4), 2); 134 135 for (IndexType i = 0; i < 3; ++i) { 136 for (IndexType j = 0; j < 5; ++j) { 137 IndexType patchId = i+3*j; 138 for (IndexType r = 0; r < 3; ++r) { 139 for (IndexType c = 0; c < 5; ++c) { 140 for (IndexType d = 0; d < 2; ++d) { 141 for (IndexType b = 0; b < 7; ++b) { 142 DataType expected_col_major = 0.0f; 143 DataType expected_row_major = 0.0f; 144 if (r-1+i >= 0 && c-2+j >= 0 && r-1+i < 3 && c-2+j < 5) { 145 expected_col_major = tensor_col_major(d, r-1+i, c-2+j, b); 146 expected_row_major = tensor_row_major(b, c-2+j, r-1+i, d); 147 } 148 // ColMajor 149 if (entire_image_patch_col_major(d, r, c, patchId, b) != expected_col_major) { 150 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; 151 } 152 VERIFY_IS_EQUAL(entire_image_patch_col_major(d, r, c, patchId, b), expected_col_major); 153 // RowMajor 154 if (entire_image_patch_row_major(b, patchId, c, r, d) != 155 expected_row_major) { 156 std::cout << "Mismatch detected at index i=" << i << " j=" << j 157 << " r=" << r << " c=" << c << " d=" << d << " b=" << b 158 << std::endl; 159 } 160 VERIFY_IS_EQUAL(entire_image_patch_row_major(b, patchId, c, r, d), 161 expected_row_major); 162 // Check that ColMajor and RowMajor agree. 163 VERIFY_IS_EQUAL(expected_col_major, expected_row_major); 164 } 165 } 166 } 167 } 168 } 169 } 170 171 // 2D patch: ColMajor 172 patchColMajorTensorRange={{sizeDim1, 2, 2, sizeDim2*sizeDim3, sizeDim4}}; 173 Tensor<DataType, 5, DataLayout,IndexType> twod_patch_col_major(patchColMajorTensorRange); 174 patchTensorBuffSize =twod_patch_col_major.size()*sizeof(DataType); 175 DataType* gpu_data_twod_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 176 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_twod_patch_col_major(gpu_data_twod_patch_col_major, patchColMajorTensorRange); 177 gpu_twod_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(2, 2); 178 sycl_device.memcpyDeviceToHost(twod_patch_col_major.data(), gpu_data_twod_patch_col_major, patchTensorBuffSize); 179 180 VERIFY_IS_EQUAL(twod_patch_col_major.dimension(0), 2); 181 VERIFY_IS_EQUAL(twod_patch_col_major.dimension(1), 2); 182 VERIFY_IS_EQUAL(twod_patch_col_major.dimension(2), 2); 183 VERIFY_IS_EQUAL(twod_patch_col_major.dimension(3), 3*5); 184 VERIFY_IS_EQUAL(twod_patch_col_major.dimension(4), 7); 185 186 // 2D patch: RowMajor 187 patchRowMajorTensorRange={{sizeDim4, sizeDim2*sizeDim3, 2, 2, sizeDim1}}; 188 Tensor<DataType, 5, RowMajor,IndexType> twod_patch_row_major(patchRowMajorTensorRange); 189 patchTensorBuffSize =twod_patch_row_major.size()*sizeof(DataType); 190 DataType* gpu_data_twod_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 191 TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_twod_patch_row_major(gpu_data_twod_patch_row_major, patchRowMajorTensorRange); 192 gpu_twod_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(2, 2); 193 sycl_device.memcpyDeviceToHost(twod_patch_row_major.data(), gpu_data_twod_patch_row_major, patchTensorBuffSize); 194 195 VERIFY_IS_EQUAL(twod_patch_row_major.dimension(0), 7); 196 VERIFY_IS_EQUAL(twod_patch_row_major.dimension(1), 3*5); 197 VERIFY_IS_EQUAL(twod_patch_row_major.dimension(2), 2); 198 VERIFY_IS_EQUAL(twod_patch_row_major.dimension(3), 2); 199 VERIFY_IS_EQUAL(twod_patch_row_major.dimension(4), 2); 200 201 202 // Based on the calculation described in TensorTraits.h, padding happens to be 0. 203 IndexType row_padding = 0; 204 IndexType col_padding = 0; 205 IndexType stride = 1; 206 207 for (IndexType i = 0; i < 3; ++i) { 208 for (IndexType j = 0; j < 5; ++j) { 209 IndexType patchId = i+3*j; 210 for (IndexType r = 0; r < 2; ++r) { 211 for (IndexType c = 0; c < 2; ++c) { 212 for (IndexType d = 0; d < 2; ++d) { 213 for (IndexType b = 0; b < 7; ++b) { 214 DataType expected_col_major = 0.0f; 215 DataType expected_row_major = 0.0f; 216 IndexType row_offset = r*stride + i - row_padding; 217 IndexType col_offset = c*stride + j - col_padding; 218 // ColMajor 219 if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_col_major.dimension(1) && col_offset < tensor_col_major.dimension(2)) { 220 expected_col_major = tensor_col_major(d, row_offset, col_offset, b); 221 } 222 if (twod_patch_col_major(d, r, c, patchId, b) != expected_col_major) { 223 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; 224 } 225 VERIFY_IS_EQUAL(twod_patch_col_major(d, r, c, patchId, b), expected_col_major); 226 227 // RowMajor 228 if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_row_major.dimension(2) && col_offset < tensor_row_major.dimension(1)) { 229 expected_row_major = tensor_row_major(b, col_offset, row_offset, d); 230 231 } 232 if (twod_patch_row_major(b, patchId, c, r, d) != expected_row_major) { 233 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; 234 } 235 VERIFY_IS_EQUAL(twod_patch_row_major(b, patchId, c, r, d), expected_row_major); 236 // Check that ColMajor and RowMajor agree. 237 VERIFY_IS_EQUAL(expected_col_major, expected_row_major); 238 } 239 } 240 } 241 } 242 } 243 } 244 245 sycl_device.deallocate(gpu_data_col_major); 246 sycl_device.deallocate(gpu_data_row_major); 247 sycl_device.deallocate(gpu_data_single_patch_col_major); 248 sycl_device.deallocate(gpu_data_single_patch_row_major); 249 sycl_device.deallocate(gpu_data_entire_image_patch_col_major); 250 sycl_device.deallocate(gpu_data_entire_image_patch_row_major); 251 sycl_device.deallocate(gpu_data_twod_patch_col_major); 252 sycl_device.deallocate(gpu_data_twod_patch_row_major); 253 254 } 255 256 257 // Verifies VALID padding (no padding) with incrementing values. 258 template <typename DataType, typename IndexType> 259 static void test_patch_padding_valid_sycl(const Eigen::SyclDevice& sycl_device){ 260 IndexType input_depth = 3; 261 IndexType input_rows = 3; 262 IndexType input_cols = 3; 263 IndexType input_batches = 1; 264 IndexType ksize = 2; // Corresponds to the Rows and Cols for tensor.extract_image_patches<>. 265 IndexType stride = 2; // Only same stride is supported. 266 267 array<IndexType, 4> tensorColMajorRange = {{input_depth, input_rows, input_cols, input_batches}}; 268 array<IndexType, 4> tensorRowMajorRange = {{input_batches, input_cols, input_rows, input_depth}}; 269 Tensor<DataType, 4, DataLayout,IndexType> tensor_col_major(tensorColMajorRange); 270 Tensor<DataType, 4, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange); 271 272 DataType* gpu_data_col_major = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType))); 273 DataType* gpu_data_row_major = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType))); 274 TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange); 275 TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange); 276 277 sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(),(tensor_col_major.size())*sizeof(DataType)); 278 gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout(); 279 sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_col_major.size())*sizeof(DataType)); 280 281 VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(3)); 282 VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(2)); 283 VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(1)); 284 VERIFY_IS_EQUAL(tensor_col_major.dimension(3), tensor_row_major.dimension(0)); 285 286 // Initializes tensor with incrementing numbers. 287 for (IndexType i = 0; i < tensor_col_major.size(); ++i) { 288 tensor_col_major.data()[i] = i + 1; 289 } 290 // ColMajor 291 array<IndexType, 5> patchColMajorTensorRange={{input_depth, ksize, ksize, 1, input_batches}}; 292 Tensor<DataType, 5, DataLayout,IndexType> result_col_major(patchColMajorTensorRange); 293 size_t patchTensorBuffSize =result_col_major.size()*sizeof(DataType); 294 DataType* gpu_data_result_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 295 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_result_col_major(gpu_data_result_col_major, patchColMajorTensorRange); 296 gpu_result_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID); 297 sycl_device.memcpyDeviceToHost(result_col_major.data(), gpu_data_result_col_major, patchTensorBuffSize); 298 299 VERIFY_IS_EQUAL(result_col_major.dimension(0), input_depth); // depth 300 VERIFY_IS_EQUAL(result_col_major.dimension(1), ksize); // kernel rows 301 VERIFY_IS_EQUAL(result_col_major.dimension(2), ksize); // kernel cols 302 VERIFY_IS_EQUAL(result_col_major.dimension(3), 1); // number of patches 303 VERIFY_IS_EQUAL(result_col_major.dimension(4), input_batches); // number of batches 304 305 // RowMajor 306 array<IndexType, 5> patchRowMajorTensorRange={{input_batches, 1, ksize, ksize, input_depth }}; 307 Tensor<DataType, 5, RowMajor,IndexType> result_row_major(patchRowMajorTensorRange); 308 patchTensorBuffSize =result_row_major.size()*sizeof(DataType); 309 DataType* gpu_data_result_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 310 TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_result_row_major(gpu_data_result_row_major, patchRowMajorTensorRange); 311 gpu_result_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID); 312 sycl_device.memcpyDeviceToHost(result_row_major.data(), gpu_data_result_row_major, patchTensorBuffSize); 313 314 VERIFY_IS_EQUAL(result_col_major.dimension(0), result_row_major.dimension(4)); 315 VERIFY_IS_EQUAL(result_col_major.dimension(1), result_row_major.dimension(3)); 316 VERIFY_IS_EQUAL(result_col_major.dimension(2), result_row_major.dimension(2)); 317 VERIFY_IS_EQUAL(result_col_major.dimension(3), result_row_major.dimension(1)); 318 VERIFY_IS_EQUAL(result_col_major.dimension(4), result_row_major.dimension(0)); 319 320 // No padding is carried out. 321 IndexType row_padding = 0; 322 IndexType col_padding = 0; 323 324 for (IndexType i = 0; (i+stride+ksize-1) < input_rows; i += stride) { // input rows 325 for (IndexType j = 0; (j+stride+ksize-1) < input_cols; j += stride) { // input cols 326 IndexType patchId = i+input_rows*j; 327 for (IndexType r = 0; r < ksize; ++r) { // patch rows 328 for (IndexType c = 0; c < ksize; ++c) { // patch cols 329 for (IndexType d = 0; d < input_depth; ++d) { // depth 330 for (IndexType b = 0; b < input_batches; ++b) { // batch 331 DataType expected_col_major = 0.0f; 332 DataType expected_row_major = 0.0f; 333 IndexType row_offset = r + i - row_padding; 334 IndexType col_offset = c + j - col_padding; 335 if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) { 336 expected_col_major = tensor_col_major(d, row_offset, col_offset, b); 337 expected_row_major = tensor_row_major(b, col_offset, row_offset, d); 338 } 339 // ColMajor 340 if (result_col_major(d, r, c, patchId, b) != expected_col_major) { 341 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; 342 } 343 VERIFY_IS_EQUAL(result_col_major(d, r, c, patchId, b), expected_col_major); 344 // RowMajor 345 if (result_row_major(b, patchId, c, r, d) != expected_row_major) { 346 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; 347 } 348 VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major); 349 // Check that ColMajor and RowMajor agree. 350 VERIFY_IS_EQUAL(expected_col_major, expected_row_major); 351 } 352 } 353 } 354 } 355 } 356 } 357 sycl_device.deallocate(gpu_data_col_major); 358 sycl_device.deallocate(gpu_data_row_major); 359 sycl_device.deallocate(gpu_data_result_col_major); 360 sycl_device.deallocate(gpu_data_result_row_major); 361 } 362 363 // Verifies VALID padding (no padding) with the same value. 364 template <typename DataType, typename IndexType> 365 static void test_patch_padding_valid_same_value_sycl(const Eigen::SyclDevice& sycl_device){ 366 IndexType input_depth = 1; 367 IndexType input_rows = 5; 368 IndexType input_cols = 5; 369 IndexType input_batches = 2; 370 IndexType ksize = 3; // Corresponds to the Rows and Cols for tensor.extract_image_patches<>. 371 IndexType stride = 2; // Only same stride is supported. 372 // ColMajor 373 374 array<IndexType, 4> tensorColMajorRange = {{input_depth, input_rows, input_cols, input_batches}}; 375 array<IndexType, 4> tensorRowMajorRange = {{input_batches, input_cols, input_rows, input_depth}}; 376 Tensor<DataType, 4, DataLayout,IndexType> tensor_col_major(tensorColMajorRange); 377 Tensor<DataType, 4, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange); 378 379 DataType* gpu_data_col_major = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType))); 380 DataType* gpu_data_row_major = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType))); 381 TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange); 382 TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange); 383 gpu_col_major.device(sycl_device)=gpu_col_major.constant(11.0f); 384 gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout(); 385 sycl_device.memcpyDeviceToHost(tensor_col_major.data(), gpu_data_col_major, (tensor_col_major.size())*sizeof(DataType)); 386 sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_row_major.size())*sizeof(DataType)); 387 VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(3)); 388 VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(2)); 389 VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(1)); 390 VERIFY_IS_EQUAL(tensor_col_major.dimension(3), tensor_row_major.dimension(0)); 391 392 array<IndexType, 5> patchColMajorTensorRange={{input_depth, ksize, ksize, 4, input_batches}}; 393 Tensor<DataType, 5, DataLayout,IndexType> result_col_major(patchColMajorTensorRange); 394 size_t patchTensorBuffSize =result_col_major.size()*sizeof(DataType); 395 DataType* gpu_data_result_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 396 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_result_col_major(gpu_data_result_col_major, patchColMajorTensorRange); 397 gpu_result_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID); 398 sycl_device.memcpyDeviceToHost(result_col_major.data(), gpu_data_result_col_major, patchTensorBuffSize); 399 400 VERIFY_IS_EQUAL(result_col_major.dimension(0), input_depth); // depth 401 VERIFY_IS_EQUAL(result_col_major.dimension(1), ksize); // kernel rows 402 VERIFY_IS_EQUAL(result_col_major.dimension(2), ksize); // kernel cols 403 VERIFY_IS_EQUAL(result_col_major.dimension(3), 4); // number of patches 404 VERIFY_IS_EQUAL(result_col_major.dimension(4), input_batches); // number of batches 405 406 // RowMajor 407 array<IndexType, 5> patchRowMajorTensorRange={{input_batches, 4, ksize, ksize, input_depth }}; 408 Tensor<DataType, 5, RowMajor,IndexType> result_row_major(patchRowMajorTensorRange); 409 patchTensorBuffSize =result_row_major.size()*sizeof(DataType); 410 DataType* gpu_data_result_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 411 TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_result_row_major(gpu_data_result_row_major, patchRowMajorTensorRange); 412 gpu_result_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID); 413 sycl_device.memcpyDeviceToHost(result_row_major.data(), gpu_data_result_row_major, patchTensorBuffSize); 414 415 VERIFY_IS_EQUAL(result_col_major.dimension(0), result_row_major.dimension(4)); 416 VERIFY_IS_EQUAL(result_col_major.dimension(1), result_row_major.dimension(3)); 417 VERIFY_IS_EQUAL(result_col_major.dimension(2), result_row_major.dimension(2)); 418 VERIFY_IS_EQUAL(result_col_major.dimension(3), result_row_major.dimension(1)); 419 VERIFY_IS_EQUAL(result_col_major.dimension(4), result_row_major.dimension(0)); 420 421 // No padding is carried out. 422 IndexType row_padding = 0; 423 IndexType col_padding = 0; 424 425 for (IndexType i = 0; (i+stride+ksize-1) <= input_rows; i += stride) { // input rows 426 for (IndexType j = 0; (j+stride+ksize-1) <= input_cols; j += stride) { // input cols 427 IndexType patchId = i+input_rows*j; 428 for (IndexType r = 0; r < ksize; ++r) { // patch rows 429 for (IndexType c = 0; c < ksize; ++c) { // patch cols 430 for (IndexType d = 0; d < input_depth; ++d) { // depth 431 for (IndexType b = 0; b < input_batches; ++b) { // batch 432 DataType expected_col_major = 0.0f; 433 DataType expected_row_major = 0.0f; 434 IndexType row_offset = r + i - row_padding; 435 IndexType col_offset = c + j - col_padding; 436 if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) { 437 expected_col_major = tensor_col_major(d, row_offset, col_offset, b); 438 expected_row_major = tensor_row_major(b, col_offset, row_offset, d); 439 } 440 // ColMajor 441 if (result_col_major(d, r, c, patchId, b) != expected_col_major) { 442 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; 443 } 444 VERIFY_IS_EQUAL(result_col_major(d, r, c, patchId, b), expected_col_major); 445 // RowMajor 446 if (result_row_major(b, patchId, c, r, d) != expected_row_major) { 447 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; 448 } 449 VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major); 450 // Check that ColMajor and RowMajor agree. 451 VERIFY_IS_EQUAL(expected_col_major, expected_row_major); 452 } 453 } 454 } 455 } 456 } 457 } 458 } 459 460 // Verifies SAME padding. 461 template <typename DataType, typename IndexType> 462 static void test_patch_padding_same_sycl(const Eigen::SyclDevice& sycl_device){ 463 IndexType input_depth = 3; 464 IndexType input_rows = 4; 465 IndexType input_cols = 2; 466 IndexType input_batches = 1; 467 IndexType ksize = 2; // Corresponds to the Rows and Cols for tensor.extract_image_patches<>. 468 IndexType stride = 2; // Only same stride is supported. 469 470 // ColMajor 471 array<IndexType, 4> tensorColMajorRange = {{input_depth, input_rows, input_cols, input_batches}}; 472 array<IndexType, 4> tensorRowMajorRange = {{input_batches, input_cols, input_rows, input_depth}}; 473 Tensor<DataType, 4, DataLayout,IndexType> tensor_col_major(tensorColMajorRange); 474 Tensor<DataType, 4, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange); 475 476 DataType* gpu_data_col_major = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType))); 477 DataType* gpu_data_row_major = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType))); 478 TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange); 479 TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange); 480 481 sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(),(tensor_col_major.size())*sizeof(DataType)); 482 gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout(); 483 sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_col_major.size())*sizeof(DataType)); 484 485 VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(3)); 486 VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(2)); 487 VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(1)); 488 VERIFY_IS_EQUAL(tensor_col_major.dimension(3), tensor_row_major.dimension(0)); 489 490 // Initializes tensor with incrementing numbers. 491 for (IndexType i = 0; i < tensor_col_major.size(); ++i) { 492 tensor_col_major.data()[i] = i + 1; 493 } 494 495 array<IndexType, 5> patchColMajorTensorRange={{input_depth, ksize, ksize, 2, input_batches}}; 496 Tensor<DataType, 5, DataLayout,IndexType> result_col_major(patchColMajorTensorRange); 497 size_t patchTensorBuffSize =result_col_major.size()*sizeof(DataType); 498 DataType* gpu_data_result_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 499 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_result_col_major(gpu_data_result_col_major, patchColMajorTensorRange); 500 gpu_result_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(ksize, ksize, stride, stride, PADDING_SAME); 501 sycl_device.memcpyDeviceToHost(result_col_major.data(), gpu_data_result_col_major, patchTensorBuffSize); 502 503 504 VERIFY_IS_EQUAL(result_col_major.dimension(0), input_depth); // depth 505 VERIFY_IS_EQUAL(result_col_major.dimension(1), ksize); // kernel rows 506 VERIFY_IS_EQUAL(result_col_major.dimension(2), ksize); // kernel cols 507 VERIFY_IS_EQUAL(result_col_major.dimension(3), 2); // number of patches 508 VERIFY_IS_EQUAL(result_col_major.dimension(4), input_batches); // number of batches 509 510 // RowMajor 511 512 array<IndexType, 5> patchRowMajorTensorRange={{input_batches, 2, ksize, ksize, input_depth }}; 513 Tensor<DataType, 5, RowMajor,IndexType> result_row_major(patchRowMajorTensorRange); 514 patchTensorBuffSize =result_row_major.size()*sizeof(DataType); 515 DataType* gpu_data_result_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 516 TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_result_row_major(gpu_data_result_row_major, patchRowMajorTensorRange); 517 gpu_result_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(ksize, ksize, stride, stride, PADDING_SAME); 518 sycl_device.memcpyDeviceToHost(result_row_major.data(), gpu_data_result_row_major, patchTensorBuffSize); 519 520 VERIFY_IS_EQUAL(result_col_major.dimension(0), result_row_major.dimension(4)); 521 VERIFY_IS_EQUAL(result_col_major.dimension(1), result_row_major.dimension(3)); 522 VERIFY_IS_EQUAL(result_col_major.dimension(2), result_row_major.dimension(2)); 523 VERIFY_IS_EQUAL(result_col_major.dimension(3), result_row_major.dimension(1)); 524 VERIFY_IS_EQUAL(result_col_major.dimension(4), result_row_major.dimension(0)); 525 526 // Based on the calculation described in TensorTraits.h, padding happens to be 0. 527 IndexType row_padding = 0; 528 IndexType col_padding = 0; 529 530 for (IndexType i = 0; (i+stride+ksize-1) <= input_rows; i += stride) { // input rows 531 for (IndexType j = 0; (j+stride+ksize-1) <= input_cols; j += stride) { // input cols 532 IndexType patchId = i+input_rows*j; 533 for (IndexType r = 0; r < ksize; ++r) { // patch rows 534 for (IndexType c = 0; c < ksize; ++c) { // patch cols 535 for (IndexType d = 0; d < input_depth; ++d) { // depth 536 for (IndexType b = 0; b < input_batches; ++b) { // batch 537 DataType expected_col_major = 0.0f; 538 DataType expected_row_major = 0.0f; 539 IndexType row_offset = r*stride + i - row_padding; 540 IndexType col_offset = c*stride + j - col_padding; 541 if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) { 542 expected_col_major = tensor_col_major(d, row_offset, col_offset, b); 543 expected_row_major = tensor_row_major(b, col_offset, row_offset, d); 544 } 545 // ColMajor 546 if (result_col_major(d, r, c, patchId, b) != expected_col_major) { 547 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; 548 } 549 VERIFY_IS_EQUAL(result_col_major(d, r, c, patchId, b), expected_col_major); 550 // RowMajor 551 if (result_row_major(b, patchId, c, r, d) != expected_row_major) { 552 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; 553 } 554 VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major); 555 // Check that ColMajor and RowMajor agree. 556 VERIFY_IS_EQUAL(expected_col_major, expected_row_major); 557 } 558 } 559 } 560 } 561 } 562 } 563 } 564 565 566 template <typename DataType, typename IndexType> 567 static void test_patch_no_extra_dim_sycl(const Eigen::SyclDevice& sycl_device){ 568 569 IndexType sizeDim1 = 2; 570 IndexType sizeDim2 = 3; 571 IndexType sizeDim3 = 5; 572 573 // ColMajor 574 array<IndexType, 3> tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3}}; 575 array<IndexType, 3> tensorRowMajorRange = {{sizeDim3, sizeDim2, sizeDim1}}; 576 Tensor<DataType, 3, DataLayout,IndexType> tensor_col_major(tensorColMajorRange); 577 tensor_col_major.setRandom(); 578 Tensor<DataType, 3, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange); 579 580 DataType* gpu_data_col_major = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType))); 581 DataType* gpu_data_row_major = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType))); 582 TensorMap<Tensor<DataType, 3, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange); 583 TensorMap<Tensor<DataType, 3, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange); 584 585 sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(),(tensor_col_major.size())*sizeof(DataType)); 586 gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout(); 587 sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_row_major.size())*sizeof(DataType)); 588 589 VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(2)); 590 VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(1)); 591 VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(0)); 592 593 594 // Single pixel patch: ColMajor 595 array<IndexType, 4> patchColMajorTensorRange={{sizeDim1, 1, 1, sizeDim2*sizeDim3}}; 596 Tensor<DataType, 4, DataLayout,IndexType> single_patch_col_major(patchColMajorTensorRange); 597 size_t patchTensorBuffSize =single_patch_col_major.size()*sizeof(DataType); 598 DataType* gpu_data_single_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 599 TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_single_patch_col_major(gpu_data_single_patch_col_major, patchColMajorTensorRange); 600 gpu_single_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(1, 1); 601 sycl_device.memcpyDeviceToHost(single_patch_col_major.data(), gpu_data_single_patch_col_major, patchTensorBuffSize); 602 603 VERIFY_IS_EQUAL(single_patch_col_major.dimension(0), sizeDim1); 604 VERIFY_IS_EQUAL(single_patch_col_major.dimension(1), 1); 605 VERIFY_IS_EQUAL(single_patch_col_major.dimension(2), 1); 606 VERIFY_IS_EQUAL(single_patch_col_major.dimension(3), sizeDim2*sizeDim3); 607 608 // Single pixel patch: RowMajor 609 array<IndexType, 4> patchRowMajorTensorRange={{sizeDim2*sizeDim3, 1, 1, sizeDim1}}; 610 Tensor<DataType, 4, RowMajor,IndexType> single_patch_row_major(patchRowMajorTensorRange); 611 patchTensorBuffSize =single_patch_row_major.size()*sizeof(DataType); 612 DataType* gpu_data_single_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 613 TensorMap<Tensor<DataType, 4, RowMajor,IndexType>> gpu_single_patch_row_major(gpu_data_single_patch_row_major, patchRowMajorTensorRange); 614 gpu_single_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(1, 1); 615 sycl_device.memcpyDeviceToHost(single_patch_row_major.data(), gpu_data_single_patch_row_major, patchTensorBuffSize); 616 617 VERIFY_IS_EQUAL(single_patch_row_major.dimension(0), sizeDim2*sizeDim3); 618 VERIFY_IS_EQUAL(single_patch_row_major.dimension(1), 1); 619 VERIFY_IS_EQUAL(single_patch_row_major.dimension(2), 1); 620 VERIFY_IS_EQUAL(single_patch_row_major.dimension(3), sizeDim1); 621 622 for (IndexType i = 0; i < tensor_col_major.size(); ++i) { 623 // ColMajor 624 if (tensor_col_major.data()[i] != single_patch_col_major.data()[i]) { 625 std::cout << "Mismatch detected at index " << i << " : " << tensor_col_major.data()[i] << " vs " << single_patch_col_major.data()[i] << std::endl; 626 } 627 VERIFY_IS_EQUAL(single_patch_col_major.data()[i], tensor_col_major.data()[i]); 628 // RowMajor 629 if (tensor_row_major.data()[i] != single_patch_row_major.data()[i]) { 630 std::cout << "Mismatch detected at index " << i << " : " 631 << tensor_col_major.data()[i] << " vs " 632 << single_patch_row_major.data()[i] << std::endl; 633 } 634 VERIFY_IS_EQUAL(single_patch_row_major.data()[i], 635 tensor_row_major.data()[i]); 636 VERIFY_IS_EQUAL(tensor_col_major.data()[i], tensor_row_major.data()[i]); 637 VERIFY_IS_EQUAL(single_patch_col_major.data()[i], 638 single_patch_row_major.data()[i]); 639 } 640 641 // Entire image patch: ColMajor 642 patchColMajorTensorRange={{sizeDim1, sizeDim2, sizeDim3, sizeDim2*sizeDim3}}; 643 Tensor<DataType, 4, DataLayout,IndexType> entire_image_patch_col_major(patchColMajorTensorRange); 644 patchTensorBuffSize =entire_image_patch_col_major.size()*sizeof(DataType); 645 DataType* gpu_data_entire_image_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 646 TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_entire_image_patch_col_major(gpu_data_entire_image_patch_col_major, patchColMajorTensorRange); 647 gpu_entire_image_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(3, 5); 648 sycl_device.memcpyDeviceToHost(entire_image_patch_col_major.data(), gpu_data_entire_image_patch_col_major, patchTensorBuffSize); 649 650 VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(0), 2); 651 VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(1), 3); 652 VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(2), 5); 653 VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(3), 3*5); 654 655 // Entire image patch: RowMajor 656 patchRowMajorTensorRange={{sizeDim2*sizeDim3, sizeDim3, sizeDim2, sizeDim1}}; 657 Tensor<DataType, 4, RowMajor,IndexType> entire_image_patch_row_major(patchRowMajorTensorRange); 658 patchTensorBuffSize =entire_image_patch_row_major.size()*sizeof(DataType); 659 DataType* gpu_data_entire_image_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 660 TensorMap<Tensor<DataType, 4, RowMajor,IndexType>> gpu_entire_image_patch_row_major(gpu_data_entire_image_patch_row_major, patchRowMajorTensorRange); 661 gpu_entire_image_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(3, 5); 662 sycl_device.memcpyDeviceToHost(entire_image_patch_row_major.data(), gpu_data_entire_image_patch_row_major, patchTensorBuffSize); 663 VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(0), 3*5); 664 VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(1), 5); 665 VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(2), 3); 666 VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(3), 2); 667 668 for (IndexType i = 0; i < 3; ++i) { 669 for (IndexType j = 0; j < 5; ++j) { 670 IndexType patchId = i+3*j; 671 for (IndexType r = 0; r < 3; ++r) { 672 for (IndexType c = 0; c < 5; ++c) { 673 for (IndexType d = 0; d < 2; ++d) { 674 DataType expected_col_major = 0.0f; 675 DataType expected_row_major = 0.0f; 676 if (r-1+i >= 0 && c-2+j >= 0 && r-1+i < 3 && c-2+j < 5) { 677 expected_col_major = tensor_col_major(d, r-1+i, c-2+j); 678 expected_row_major = tensor_row_major(c-2+j, r-1+i, d); 679 } 680 // ColMajor 681 if (entire_image_patch_col_major(d, r, c, patchId) != expected_col_major) { 682 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl; 683 } 684 VERIFY_IS_EQUAL(entire_image_patch_col_major(d, r, c, patchId), expected_col_major); 685 // RowMajor 686 if (entire_image_patch_row_major(patchId, c, r, d) != 687 expected_row_major) { 688 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl; 689 } 690 VERIFY_IS_EQUAL(entire_image_patch_row_major(patchId, c, r, d), 691 expected_row_major); 692 // Check that ColMajor and RowMajor agree. 693 VERIFY_IS_EQUAL(expected_col_major, expected_row_major); 694 } 695 } 696 } 697 } 698 } 699 700 // 2D patch: ColMajor 701 patchColMajorTensorRange={{sizeDim1, 2, 2, sizeDim2*sizeDim3}}; 702 Tensor<DataType, 4, DataLayout,IndexType> twod_patch_col_major(patchColMajorTensorRange); 703 patchTensorBuffSize =twod_patch_col_major.size()*sizeof(DataType); 704 DataType* gpu_data_twod_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 705 TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_twod_patch_col_major(gpu_data_twod_patch_col_major, patchColMajorTensorRange); 706 gpu_twod_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(2, 2); 707 sycl_device.memcpyDeviceToHost(twod_patch_col_major.data(), gpu_data_twod_patch_col_major, patchTensorBuffSize); 708 709 VERIFY_IS_EQUAL(twod_patch_col_major.dimension(0), 2); 710 VERIFY_IS_EQUAL(twod_patch_col_major.dimension(1), 2); 711 VERIFY_IS_EQUAL(twod_patch_col_major.dimension(2), 2); 712 VERIFY_IS_EQUAL(twod_patch_col_major.dimension(3), 3*5); 713 714 // 2D patch: RowMajor 715 patchRowMajorTensorRange={{sizeDim2*sizeDim3, 2, 2, sizeDim1}}; 716 Tensor<DataType, 4, RowMajor,IndexType> twod_patch_row_major(patchRowMajorTensorRange); 717 patchTensorBuffSize =twod_patch_row_major.size()*sizeof(DataType); 718 DataType* gpu_data_twod_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 719 TensorMap<Tensor<DataType, 4, RowMajor,IndexType>> gpu_twod_patch_row_major(gpu_data_twod_patch_row_major, patchRowMajorTensorRange); 720 gpu_twod_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(2, 2); 721 sycl_device.memcpyDeviceToHost(twod_patch_row_major.data(), gpu_data_twod_patch_row_major, patchTensorBuffSize); 722 VERIFY_IS_EQUAL(twod_patch_row_major.dimension(0), 3*5); 723 VERIFY_IS_EQUAL(twod_patch_row_major.dimension(1), 2); 724 VERIFY_IS_EQUAL(twod_patch_row_major.dimension(2), 2); 725 VERIFY_IS_EQUAL(twod_patch_row_major.dimension(3), 2); 726 727 // Based on the calculation described in TensorTraits.h, padding happens to be 0. 728 IndexType row_padding = 0; 729 IndexType col_padding = 0; 730 IndexType stride = 1; 731 732 for (IndexType i = 0; i < 3; ++i) { 733 for (IndexType j = 0; j < 5; ++j) { 734 IndexType patchId = i+3*j; 735 for (IndexType r = 0; r < 2; ++r) { 736 for (IndexType c = 0; c < 2; ++c) { 737 for (IndexType d = 0; d < 2; ++d) { 738 DataType expected_col_major = 0.0f; 739 DataType expected_row_major = 0.0f; 740 IndexType row_offset = r*stride + i - row_padding; 741 IndexType col_offset = c*stride + j - col_padding; 742 // ColMajor 743 if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_col_major.dimension(1) && col_offset < tensor_col_major.dimension(2)) { 744 expected_col_major = tensor_col_major(d, row_offset, col_offset); 745 } 746 if (twod_patch_col_major(d, r, c, patchId) != expected_col_major) { 747 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl; 748 } 749 VERIFY_IS_EQUAL(twod_patch_col_major(d, r, c, patchId), expected_col_major); 750 // RowMajor 751 if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_row_major.dimension(1) && col_offset < tensor_row_major.dimension(0)) { 752 expected_row_major = tensor_row_major(col_offset, row_offset, d); 753 } 754 if (twod_patch_row_major(patchId, c, r, d) != expected_row_major) { 755 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl; 756 } 757 VERIFY_IS_EQUAL(twod_patch_row_major(patchId, c, r, d), expected_row_major); 758 // Check that ColMajor and RowMajor agree. 759 VERIFY_IS_EQUAL(expected_col_major, expected_row_major); 760 } 761 } 762 } 763 } 764 } 765 766 sycl_device.deallocate(gpu_data_col_major); 767 sycl_device.deallocate(gpu_data_row_major); 768 sycl_device.deallocate(gpu_data_single_patch_col_major); 769 sycl_device.deallocate(gpu_data_single_patch_row_major); 770 sycl_device.deallocate(gpu_data_entire_image_patch_col_major); 771 sycl_device.deallocate(gpu_data_entire_image_patch_row_major); 772 sycl_device.deallocate(gpu_data_twod_patch_col_major); 773 sycl_device.deallocate(gpu_data_twod_patch_row_major); 774 } 775 776 template <typename DataType, typename IndexType> 777 static void test_imagenet_patches_sycl(const Eigen::SyclDevice& sycl_device) 778 { 779 // Test the code on typical configurations used by the 'imagenet' benchmarks at 780 // https://github.com/soumith/convnet-benchmarks 781 // ColMajor 782 IndexType sizeDim1 = 3; 783 IndexType sizeDim2 = 128; 784 IndexType sizeDim3 = 128; 785 IndexType sizeDim4 = 16; 786 array<IndexType, 4> tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}}; 787 Tensor<DataType, 4, DataLayout,IndexType> l_in_col_major(tensorColMajorRange); 788 l_in_col_major.setRandom(); 789 790 DataType* gpu_data_l_in_col_major = static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size()*sizeof(DataType))); 791 TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_l_in_col_major(gpu_data_l_in_col_major, tensorColMajorRange); 792 793 sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(),(l_in_col_major.size())*sizeof(DataType)); 794 795 array<IndexType, 5> patchTensorRange={{sizeDim1, 11, 11, sizeDim2*sizeDim3, sizeDim4}}; 796 Tensor<DataType, 5, DataLayout,IndexType> l_out_col_major(patchTensorRange); 797 size_t patchTensorBuffSize =l_out_col_major.size()*sizeof(DataType); 798 DataType* gpu_data_l_out_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 799 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_l_out_col_major(gpu_data_l_out_col_major, patchTensorRange); 800 gpu_l_out_col_major.device(sycl_device)=gpu_l_in_col_major.extract_image_patches(11, 11); 801 sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize); 802 803 VERIFY_IS_EQUAL(l_out_col_major.dimension(0), sizeDim1); 804 VERIFY_IS_EQUAL(l_out_col_major.dimension(1), 11); 805 VERIFY_IS_EQUAL(l_out_col_major.dimension(2), 11); 806 VERIFY_IS_EQUAL(l_out_col_major.dimension(3), sizeDim2*sizeDim3); 807 VERIFY_IS_EQUAL(l_out_col_major.dimension(4), sizeDim4); 808 809 // RowMajor 810 patchTensorRange={{sizeDim4, sizeDim2*sizeDim3, 11, 11, sizeDim1}}; 811 Tensor<DataType, 5, RowMajor,IndexType> l_out_row_major(patchTensorRange); 812 patchTensorBuffSize =l_out_row_major.size()*sizeof(DataType); 813 DataType* gpu_data_l_out_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 814 TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_l_out_row_major(gpu_data_l_out_row_major, patchTensorRange); 815 gpu_l_out_row_major.device(sycl_device)=gpu_l_in_col_major.swap_layout().extract_image_patches(11, 11); 816 sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize); 817 818 VERIFY_IS_EQUAL(l_out_row_major.dimension(0), sizeDim4); 819 VERIFY_IS_EQUAL(l_out_row_major.dimension(1), sizeDim2*sizeDim3); 820 VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 11); 821 VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 11); 822 VERIFY_IS_EQUAL(l_out_row_major.dimension(4), sizeDim1); 823 824 for (IndexType b = 0; b < 16; ++b) { 825 for (IndexType i = 0; i < 128; ++i) { 826 for (IndexType j = 0; j < 128; ++j) { 827 IndexType patchId = i+128*j; 828 for (IndexType c = 0; c < 11; ++c) { 829 for (IndexType r = 0; r < 11; ++r) { 830 for (IndexType d = 0; d < 3; ++d) { 831 DataType expected = 0.0f; 832 if (r-5+i >= 0 && c-5+j >= 0 && r-5+i < 128 && c-5+j < 128) { 833 expected = l_in_col_major(d, r-5+i, c-5+j, b); 834 } 835 // ColMajor 836 if (l_out_col_major(d, r, c, patchId, b) != expected) { 837 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; 838 } 839 VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected); 840 // RowMajor 841 if (l_out_row_major(b, patchId, c, r, d) != 842 expected) { 843 std::cout << "Mismatch detected at index i=" << i << " j=" << j 844 << " r=" << r << " c=" << c << " d=" << d << " b=" << b 845 << std::endl; 846 } 847 VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), 848 expected); 849 } 850 } 851 } 852 } 853 } 854 } 855 856 // ColMajor 857 sycl_device.deallocate(gpu_data_l_in_col_major); 858 sycl_device.deallocate(gpu_data_l_out_col_major); 859 sizeDim1 = 16; 860 sizeDim2 = 64; 861 sizeDim3 = 64; 862 sizeDim4 = 32; 863 tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}}; 864 l_in_col_major.resize(tensorColMajorRange); 865 l_in_col_major.setRandom(); 866 gpu_data_l_in_col_major = static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size()*sizeof(DataType))); 867 TensorMap<Tensor<DataType, 4, ColMajor, IndexType>>gpu_l_in_col_major_resize1(gpu_data_l_in_col_major, tensorColMajorRange); 868 869 patchTensorRange={{sizeDim1, 9, 9, sizeDim2*sizeDim3, sizeDim4}}; 870 l_out_col_major.resize(patchTensorRange); 871 patchTensorBuffSize =l_out_col_major.size()*sizeof(DataType); 872 gpu_data_l_out_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 873 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>>gpu_l_out_col_major_resize1(gpu_data_l_out_col_major, patchTensorRange); 874 sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(),(l_in_col_major.size())*sizeof(DataType)); 875 gpu_l_out_col_major_resize1.device(sycl_device)=gpu_l_in_col_major_resize1.extract_image_patches(9, 9); 876 sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize); 877 VERIFY_IS_EQUAL(l_out_col_major.dimension(0), 16); 878 VERIFY_IS_EQUAL(l_out_col_major.dimension(1), 9); 879 VERIFY_IS_EQUAL(l_out_col_major.dimension(2), 9); 880 VERIFY_IS_EQUAL(l_out_col_major.dimension(3), 64*64); 881 VERIFY_IS_EQUAL(l_out_col_major.dimension(4), 32); 882 883 // RowMajor 884 sycl_device.deallocate(gpu_data_l_out_row_major); 885 patchTensorRange={{sizeDim4, sizeDim2*sizeDim3, 9, 9 ,sizeDim1}}; 886 l_out_row_major.resize(patchTensorRange); 887 patchTensorBuffSize =l_out_row_major.size()*sizeof(DataType); 888 gpu_data_l_out_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 889 TensorMap<Tensor<DataType, 5, RowMajor,IndexType>>gpu_l_out_row_major_resize1(gpu_data_l_out_row_major, patchTensorRange); 890 gpu_l_out_row_major_resize1.device(sycl_device)=gpu_l_in_col_major_resize1.swap_layout().extract_image_patches(9, 9); 891 sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize); 892 893 VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32); 894 VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 64*64); 895 VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 9); 896 VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 9); 897 VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 16); 898 899 for (IndexType b = 0; b < 32; ++b) { 900 for (IndexType i = 0; i < 64; ++i) { 901 for (IndexType j = 0; j < 64; ++j) { 902 IndexType patchId = i+64*j; 903 for (IndexType c = 0; c < 9; ++c) { 904 for (IndexType r = 0; r < 9; ++r) { 905 for (IndexType d = 0; d < 16; ++d) { 906 DataType expected = 0.0f; 907 if (r-4+i >= 0 && c-4+j >= 0 && r-4+i < 64 && c-4+j < 64) { 908 expected = l_in_col_major(d, r-4+i, c-4+j, b); 909 } 910 // ColMajor 911 if (l_out_col_major(d, r, c, patchId, b) != expected) { 912 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; 913 } 914 VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected); 915 // RowMajor 916 if (l_out_row_major(b, patchId, c, r, d) != expected) { 917 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; 918 } 919 VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected); 920 } 921 } 922 } 923 } 924 } 925 } 926 927 // ColMajor 928 929 sycl_device.deallocate(gpu_data_l_in_col_major); 930 sycl_device.deallocate(gpu_data_l_out_col_major); 931 sizeDim1 = 32; 932 sizeDim2 = 16; 933 sizeDim3 = 16; 934 sizeDim4 = 32; 935 tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}}; 936 l_in_col_major.resize(tensorColMajorRange); 937 l_in_col_major.setRandom(); 938 gpu_data_l_in_col_major = static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size()*sizeof(DataType))); 939 TensorMap<Tensor<DataType, 4, ColMajor, IndexType>>gpu_l_in_col_major_resize2(gpu_data_l_in_col_major, tensorColMajorRange); 940 941 patchTensorRange={{sizeDim1, 7, 7, sizeDim2*sizeDim3, sizeDim4}}; 942 l_out_col_major.resize(patchTensorRange); 943 patchTensorBuffSize =l_out_col_major.size()*sizeof(DataType); 944 gpu_data_l_out_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 945 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>>gpu_l_out_col_major_resize2(gpu_data_l_out_col_major, patchTensorRange); 946 sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(),(l_in_col_major.size())*sizeof(DataType)); 947 gpu_l_out_col_major_resize2.device(sycl_device)=gpu_l_in_col_major_resize2.extract_image_patches(7, 7); 948 sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize); 949 950 VERIFY_IS_EQUAL(l_out_col_major.dimension(0), 32); 951 VERIFY_IS_EQUAL(l_out_col_major.dimension(1), 7); 952 VERIFY_IS_EQUAL(l_out_col_major.dimension(2), 7); 953 VERIFY_IS_EQUAL(l_out_col_major.dimension(3), 16*16); 954 VERIFY_IS_EQUAL(l_out_col_major.dimension(4), 32); 955 956 // RowMajor 957 sycl_device.deallocate(gpu_data_l_out_row_major); 958 patchTensorRange={{sizeDim4, sizeDim2*sizeDim3, 7, 7 ,sizeDim1}}; 959 l_out_row_major.resize(patchTensorRange); 960 patchTensorBuffSize =l_out_row_major.size()*sizeof(DataType); 961 gpu_data_l_out_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 962 TensorMap<Tensor<DataType, 5, RowMajor,IndexType>>gpu_l_out_row_major_resize2(gpu_data_l_out_row_major, patchTensorRange); 963 gpu_l_out_row_major_resize2.device(sycl_device)=gpu_l_in_col_major_resize2.swap_layout().extract_image_patches(7, 7); 964 sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize); 965 966 VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32); 967 VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 16*16); 968 VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 7); 969 VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 7); 970 VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 32); 971 972 for (IndexType b = 0; b < 32; ++b) { 973 for (IndexType i = 0; i < 16; ++i) { 974 for (IndexType j = 0; j < 16; ++j) { 975 IndexType patchId = i+16*j; 976 for (IndexType c = 0; c < 7; ++c) { 977 for (IndexType r = 0; r < 7; ++r) { 978 for (IndexType d = 0; d < 32; ++d) { 979 DataType expected = 0.0f; 980 if (r-3+i >= 0 && c-3+j >= 0 && r-3+i < 16 && c-3+j < 16) { 981 expected = l_in_col_major(d, r-3+i, c-3+j, b); 982 } 983 // ColMajor 984 if (l_out_col_major(d, r, c, patchId, b) != expected) { 985 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; 986 } 987 VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected); 988 // RowMajor 989 if (l_out_row_major(b, patchId, c, r, d) != expected) { 990 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; 991 } 992 VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected); 993 } 994 } 995 } 996 } 997 } 998 } 999 1000 // ColMajor 1001 sycl_device.deallocate(gpu_data_l_in_col_major); 1002 sycl_device.deallocate(gpu_data_l_out_col_major); 1003 sizeDim1 = 64; 1004 sizeDim2 = 13; 1005 sizeDim3 = 13; 1006 sizeDim4 = 32; 1007 tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}}; 1008 l_in_col_major.resize(tensorColMajorRange); 1009 l_in_col_major.setRandom(); 1010 gpu_data_l_in_col_major = static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size()*sizeof(DataType))); 1011 TensorMap<Tensor<DataType, 4, ColMajor, IndexType>>gpu_l_in_col_major_resize3(gpu_data_l_in_col_major, tensorColMajorRange); 1012 1013 patchTensorRange={{sizeDim1, 3, 3, sizeDim2*sizeDim3, sizeDim4}}; 1014 l_out_col_major.resize(patchTensorRange); 1015 patchTensorBuffSize =l_out_col_major.size()*sizeof(DataType); 1016 gpu_data_l_out_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 1017 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>>gpu_l_out_col_major_resize3(gpu_data_l_out_col_major, patchTensorRange); 1018 sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(),(l_in_col_major.size())*sizeof(DataType)); 1019 gpu_l_out_col_major_resize3.device(sycl_device)=gpu_l_in_col_major_resize3.extract_image_patches(3, 3); 1020 sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize); 1021 1022 VERIFY_IS_EQUAL(l_out_col_major.dimension(0), 64); 1023 VERIFY_IS_EQUAL(l_out_col_major.dimension(1), 3); 1024 VERIFY_IS_EQUAL(l_out_col_major.dimension(2), 3); 1025 VERIFY_IS_EQUAL(l_out_col_major.dimension(3), 13*13); 1026 VERIFY_IS_EQUAL(l_out_col_major.dimension(4), 32); 1027 1028 // RowMajor 1029 sycl_device.deallocate(gpu_data_l_out_row_major); 1030 patchTensorRange={{sizeDim4, sizeDim2*sizeDim3, 3, 3 ,sizeDim1}}; 1031 l_out_row_major.resize(patchTensorRange); 1032 patchTensorBuffSize =l_out_row_major.size()*sizeof(DataType); 1033 gpu_data_l_out_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize)); 1034 TensorMap<Tensor<DataType, 5, RowMajor,IndexType>>gpu_l_out_row_major_resize3(gpu_data_l_out_row_major, patchTensorRange); 1035 gpu_l_out_row_major_resize3.device(sycl_device)=gpu_l_in_col_major_resize3.swap_layout().extract_image_patches(3, 3); 1036 sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize); 1037 1038 VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32); 1039 VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 13*13); 1040 VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 3); 1041 VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 3); 1042 VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 64); 1043 1044 for (IndexType b = 0; b < 32; ++b) { 1045 for (IndexType i = 0; i < 13; ++i) { 1046 for (IndexType j = 0; j < 13; ++j) { 1047 IndexType patchId = i+13*j; 1048 for (IndexType c = 0; c < 3; ++c) { 1049 for (IndexType r = 0; r < 3; ++r) { 1050 for (IndexType d = 0; d < 64; ++d) { 1051 DataType expected = 0.0f; 1052 if (r-1+i >= 0 && c-1+j >= 0 && r-1+i < 13 && c-1+j < 13) { 1053 expected = l_in_col_major(d, r-1+i, c-1+j, b); 1054 } 1055 // ColMajor 1056 if (l_out_col_major(d, r, c, patchId, b) != expected) { 1057 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; 1058 } 1059 VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected); 1060 // RowMajor 1061 if (l_out_row_major(b, patchId, c, r, d) != expected) { 1062 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; 1063 } 1064 VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected); 1065 } 1066 } 1067 } 1068 } 1069 } 1070 } 1071 sycl_device.deallocate(gpu_data_l_in_col_major); 1072 sycl_device.deallocate(gpu_data_l_out_col_major); 1073 sycl_device.deallocate(gpu_data_l_out_row_major); 1074 } 1075 1076 1077 template<typename DataType, typename dev_Selector> void sycl_tensor_image_patch_test_per_device(dev_Selector s){ 1078 QueueInterface queueInterface(s); 1079 auto sycl_device = Eigen::SyclDevice(&queueInterface); 1080 test_simple_image_patch_sycl<DataType, int64_t>(sycl_device); 1081 test_patch_padding_valid_sycl<DataType, int64_t>(sycl_device); 1082 test_patch_padding_valid_same_value_sycl<DataType, int64_t>(sycl_device); 1083 test_patch_padding_same_sycl<DataType, int64_t>(sycl_device); 1084 test_patch_no_extra_dim_sycl<DataType, int64_t>(sycl_device); 1085 test_imagenet_patches_sycl<DataType, int64_t>(sycl_device); 1086 } 1087 EIGEN_DECLARE_TEST(cxx11_tensor_image_patch_sycl) 1088 { 1089 for (const auto& device :Eigen::get_sycl_supported_devices()) { 1090 CALL_SUBTEST(sycl_tensor_image_patch_test_per_device<float>(device)); 1091 } 1092 }