cart-elc

Source code for CART-ELC
git clone git://git.laack.co/cart-elc.git
Log | Files | Refs | README | LICENSE

cxx11_tensor_block_eval.cpp (31888B)


      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // This Source Code Form is subject to the terms of the Mozilla
      5 // Public License v. 2.0. If a copy of the MPL was not distributed
      6 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
      7 
      8 // clang-format off
      9 #include "main.h"
     10 #include <Eigen/CXX11/Tensor>
     11 // clang-format on
     12 
     13 using Eigen::internal::TensorBlockDescriptor;
     14 using Eigen::internal::TensorExecutor;
     15 
     16 // -------------------------------------------------------------------------- //
     17 // Utility functions to generate random tensors, blocks, and evaluate them.
     18 
     19 template <int NumDims>
     20 static DSizes<Index, NumDims> RandomDims(Index min, Index max) {
     21   DSizes<Index, NumDims> dims;
     22   for (int i = 0; i < NumDims; ++i) {
     23     dims[i] = internal::random<Index>(min, max);
     24   }
     25   return DSizes<Index, NumDims>(dims);
     26 }
     27 
// Block offsets and extents allow constructing a TensorSlicingOp that
// corresponds to a TensorBlockDescriptor.
template <int NumDims>
struct TensorBlockParams {
  // Per-dimension start coordinates, passed to expr.slice() by the verifiers.
  DSizes<Index, NumDims> offsets;
  // Per-dimension extents, passed to expr.slice() by the verifiers.
  DSizes<Index, NumDims> sizes;
  // Descriptor of the same region, handed to the evaluator's block()/
  // writeBlock() calls.
  TensorBlockDescriptor<NumDims, Index> desc;
};
     36 
     37 template <int Layout, int NumDims>
     38 static TensorBlockParams<NumDims> RandomBlock(DSizes<Index, NumDims> dims,
     39                                               Index min, Index max) {
     40   // Choose random offsets and sizes along all tensor dimensions.
     41   DSizes<Index, NumDims> offsets(RandomDims<NumDims>(min, max));
     42   DSizes<Index, NumDims> sizes(RandomDims<NumDims>(min, max));
     43 
     44   // Make sure that offset + size do not overflow dims.
     45   for (int i = 0; i < NumDims; ++i) {
     46     offsets[i] = numext::mini(dims[i] - 1, offsets[i]);
     47     sizes[i] = numext::mini(sizes[i], dims[i] - offsets[i]);
     48   }
     49 
     50   Index offset = 0;
     51   DSizes<Index, NumDims> strides = Eigen::internal::strides<Layout>(dims);
     52   for (int i = 0; i < NumDims; ++i) {
     53     offset += strides[i] * offsets[i];
     54   }
     55 
     56   return {offsets, sizes, TensorBlockDescriptor<NumDims, Index>(offset, sizes)};
     57 }
     58 
     59 // Generate block with block sizes skewed towards inner dimensions. This type of
     60 // block is required for evaluating broadcast expressions.
     61 template <int Layout, int NumDims>
     62 static TensorBlockParams<NumDims> SkewedInnerBlock(
     63     DSizes<Index, NumDims> dims) {
     64   using BlockMapper = internal::TensorBlockMapper<NumDims, Layout, Index>;
     65   BlockMapper block_mapper(dims,
     66                            {internal::TensorBlockShapeType::kSkewedInnerDims,
     67                             internal::random<size_t>(1, dims.TotalSize()),
     68                             {0, 0, 0}});
     69 
     70   Index total_blocks = block_mapper.blockCount();
     71   Index block_index = internal::random<Index>(0, total_blocks - 1);
     72   auto block = block_mapper.blockDescriptor(block_index);
     73   DSizes<Index, NumDims> sizes = block.dimensions();
     74 
     75   auto strides = internal::strides<Layout>(dims);
     76   DSizes<Index, NumDims> offsets;
     77 
     78   // Compute offsets for the first block coefficient.
     79   Index index = block.offset();
     80   if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
     81     for (int i = NumDims - 1; i > 0; --i) {
     82       const Index idx = index / strides[i];
     83       index -= idx * strides[i];
     84       offsets[i] = idx;
     85     }
     86     if (NumDims > 0) offsets[0] = index;
     87   } else {
     88     for (int i = 0; i < NumDims - 1; ++i) {
     89       const Index idx = index / strides[i];
     90       index -= idx * strides[i];
     91       offsets[i] = idx;
     92     }
     93     if (NumDims > 0) offsets[NumDims - 1] = index;
     94   }
     95 
     96   return {offsets, sizes, block};
     97 }
     98 
     99 template <int NumDims>
    100 static TensorBlockParams<NumDims> FixedSizeBlock(DSizes<Index, NumDims> dims) {
    101   DSizes<Index, NumDims> offsets;
    102   for (int i = 0; i < NumDims; ++i) offsets[i] = 0;
    103 
    104   return {offsets, dims, TensorBlockDescriptor<NumDims, Index>(0, dims)};
    105 }
    106 
    107 inline Eigen::IndexList<Index, Eigen::type2index<1>> NByOne(Index n) {
    108   Eigen::IndexList<Index, Eigen::type2index<1>> ret;
    109   ret.set(0, n);
    110   return ret;
    111 }
    112 inline Eigen::IndexList<Eigen::type2index<1>, Index> OneByM(Index m) {
    113   Eigen::IndexList<Eigen::type2index<1>, Index> ret;
    114   ret.set(1, m);
    115   return ret;
    116 }
    117 
    118 // -------------------------------------------------------------------------- //
    119 // Verify that block expression evaluation produces the same result as a
    120 // TensorSliceOp (reading a tensor block is same to taking a tensor slice).
    121 
    122 template <typename T, int NumDims, int Layout, typename Expression,
    123           typename GenBlockParams>
    124 static void VerifyBlockEvaluator(Expression expr, GenBlockParams gen_block) {
    125   using Device = DefaultDevice;
    126   auto d = Device();
    127 
    128   // Scratch memory allocator for block evaluation.
    129   typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
    130   TensorBlockScratch scratch(d);
    131 
    132   // TensorEvaluator is needed to produce tensor blocks of the expression.
    133   auto eval = TensorEvaluator<const decltype(expr), Device>(expr, d);
    134   eval.evalSubExprsIfNeeded(nullptr);
    135 
    136   // Choose a random offsets, sizes and TensorBlockDescriptor.
    137   TensorBlockParams<NumDims> block_params = gen_block();
    138 
    139   // Evaluate TensorBlock expression into a tensor.
    140   Tensor<T, NumDims, Layout> block(block_params.desc.dimensions());
    141 
    142   // Dimensions for the potential destination buffer.
    143   DSizes<Index, NumDims> dst_dims;
    144   if (internal::random<bool>()) {
    145     dst_dims = block_params.desc.dimensions();
    146   } else {
    147     for (int i = 0; i < NumDims; ++i) {
    148       Index extent = internal::random<Index>(0, 5);
    149       dst_dims[i] = block_params.desc.dimension(i) + extent;
    150     }
    151   }
    152 
    153   // Maybe use this tensor as a block desc destination.
    154   Tensor<T, NumDims, Layout> dst(dst_dims);
    155   dst.setZero();
    156   if (internal::random<bool>()) {
    157     block_params.desc.template AddDestinationBuffer<Layout>(
    158         dst.data(), internal::strides<Layout>(dst.dimensions()));
    159   }
    160 
    161   const bool root_of_expr = internal::random<bool>();
    162   auto tensor_block = eval.block(block_params.desc, scratch, root_of_expr);
    163 
    164   if (tensor_block.kind() == internal::TensorBlockKind::kMaterializedInOutput) {
    165     // Copy data from destination buffer.
    166     if (dimensions_match(dst.dimensions(), block.dimensions())) {
    167       block = dst;
    168     } else {
    169       DSizes<Index, NumDims> offsets;
    170       for (int i = 0; i < NumDims; ++i) offsets[i] = 0;
    171       block = dst.slice(offsets, block.dimensions());
    172     }
    173 
    174   } else {
    175     // Assign to block from expression.
    176     auto b_expr = tensor_block.expr();
    177 
    178     // We explicitly disable vectorization and tiling, to run a simple coefficient
    179     // wise assignment loop, because it's very simple and should be correct.
    180     using BlockAssign = TensorAssignOp<decltype(block), const decltype(b_expr)>;
    181     using BlockExecutor = TensorExecutor<const BlockAssign, Device, false,
    182                                          internal::TiledEvaluation::Off>;
    183     BlockExecutor::run(BlockAssign(block, b_expr), d);
    184   }
    185 
    186   // Cleanup temporary buffers owned by a tensor block.
    187   tensor_block.cleanup();
    188 
    189   // Compute a Tensor slice corresponding to a Tensor block.
    190   Tensor<T, NumDims, Layout> slice(block_params.desc.dimensions());
    191   auto s_expr = expr.slice(block_params.offsets, block_params.sizes);
    192 
    193   // Explicitly use coefficient assignment to evaluate slice expression.
    194   using SliceAssign = TensorAssignOp<decltype(slice), const decltype(s_expr)>;
    195   using SliceExecutor = TensorExecutor<const SliceAssign, Device, false,
    196                                        internal::TiledEvaluation::Off>;
    197   SliceExecutor::run(SliceAssign(slice, s_expr), d);
    198 
    199   // Tensor block and tensor slice must be the same.
    200   for (Index i = 0; i < block.dimensions().TotalSize(); ++i) {
    201     VERIFY_IS_EQUAL(block.coeff(i), slice.coeff(i));
    202   }
    203 }
    204 
    205 // -------------------------------------------------------------------------- //
    206 
    207 template <typename T, int NumDims, int Layout>
    208 static void test_eval_tensor_block() {
    209   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
    210   Tensor<T, NumDims, Layout> input(dims);
    211   input.setRandom();
    212 
    213   // Identity tensor expression transformation.
    214   VerifyBlockEvaluator<T, NumDims, Layout>(
    215       input, [&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
    216 }
    217 
    218 template <typename T, int NumDims, int Layout>
    219 static void test_eval_tensor_unary_expr_block() {
    220   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
    221   Tensor<T, NumDims, Layout> input(dims);
    222   input.setRandom();
    223 
    224   VerifyBlockEvaluator<T, NumDims, Layout>(
    225       input.abs(), [&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
    226 }
    227 
    228 template <typename T, int NumDims, int Layout>
    229 static void test_eval_tensor_binary_expr_block() {
    230   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
    231   Tensor<T, NumDims, Layout> lhs(dims), rhs(dims);
    232   lhs.setRandom();
    233   rhs.setRandom();
    234 
    235   VerifyBlockEvaluator<T, NumDims, Layout>(
    236       lhs * rhs, [&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
    237 }
    238 
    239 template <typename T, int NumDims, int Layout>
    240 static void test_eval_tensor_binary_with_unary_expr_block() {
    241   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
    242   Tensor<T, NumDims, Layout> lhs(dims), rhs(dims);
    243   lhs.setRandom();
    244   rhs.setRandom();
    245 
    246   VerifyBlockEvaluator<T, NumDims, Layout>(
    247       (lhs.square() + rhs.square()).sqrt(),
    248       [&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
    249 }
    250 
    251 template <typename T, int NumDims, int Layout>
    252 static void test_eval_tensor_broadcast() {
    253   DSizes<Index, NumDims> dims = RandomDims<NumDims>(1, 10);
    254   Tensor<T, NumDims, Layout> input(dims);
    255   input.setRandom();
    256 
    257   DSizes<Index, NumDims> bcast = RandomDims<NumDims>(1, 5);
    258 
    259   DSizes<Index, NumDims> bcasted_dims;
    260   for (int i = 0; i < NumDims; ++i) bcasted_dims[i] = dims[i] * bcast[i];
    261 
    262   VerifyBlockEvaluator<T, NumDims, Layout>(
    263       input.broadcast(bcast),
    264       [&bcasted_dims]() { return SkewedInnerBlock<Layout>(bcasted_dims); });
    265 
    266   VerifyBlockEvaluator<T, NumDims, Layout>(
    267       input.broadcast(bcast),
    268       [&bcasted_dims]() { return RandomBlock<Layout>(bcasted_dims, 5, 10); });
    269 
    270   VerifyBlockEvaluator<T, NumDims, Layout>(
    271       input.broadcast(bcast),
    272       [&bcasted_dims]() { return FixedSizeBlock(bcasted_dims); });
    273 
    274   // Check that desc.destination() memory is not shared between two broadcast
    275   // materializations.
    276   VerifyBlockEvaluator<T, NumDims, Layout>(
    277       input.broadcast(bcast) * input.abs().broadcast(bcast),
    278       [&bcasted_dims]() { return SkewedInnerBlock<Layout>(bcasted_dims); });
    279 }
    280 
    281 template <typename T, int NumDims, int Layout>
    282 static void test_eval_tensor_reshape() {
    283   DSizes<Index, NumDims> dims = RandomDims<NumDims>(1, 10);
    284 
    285   DSizes<Index, NumDims> shuffled = dims;
    286   std::shuffle(&shuffled[0], &shuffled[NumDims - 1], std::mt19937(g_seed));
    287 
    288   Tensor<T, NumDims, Layout> input(dims);
    289   input.setRandom();
    290 
    291   VerifyBlockEvaluator<T, NumDims, Layout>(
    292       input.reshape(shuffled),
    293       [&shuffled]() { return RandomBlock<Layout>(shuffled, 1, 10); });
    294 
    295   VerifyBlockEvaluator<T, NumDims, Layout>(
    296       input.reshape(shuffled),
    297       [&shuffled]() { return SkewedInnerBlock<Layout>(shuffled); });
    298 }
    299 
    300 template <typename T, int NumDims, int Layout>
    301 static void test_eval_tensor_cast() {
    302   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
    303   Tensor<T, NumDims, Layout> input(dims);
    304   input.setRandom();
    305 
    306   VerifyBlockEvaluator<T, NumDims, Layout>(
    307       input.template cast<int>().template cast<T>(),
    308       [&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
    309 }
    310 
    311 template <typename T, int NumDims, int Layout>
    312 static void test_eval_tensor_select() {
    313   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
    314   Tensor<T, NumDims, Layout> lhs(dims);
    315   Tensor<T, NumDims, Layout> rhs(dims);
    316   Tensor<bool, NumDims, Layout> cond(dims);
    317   lhs.setRandom();
    318   rhs.setRandom();
    319   cond.setRandom();
    320 
    321   VerifyBlockEvaluator<T, NumDims, Layout>(cond.select(lhs, rhs), [&dims]() {
    322     return RandomBlock<Layout>(dims, 1, 20);
    323   });
    324 }
    325 
    326 template <typename T, int NumDims, int Layout>
    327 static void test_eval_tensor_padding() {
    328   const int inner_dim = Layout == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
    329 
    330   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
    331   Tensor<T, NumDims, Layout> input(dims);
    332   input.setRandom();
    333 
    334   DSizes<Index, NumDims> pad_before = RandomDims<NumDims>(0, 4);
    335   DSizes<Index, NumDims> pad_after = RandomDims<NumDims>(0, 4);
    336   array<std::pair<Index, Index>, NumDims> paddings;
    337   for (int i = 0; i < NumDims; ++i) {
    338     paddings[i] = std::make_pair(pad_before[i], pad_after[i]);
    339   }
    340 
    341   // Test squeezing reads from inner dim.
    342   if (internal::random<bool>()) {
    343     pad_before[inner_dim] = 0;
    344     pad_after[inner_dim] = 0;
    345     paddings[inner_dim] = std::make_pair(0, 0);
    346   }
    347 
    348   DSizes<Index, NumDims> padded_dims;
    349   for (int i = 0; i < NumDims; ++i) {
    350     padded_dims[i] = dims[i] + pad_before[i] + pad_after[i];
    351   }
    352 
    353   VerifyBlockEvaluator<T, NumDims, Layout>(
    354       input.pad(paddings),
    355       [&padded_dims]() { return FixedSizeBlock(padded_dims); });
    356 
    357   VerifyBlockEvaluator<T, NumDims, Layout>(
    358       input.pad(paddings),
    359       [&padded_dims]() { return RandomBlock<Layout>(padded_dims, 1, 10); });
    360 
    361   VerifyBlockEvaluator<T, NumDims, Layout>(
    362       input.pad(paddings),
    363       [&padded_dims]() { return SkewedInnerBlock<Layout>(padded_dims); });
    364 }
    365 
    366 template <typename T, int NumDims, int Layout>
    367 static void test_eval_tensor_chipping() {
    368   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
    369   Tensor<T, NumDims, Layout> input(dims);
    370   input.setRandom();
    371 
    372   Index chip_dim = internal::random<int>(0, NumDims - 1);
    373   Index chip_offset = internal::random<Index>(0, dims[chip_dim] - 2);
    374 
    375   DSizes<Index, NumDims - 1> chipped_dims;
    376   for (Index i = 0; i < chip_dim; ++i) {
    377     chipped_dims[i] = dims[i];
    378   }
    379   for (Index i = chip_dim + 1; i < NumDims; ++i) {
    380     chipped_dims[i - 1] = dims[i];
    381   }
    382 
    383   // Block buffer forwarding.
    384   VerifyBlockEvaluator<T, NumDims - 1, Layout>(
    385       input.chip(chip_offset, chip_dim),
    386       [&chipped_dims]() { return FixedSizeBlock(chipped_dims); });
    387 
    388   VerifyBlockEvaluator<T, NumDims - 1, Layout>(
    389       input.chip(chip_offset, chip_dim),
    390       [&chipped_dims]() { return RandomBlock<Layout>(chipped_dims, 1, 10); });
    391 
    392   // Block expression assignment.
    393   VerifyBlockEvaluator<T, NumDims - 1, Layout>(
    394       input.abs().chip(chip_offset, chip_dim),
    395       [&chipped_dims]() { return FixedSizeBlock(chipped_dims); });
    396 
    397   VerifyBlockEvaluator<T, NumDims - 1, Layout>(
    398       input.abs().chip(chip_offset, chip_dim),
    399       [&chipped_dims]() { return RandomBlock<Layout>(chipped_dims, 1, 10); });
    400 }
    401 
    402 
    403 template<typename T, int NumDims>
    404 struct SimpleTensorGenerator {
    405   T operator()(const array<Index, NumDims>& coords) const {
    406     T result = static_cast<T>(0);
    407     for (int i = 0; i < NumDims; ++i) {
    408       result += static_cast<T>((i + 1) * coords[i]);
    409     }
    410     return result;
    411   }
    412 };
    413 
    414 // Boolean specialization to avoid -Wint-in-bool-context warnings on GCC.
    415 template<int NumDims>
    416 struct SimpleTensorGenerator<bool, NumDims> {
    417   bool operator()(const array<Index, NumDims>& coords) const {
    418     bool result = false;
    419     for (int i = 0; i < NumDims; ++i) {
    420       result ^= coords[i];
    421     }
    422     return result;
    423   }
    424 };
    425 
    426 
    427 template <typename T, int NumDims, int Layout>
    428 static void test_eval_tensor_generator() {
    429   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
    430   Tensor<T, NumDims, Layout> input(dims);
    431   input.setRandom();
    432 
    433   auto generator = SimpleTensorGenerator<T, NumDims>();
    434 
    435   VerifyBlockEvaluator<T, NumDims, Layout>(
    436       input.generate(generator), [&dims]() { return FixedSizeBlock(dims); });
    437 
    438   VerifyBlockEvaluator<T, NumDims, Layout>(
    439       input.generate(generator),
    440       [&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
    441 }
    442 
    443 template <typename T, int NumDims, int Layout>
    444 static void test_eval_tensor_reverse() {
    445   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
    446   Tensor<T, NumDims, Layout> input(dims);
    447   input.setRandom();
    448 
    449   // Randomly reverse dimensions.
    450   Eigen::DSizes<bool, NumDims> reverse;
    451   for (int i = 0; i < NumDims; ++i) reverse[i] = internal::random<bool>();
    452 
    453   VerifyBlockEvaluator<T, NumDims, Layout>(
    454       input.reverse(reverse), [&dims]() { return FixedSizeBlock(dims); });
    455 
    456   VerifyBlockEvaluator<T, NumDims, Layout>(input.reverse(reverse), [&dims]() {
    457     return RandomBlock<Layout>(dims, 1, 10);
    458   });
    459 }
    460 
    461 template <typename T, int NumDims, int Layout>
    462 static void test_eval_tensor_slice() {
    463   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
    464   Tensor<T, NumDims, Layout> input(dims);
    465   input.setRandom();
    466 
    467   // Pick a random slice of an input tensor.
    468   DSizes<Index, NumDims> slice_start = RandomDims<NumDims>(5, 10);
    469   DSizes<Index, NumDims> slice_size = RandomDims<NumDims>(5, 10);
    470 
    471   // Make sure that slice start + size do not overflow tensor dims.
    472   for (int i = 0; i < NumDims; ++i) {
    473     slice_start[i] = numext::mini(dims[i] - 1, slice_start[i]);
    474     slice_size[i] = numext::mini(slice_size[i], dims[i] - slice_start[i]);
    475   }
    476 
    477   VerifyBlockEvaluator<T, NumDims, Layout>(
    478       input.slice(slice_start, slice_size),
    479       [&slice_size]() { return FixedSizeBlock(slice_size); });
    480 
    481   VerifyBlockEvaluator<T, NumDims, Layout>(
    482       input.slice(slice_start, slice_size),
    483       [&slice_size]() { return RandomBlock<Layout>(slice_size, 1, 10); });
    484 }
    485 
    486 template <typename T, int NumDims, int Layout>
    487 static void test_eval_tensor_shuffle() {
    488   DSizes<Index, NumDims> dims = RandomDims<NumDims>(5, 15);
    489   Tensor<T, NumDims, Layout> input(dims);
    490   input.setRandom();
    491 
    492   DSizes<Index, NumDims> shuffle;
    493   for (int i = 0; i < NumDims; ++i) shuffle[i] = i;
    494 
    495   do {
    496     DSizes<Index, NumDims> shuffled_dims;
    497     for (int i = 0; i < NumDims; ++i) shuffled_dims[i] = dims[shuffle[i]];
    498 
    499     VerifyBlockEvaluator<T, NumDims, Layout>(
    500         input.shuffle(shuffle),
    501         [&shuffled_dims]() { return FixedSizeBlock(shuffled_dims); });
    502 
    503     VerifyBlockEvaluator<T, NumDims, Layout>(
    504         input.shuffle(shuffle), [&shuffled_dims]() {
    505           return RandomBlock<Layout>(shuffled_dims, 1, 5);
    506         });
    507 
    508     break;
    509 
    510   } while (std::next_permutation(&shuffle[0], &shuffle[0] + NumDims));
    511 }
    512 
    513 template <typename T, int Layout>
    514 static void test_eval_tensor_reshape_with_bcast() {
    515   Index dim = internal::random<Index>(1, 100);
    516 
    517   Tensor<T, 2, Layout> lhs(1, dim);
    518   Tensor<T, 2, Layout> rhs(dim, 1);
    519   lhs.setRandom();
    520   rhs.setRandom();
    521 
    522   auto reshapeLhs = NByOne(dim);
    523   auto reshapeRhs = OneByM(dim);
    524 
    525   auto bcastLhs = OneByM(dim);
    526   auto bcastRhs = NByOne(dim);
    527 
    528   DSizes<Index, 2> dims(dim, dim);
    529 
    530   VerifyBlockEvaluator<T, 2, Layout>(
    531       lhs.reshape(reshapeLhs).broadcast(bcastLhs) *
    532           rhs.reshape(reshapeRhs).broadcast(bcastRhs),
    533       [dims]() { return SkewedInnerBlock<Layout, 2>(dims); });
    534 }
    535 
    536 template <typename T, int Layout>
    537 static void test_eval_tensor_forced_eval() {
    538   Index dim = internal::random<Index>(1, 100);
    539 
    540   Tensor<T, 2, Layout> lhs(dim, 1);
    541   Tensor<T, 2, Layout> rhs(1, dim);
    542   lhs.setRandom();
    543   rhs.setRandom();
    544 
    545   auto bcastLhs = OneByM(dim);
    546   auto bcastRhs = NByOne(dim);
    547 
    548   DSizes<Index, 2> dims(dim, dim);
    549 
    550   VerifyBlockEvaluator<T, 2, Layout>(
    551       (lhs.broadcast(bcastLhs) * rhs.broadcast(bcastRhs)).eval().reshape(dims),
    552       [dims]() { return SkewedInnerBlock<Layout, 2>(dims); });
    553 
    554   VerifyBlockEvaluator<T, 2, Layout>(
    555       (lhs.broadcast(bcastLhs) * rhs.broadcast(bcastRhs)).eval().reshape(dims),
    556       [dims]() { return RandomBlock<Layout, 2>(dims, 1, 50); });
    557 }
    558 
    559 template <typename T, int Layout>
    560 static void test_eval_tensor_chipping_of_bcast() {
    561   if (Layout != static_cast<int>(RowMajor)) return;
    562 
    563   Index dim0 = internal::random<Index>(1, 10);
    564   Index dim1 = internal::random<Index>(1, 10);
    565   Index dim2 = internal::random<Index>(1, 10);
    566 
    567   Tensor<T, 3, Layout> input(1, dim1, dim2);
    568   input.setRandom();
    569 
    570   Eigen::array<Index, 3> bcast = {{dim0, 1, 1}};
    571   DSizes<Index, 2> chipped_dims(dim0, dim2);
    572 
    573   VerifyBlockEvaluator<T, 2, Layout>(
    574       input.broadcast(bcast).chip(0, 1),
    575       [chipped_dims]() { return FixedSizeBlock(chipped_dims); });
    576 
    577   VerifyBlockEvaluator<T, 2, Layout>(
    578       input.broadcast(bcast).chip(0, 1),
    579       [chipped_dims]() { return SkewedInnerBlock<Layout, 2>(chipped_dims); });
    580 
    581   VerifyBlockEvaluator<T, 2, Layout>(
    582       input.broadcast(bcast).chip(0, 1),
    583       [chipped_dims]() { return RandomBlock<Layout, 2>(chipped_dims, 1, 5); });
    584 }
    585 
    586 // -------------------------------------------------------------------------- //
// Verify that assigning a block to a Tensor expression produces the same result
// as an assignment to a TensorSliceOp (writing a block is identical to
// assigning one tensor to a slice of another tensor).
    590 
    591 template <typename T, int NumDims, int Layout, int NumExprDims = NumDims,
    592           typename Expression, typename GenBlockParams>
    593 static void VerifyBlockAssignment(Tensor<T, NumDims, Layout>& tensor,
    594                                   Expression expr, GenBlockParams gen_block) {
    595   using Device = DefaultDevice;
    596   auto d = Device();
    597 
    598   // We use tensor evaluator as a target for block and slice assignments.
    599   auto eval = TensorEvaluator<decltype(expr), Device>(expr, d);
    600 
    601   // Generate a random block, or choose a block that fits in full expression.
    602   TensorBlockParams<NumExprDims> block_params = gen_block();
    603 
    604   // Generate random data of the selected block size.
    605   Tensor<T, NumExprDims, Layout> block(block_params.desc.dimensions());
    606   block.setRandom();
    607 
    608   // ************************************************************************ //
    609   // (1) Assignment from a block.
    610 
    611   // Construct a materialize block from a random generated block tensor.
    612   internal::TensorMaterializedBlock<T, NumExprDims, Layout> blk(
    613       internal::TensorBlockKind::kView, block.data(), block.dimensions());
    614 
    615   // Reset all underlying tensor values to zero.
    616   tensor.setZero();
    617 
    618   // Use evaluator to write block into a tensor.
    619   eval.writeBlock(block_params.desc, blk);
    620 
    621   // Make a copy of the result after assignment.
    622   Tensor<T, NumDims, Layout> block_assigned = tensor;
    623 
    624   // ************************************************************************ //
    625   // (2) Assignment to a slice
    626 
    627   // Reset all underlying tensor values to zero.
    628   tensor.setZero();
    629 
    630   // Assign block to a slice of original expression
    631   auto s_expr = expr.slice(block_params.offsets, block_params.sizes);
    632 
    633   // Explicitly use coefficient assignment to evaluate slice expression.
    634   using SliceAssign = TensorAssignOp<decltype(s_expr), const decltype(block)>;
    635   using SliceExecutor = TensorExecutor<const SliceAssign, Device, false,
    636                                        internal::TiledEvaluation::Off>;
    637   SliceExecutor::run(SliceAssign(s_expr, block), d);
    638 
    639   // Make a copy of the result after assignment.
    640   Tensor<T, NumDims, Layout> slice_assigned = tensor;
    641 
    642   for (Index i = 0; i < tensor.dimensions().TotalSize(); ++i) {
    643     VERIFY_IS_EQUAL(block_assigned.coeff(i), slice_assigned.coeff(i));
    644   }
    645 }
    646 
    647 // -------------------------------------------------------------------------- //
    648 
    649 template <typename T, int NumDims, int Layout>
    650 static void test_assign_to_tensor() {
    651   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
    652   Tensor<T, NumDims, Layout> tensor(dims);
    653 
    654   TensorMap<Tensor<T, NumDims, Layout>> map(tensor.data(), dims);
    655 
    656   VerifyBlockAssignment<T, NumDims, Layout>(
    657       tensor, map, [&dims]() { return RandomBlock<Layout>(dims, 10, 20); });
    658   VerifyBlockAssignment<T, NumDims, Layout>(
    659       tensor, map, [&dims]() { return FixedSizeBlock(dims); });
    660 }
    661 
    662 template <typename T, int NumDims, int Layout>
    663 static void test_assign_to_tensor_reshape() {
    664   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
    665   Tensor<T, NumDims, Layout> tensor(dims);
    666 
    667   TensorMap<Tensor<T, NumDims, Layout>> map(tensor.data(), dims);
    668 
    669   DSizes<Index, NumDims> shuffled = dims;
    670   std::shuffle(&shuffled[0], &shuffled[NumDims - 1], std::mt19937(g_seed));
    671 
    672   VerifyBlockAssignment<T, NumDims, Layout>(
    673       tensor, map.reshape(shuffled),
    674       [&shuffled]() { return RandomBlock<Layout>(shuffled, 1, 10); });
    675 
    676   VerifyBlockAssignment<T, NumDims, Layout>(
    677       tensor, map.reshape(shuffled),
    678       [&shuffled]() { return SkewedInnerBlock<Layout>(shuffled); });
    679 
    680   VerifyBlockAssignment<T, NumDims, Layout>(
    681       tensor, map.reshape(shuffled),
    682       [&shuffled]() { return FixedSizeBlock(shuffled); });
    683 }
    684 
    685 template <typename T, int NumDims, int Layout>
    686 static void test_assign_to_tensor_chipping() {
    687   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
    688   Tensor<T, NumDims, Layout> tensor(dims);
    689 
    690   Index chip_dim = internal::random<int>(0, NumDims - 1);
    691   Index chip_offset = internal::random<Index>(0, dims[chip_dim] - 2);
    692 
    693   DSizes<Index, NumDims - 1> chipped_dims;
    694   for (Index i = 0; i < chip_dim; ++i) {
    695     chipped_dims[i] = dims[i];
    696   }
    697   for (Index i = chip_dim + 1; i < NumDims; ++i) {
    698     chipped_dims[i - 1] = dims[i];
    699   }
    700 
    701   TensorMap<Tensor<T, NumDims, Layout>> map(tensor.data(), dims);
    702 
    703   VerifyBlockAssignment<T, NumDims, Layout, NumDims - 1>(
    704       tensor, map.chip(chip_offset, chip_dim),
    705       [&chipped_dims]() { return RandomBlock<Layout>(chipped_dims, 1, 10); });
    706 
    707   VerifyBlockAssignment<T, NumDims, Layout, NumDims - 1>(
    708       tensor, map.chip(chip_offset, chip_dim),
    709       [&chipped_dims]() { return SkewedInnerBlock<Layout>(chipped_dims); });
    710 
    711   VerifyBlockAssignment<T, NumDims, Layout, NumDims - 1>(
    712       tensor, map.chip(chip_offset, chip_dim),
    713       [&chipped_dims]() { return FixedSizeBlock(chipped_dims); });
    714 }
    715 
    716 template <typename T, int NumDims, int Layout>
    717 static void test_assign_to_tensor_slice() {
    718   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
    719   Tensor<T, NumDims, Layout> tensor(dims);
    720 
    721   // Pick a random slice of tensor.
    722   DSizes<Index, NumDims> slice_start = RandomDims<NumDims>(5, 10);
    723   DSizes<Index, NumDims> slice_size = RandomDims<NumDims>(5, 10);
    724 
    725   // Make sure that slice start + size do not overflow tensor dims.
    726   for (int i = 0; i < NumDims; ++i) {
    727     slice_start[i] = numext::mini(dims[i] - 1, slice_start[i]);
    728     slice_size[i] = numext::mini(slice_size[i], dims[i] - slice_start[i]);
    729   }
    730 
    731   TensorMap<Tensor<T, NumDims, Layout>> map(tensor.data(), dims);
    732 
    733   VerifyBlockAssignment<T, NumDims, Layout>(
    734       tensor, map.slice(slice_start, slice_size),
    735       [&slice_size]() { return RandomBlock<Layout>(slice_size, 1, 10); });
    736 
    737   VerifyBlockAssignment<T, NumDims, Layout>(
    738       tensor, map.slice(slice_start, slice_size),
    739       [&slice_size]() { return SkewedInnerBlock<Layout>(slice_size); });
    740 
    741   VerifyBlockAssignment<T, NumDims, Layout>(
    742       tensor, map.slice(slice_start, slice_size),
    743       [&slice_size]() { return FixedSizeBlock(slice_size); });
    744 }
    745 
    746 template <typename T, int NumDims, int Layout>
    747 static void test_assign_to_tensor_shuffle() {
    748   DSizes<Index, NumDims> dims = RandomDims<NumDims>(5, 15);
    749   Tensor<T, NumDims, Layout> tensor(dims);
    750 
    751   DSizes<Index, NumDims> shuffle;
    752   for (int i = 0; i < NumDims; ++i) shuffle[i] = i;
    753 
    754   TensorMap<Tensor<T, NumDims, Layout>> map(tensor.data(), dims);
    755 
    756   do {
    757     DSizes<Index, NumDims> shuffled_dims;
    758     for (int i = 0; i < NumDims; ++i) shuffled_dims[i] = dims[shuffle[i]];
    759 
    760     VerifyBlockAssignment<T, NumDims, Layout>(
    761         tensor, map.shuffle(shuffle),
    762         [&shuffled_dims]() { return FixedSizeBlock(shuffled_dims); });
    763 
    764     VerifyBlockAssignment<T, NumDims, Layout>(
    765         tensor, map.shuffle(shuffle), [&shuffled_dims]() {
    766           return RandomBlock<Layout>(shuffled_dims, 1, 5);
    767         });
    768 
    769   } while (std::next_permutation(&shuffle[0], &shuffle[0] + NumDims));
    770 }
    771 
    772 // -------------------------------------------------------------------------- //
    773 
    774 #define CALL_SUBTEST_PART(PART) \
    775   CALL_SUBTEST_##PART
    776 
    777 #define CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(PART, NAME)           \
    778   CALL_SUBTEST_PART(PART)((NAME<float, 1, RowMajor>())); \
    779   CALL_SUBTEST_PART(PART)((NAME<float, 2, RowMajor>())); \
    780   CALL_SUBTEST_PART(PART)((NAME<float, 3, RowMajor>())); \
    781   CALL_SUBTEST_PART(PART)((NAME<float, 4, RowMajor>())); \
    782   CALL_SUBTEST_PART(PART)((NAME<float, 5, RowMajor>())); \
    783   CALL_SUBTEST_PART(PART)((NAME<float, 1, ColMajor>())); \
    784   CALL_SUBTEST_PART(PART)((NAME<float, 2, ColMajor>())); \
    785   CALL_SUBTEST_PART(PART)((NAME<float, 4, ColMajor>())); \
    786   CALL_SUBTEST_PART(PART)((NAME<float, 4, ColMajor>())); \
    787   CALL_SUBTEST_PART(PART)((NAME<float, 5, ColMajor>())); \
    788   CALL_SUBTEST_PART(PART)((NAME<int, 1, RowMajor>())); \
    789   CALL_SUBTEST_PART(PART)((NAME<int, 2, RowMajor>())); \
    790   CALL_SUBTEST_PART(PART)((NAME<int, 3, RowMajor>())); \
    791   CALL_SUBTEST_PART(PART)((NAME<int, 4, RowMajor>())); \
    792   CALL_SUBTEST_PART(PART)((NAME<int, 5, RowMajor>())); \
    793   CALL_SUBTEST_PART(PART)((NAME<int, 1, ColMajor>())); \
    794   CALL_SUBTEST_PART(PART)((NAME<int, 2, ColMajor>())); \
    795   CALL_SUBTEST_PART(PART)((NAME<int, 4, ColMajor>())); \
    796   CALL_SUBTEST_PART(PART)((NAME<int, 4, ColMajor>())); \
    797   CALL_SUBTEST_PART(PART)((NAME<int, 5, ColMajor>())); \
    798   CALL_SUBTEST_PART(PART)((NAME<bool, 1, RowMajor>())); \
    799   CALL_SUBTEST_PART(PART)((NAME<bool, 2, RowMajor>())); \
    800   CALL_SUBTEST_PART(PART)((NAME<bool, 3, RowMajor>())); \
    801   CALL_SUBTEST_PART(PART)((NAME<bool, 4, RowMajor>())); \
    802   CALL_SUBTEST_PART(PART)((NAME<bool, 5, RowMajor>())); \
    803   CALL_SUBTEST_PART(PART)((NAME<bool, 1, ColMajor>())); \
    804   CALL_SUBTEST_PART(PART)((NAME<bool, 2, ColMajor>())); \
    805   CALL_SUBTEST_PART(PART)((NAME<bool, 4, ColMajor>())); \
    806   CALL_SUBTEST_PART(PART)((NAME<bool, 4, ColMajor>())); \
    807   CALL_SUBTEST_PART(PART)((NAME<bool, 5, ColMajor>()))
    808 
    809 #define CALL_SUBTESTS_DIMS_LAYOUTS(PART, NAME)     \
    810   CALL_SUBTEST_PART(PART)((NAME<float, 1, RowMajor>())); \
    811   CALL_SUBTEST_PART(PART)((NAME<float, 2, RowMajor>())); \
    812   CALL_SUBTEST_PART(PART)((NAME<float, 3, RowMajor>())); \
    813   CALL_SUBTEST_PART(PART)((NAME<float, 4, RowMajor>())); \
    814   CALL_SUBTEST_PART(PART)((NAME<float, 5, RowMajor>())); \
    815   CALL_SUBTEST_PART(PART)((NAME<float, 1, ColMajor>())); \
    816   CALL_SUBTEST_PART(PART)((NAME<float, 2, ColMajor>())); \
    817   CALL_SUBTEST_PART(PART)((NAME<float, 4, ColMajor>())); \
    818   CALL_SUBTEST_PART(PART)((NAME<float, 4, ColMajor>())); \
    819   CALL_SUBTEST_PART(PART)((NAME<float, 5, ColMajor>()))
    820 
    821 #define CALL_SUBTESTS_LAYOUTS_TYPES(PART, NAME)       \
    822   CALL_SUBTEST_PART(PART)((NAME<float, RowMajor>())); \
    823   CALL_SUBTEST_PART(PART)((NAME<float, ColMajor>()));  \
    824   CALL_SUBTEST_PART(PART)((NAME<bool, RowMajor>())); \
    825   CALL_SUBTEST_PART(PART)((NAME<bool, ColMajor>()))
    826 
    827 EIGEN_DECLARE_TEST(cxx11_tensor_block_eval) {
    828   // clang-format off
    829   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(1, test_eval_tensor_block);
    830   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(1, test_eval_tensor_binary_expr_block);
    831   CALL_SUBTESTS_DIMS_LAYOUTS(1, test_eval_tensor_unary_expr_block);
    832   CALL_SUBTESTS_DIMS_LAYOUTS(2, test_eval_tensor_binary_with_unary_expr_block);
    833   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(2, test_eval_tensor_broadcast);
    834   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(2, test_eval_tensor_reshape);
    835   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(3, test_eval_tensor_cast);
    836   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(3, test_eval_tensor_select);
    837   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(3, test_eval_tensor_padding);
    838   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(4, test_eval_tensor_chipping);
    839   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(4, test_eval_tensor_generator);
    840   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(4, test_eval_tensor_reverse);
    841   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(5, test_eval_tensor_slice);
    842   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(5, test_eval_tensor_shuffle);
    843 
    844   CALL_SUBTESTS_LAYOUTS_TYPES(6, test_eval_tensor_reshape_with_bcast);
    845   CALL_SUBTESTS_LAYOUTS_TYPES(6, test_eval_tensor_forced_eval);
    846   CALL_SUBTESTS_LAYOUTS_TYPES(6, test_eval_tensor_chipping_of_bcast);
    847 
    848   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(7, test_assign_to_tensor);
    849   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(7, test_assign_to_tensor_reshape);
    850   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(7, test_assign_to_tensor_chipping);
    851   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(8, test_assign_to_tensor_slice);
    852   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(8, test_assign_to_tensor_shuffle);
    853 
    854   // Force CMake to split this test.
    855   // EIGEN_SUFFIXES;1;2;3;4;5;6;7;8
    856 
    857   // clang-format on
    858 }