// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
#define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H

namespace Eigen {

/** \class TensorReshaping
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor reshaping class.
  *
  * Reinterprets the coefficients of the input expression under a new set of
  * dimensions; the total number of coefficients must remain unchanged.
  */
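// A brief usage sketch (not part of the original documentation): reshaping
// reinterprets the coefficients of an expression under a new set of dimensions
// whose total size must match that of the input. Assuming a float tensor
// `input` of size 2x3x4 (the names below are illustrative only):
//
//   Eigen::Tensor<float, 3> input(2, 3, 4);
//   input.setRandom();
//   Eigen::array<Eigen::DenseIndex, 2> new_dims{{4, 6}};
//   Eigen::Tensor<float, 2> reshaped = input.reshape(new_dims);
//   // `reshaped` walks the same underlying coefficients in storage order.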
namespace internal {
template<typename NewDimensions, typename XprType>
struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<NewDimensions>::value;
  static const int Layout = XprTraits::Layout;
};

template<typename NewDimensions, typename XprType>
struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense>
{
  typedef const TensorReshapingOp<NewDimensions, XprType>& type;
};

template<typename NewDimensions, typename XprType>
struct nested<TensorReshapingOp<NewDimensions, XprType>, 1, typename eval<TensorReshapingOp<NewDimensions, XprType> >::type>
{
  typedef TensorReshapingOp<NewDimensions, XprType> type;
};

}  // end namespace internal


template<typename NewDimensions, typename XprType>
class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors>
{
  public:
  typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar;
  typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims)
      : m_xpr(expr), m_dims(dims) {}

    EIGEN_DEVICE_FUNC
    const NewDimensions& dimensions() const { return m_dims; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const TensorReshapingOp& other)
    {
      typedef TensorAssignOp<TensorReshapingOp, const TensorReshapingOp> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const OtherDerived& other)
    {
      typedef TensorAssignOp<TensorReshapingOp, const OtherDerived> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

  protected:
    typename XprType::Nested m_xpr;
    const NewDimensions m_dims;
};


// Eval as rvalue
template<typename NewDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
{
  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
  typedef NewDimensions Dimensions;

  enum {
    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_dimensions(op.dimensions())
  {
    // The total size of the reshaped tensor must be equal to the total size
    // of the input tensor.
    eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
    return m_impl.evalSubExprsIfNeeded(data);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(index);
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_impl.template packet<LoadMode>(index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized);
  }

  EIGEN_DEVICE_FUNC Scalar* data() const { return const_cast<Scalar*>(m_impl.data()); }

  EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }

 protected:
  TensorEvaluator<ArgType, Device> m_impl;
  NewDimensions m_dimensions;
};


// Eval as lvalue
template<typename NewDimensions, typename ArgType, typename Device>
struct TensorEvaluator<TensorReshapingOp<NewDimensions, ArgType>, Device>
  : public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Base;
  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
  typedef NewDimensions Dimensions;

  enum {
    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(index);
  }
  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    this->m_impl.template writePacket<StoreMode>(index, x);
  }
};


/** \class TensorSlicing
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor slicing class.
  *
  * Extracts a sub-tensor of the input expression, given per-dimension start
  * indices and sizes.
  */
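// A brief usage sketch (not part of the original documentation): slicing
// extracts a sub-tensor given per-dimension start indices and sizes. Assuming
// a float tensor `input` of size 4x5 (the names below are illustrative only),
// a 2x2 block starting at (1, 2) could be taken as follows:
//
//   Eigen::Tensor<float, 2> input(4, 5);
//   input.setRandom();
//   Eigen::array<Eigen::DenseIndex, 2> offsets{{1, 2}};
//   Eigen::array<Eigen::DenseIndex, 2> extents{{2, 2}};
//   Eigen::Tensor<float, 2> block = input.slice(offsets, extents);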
namespace internal {
template<typename StartIndices, typename Sizes, typename XprType>
struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<StartIndices>::value;
  static const int Layout = XprTraits::Layout;
};

template<typename StartIndices, typename Sizes, typename XprType>
struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense>
{
  typedef const TensorSlicingOp<StartIndices, Sizes, XprType>& type;
};

template<typename StartIndices, typename Sizes, typename XprType>
struct nested<TensorSlicingOp<StartIndices, Sizes, XprType>, 1, typename eval<TensorSlicingOp<StartIndices, Sizes, XprType> >::type>
{
  typedef TensorSlicingOp<StartIndices, Sizes, XprType> type;
};

}  // end namespace internal


template<typename StartIndices, typename Sizes, typename XprType>
class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> >
{
  public:
  typedef typename Eigen::internal::traits<TensorSlicingOp>::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorSlicingOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorSlicingOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorSlicingOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices, const Sizes& sizes)
      : m_xpr(expr), m_indices(indices), m_sizes(sizes) {}

    EIGEN_DEVICE_FUNC
    const StartIndices& startIndices() const { return m_indices; }
    EIGEN_DEVICE_FUNC
    const Sizes& sizes() const { return m_sizes; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const OtherDerived& other)
    {
      typedef TensorAssignOp<TensorSlicingOp, const OtherDerived> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const TensorSlicingOp& other)
    {
      typedef TensorAssignOp<TensorSlicingOp, const TensorSlicingOp> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

  protected:
    typename XprType::Nested m_xpr;
    const StartIndices m_indices;
    const Sizes m_sizes;
};


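// Because TensorSlicingOp provides assignment operators, a slice can also be
// used as an lvalue to overwrite part of a tensor. A hedged sketch, reusing
// the illustrative `offsets` and `extents` arrays from the example above and
// assuming `patch` is a float tensor with the sliced shape:
//
//   input.slice(offsets, extents) = patch;
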
// Fixme: figure out the exact threshold
namespace {
template <typename Index, typename Device> struct MemcpyTriggerForSlicing {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { }
  EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > threshold_; }

 private:
  Index threshold_;
};

// It is very expensive to start the memcpy kernel on GPU: we therefore only
// use it for large copies.
#ifdef EIGEN_USE_GPU
template <typename Index> struct MemcpyTriggerForSlicing<Index, GpuDevice>  {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) { }
  EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > 4*1024*1024; }
};
#endif
}  // namespace

// Eval as rvalue
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
{
  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
  static const int NumDims = internal::array_size<Sizes>::value;

  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets and sizes.
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices())
  {
    for (std::size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
      eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]);
    }

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    const Sizes& output_dims = op.sizes();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
      }

      // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
      }
    } else {
      m_inputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
      }

      // Don't initialize m_fastOutputStrides[NumDims-1] since it won't ever be accessed.
      m_outputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
      }
    }
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Sizes Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

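  // Fast path for materializing the slice: when the scalar type needs no
  // construction, a destination buffer is provided, and the input data is
  // already available in memory, whole contiguous runs (the innermost
  // dimensions that are not actually sliced) are copied with memcpy instead
  // of being evaluated coefficient by coefficient. Returning false signals
  // that the result has been fully written to `data`.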
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
    m_impl.evalSubExprsIfNeeded(NULL);
    if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data && m_impl.data()) {
      Index contiguous_values = 1;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = 0; i < NumDims; ++i) {
          contiguous_values *= dimensions()[i];
          if (dimensions()[i] != m_impl.dimensions()[i]) {
            break;
          }
        }
      } else {
        for (int i = NumDims-1; i >= 0; --i) {
          contiguous_values *= dimensions()[i];
          if (dimensions()[i] != m_impl.dimensions()[i]) {
            break;
          }
        }
      }
      // Use memcpy if it's going to be faster than using the regular evaluation.
      const MemcpyTriggerForSlicing<Index, Device> trigger(m_device);
      if (trigger(contiguous_values)) {
        Scalar* src = (Scalar*)m_impl.data();
        for (int i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
          Index offset = srcCoeff(i);
          m_device.memcpy((void*)(data+i), src+offset, contiguous_values * sizeof(Scalar));
        }
        return false;
      }
    }
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
    EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+packetSize-1 < internal::array_prod(dimensions()));

    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + packetSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / m_fastOutputStrides[i];
        const Index idx1 = indices[1] / m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + m_offsets[0]);
      inputIndices[1] += (indices[1] + m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / m_fastOutputStrides[i];
        const Index idx1 = indices[1] / m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + m_offsets[NumDims-1]);
      inputIndices[1] += (indices[1] + m_offsets[NumDims-1]);
    }
    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
      PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
      return rslt;
    }
    else {
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
      values[0] = m_impl.coeff(inputIndices[0]);
      values[packetSize-1] = m_impl.coeff(inputIndices[1]);
      for (int i = 1; i < packetSize-1; ++i) {
        values[i] = coeff(index+i);
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims);
  }


  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const {
    Scalar* result = m_impl.data();
    if (result) {
      Index offset = 0;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = 0; i < NumDims; ++i) {
          if (m_dimensions[i] != m_impl.dimensions()[i]) {
            offset += m_offsets[i] * m_inputStrides[i];
            for (int j = i+1; j < NumDims; ++j) {
              if (m_dimensions[j] > 1) {
                return NULL;
              }
              offset += m_offsets[j] * m_inputStrides[j];
            }
            break;
          }
        }
      } else {
        for (int i = NumDims - 1; i >= 0; --i) {
          if (m_dimensions[i] != m_impl.dimensions()[i]) {
            offset += m_offsets[i] * m_inputStrides[i];
            for (int j = i-1; j >= 0; --j) {
              if (m_dimensions[j] > 1) {
                return NULL;
              }
              offset += m_offsets[j] * m_inputStrides[j];
            }
            break;
          }
        }
      }
      return result + offset;
    }
    return NULL;
  }

 protected:
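  // Maps a linear index within the slice (output) to the corresponding linear
  // index within the input tensor: each output coordinate is recovered using
  // the precomputed output strides, shifted by the slice offset, and
  // re-linearized with the matching input stride.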
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += (index + m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += (index + m_offsets[NumDims-1]);
    }
    return inputIndex;
  }

  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  const Device& m_device;
  Dimensions m_dimensions;
  const StartIndices m_offsets;
};


// Eval as lvalue
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
  : public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base;
  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
  static const int NumDims = internal::array_size<Sizes>::value;

  enum {
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
    { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Sizes Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + packetSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + this->m_offsets[0]);
      inputIndices[1] += (indices[1] + this->m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]);
      inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]);
    }
    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
      this->m_impl.template writePacket<StoreMode>(inputIndices[0], x);
    }
    else {
      EIGEN_ALIGN_MAX CoeffReturnType values[packetSize];
      internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
      this->m_impl.coeffRef(inputIndices[0]) = values[0];
      this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1];
      for (int i = 1; i < packetSize-1; ++i) {
        this->coeffRef(index+i) = values[i];
      }
    }
  }
};


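/** \class TensorStridingSlicing
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor strided slicing class.
  *
  * Selects the coefficients lying between per-dimension start and stop
  * indices, stepping by a per-dimension stride (which may be negative).
  */
// A brief usage sketch (not part of the original documentation), assuming the
// expression is built through TensorBase::stridedSlice() and that `input` is
// an illustrative 1-D float tensor of size 10; selecting every other element
// of the half-open range [2, 8) could look roughly like this:
//
//   Eigen::array<Eigen::DenseIndex, 1> start{{2}};
//   Eigen::array<Eigen::DenseIndex, 1> stop{{8}};
//   Eigen::array<Eigen::DenseIndex, 1> step{{2}};
//   Eigen::Tensor<float, 1> every_other = input.stridedSlice(start, stop, step);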
namespace internal {
template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<StartIndices>::value;
  static const int Layout = XprTraits::Layout;
};

template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Eigen::Dense>
{
  typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>& type;
};

template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct nested<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, 1, typename eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >::type>
{
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> type;
};

}  // end namespace internal


template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
class TensorStridingSlicingOp : public TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >
{
  public:
  typedef typename internal::traits<TensorStridingSlicingOp>::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename internal::nested<TensorStridingSlicingOp>::type Nested;
  typedef typename internal::traits<TensorStridingSlicingOp>::StorageKind StorageKind;
  typedef typename internal::traits<TensorStridingSlicingOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp(
    const XprType& expr, const StartIndices& startIndices,
    const StopIndices& stopIndices, const Strides& strides)
      : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices),
        m_strides(strides) {}

    EIGEN_DEVICE_FUNC
    const StartIndices& startIndices() const { return m_startIndices; }
    EIGEN_DEVICE_FUNC
    const StopIndices& stopIndices() const { return m_stopIndices; }
    EIGEN_DEVICE_FUNC
    const Strides& strides() const { return m_strides; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const TensorStridingSlicingOp& other)
    {
      typedef TensorAssignOp<TensorStridingSlicingOp, const TensorStridingSlicingOp> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(
          assign, DefaultDevice());
      return *this;
    }

    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const OtherDerived& other)
    {
      typedef TensorAssignOp<TensorStridingSlicingOp, const OtherDerived> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(
          assign, DefaultDevice());
      return *this;
    }

  protected:
    typename XprType::Nested m_xpr;
    const StartIndices m_startIndices;
    const StopIndices m_stopIndices;
    const Strides m_strides;
};

// Eval as rvalue
template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
{
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
  static const int NumDims = internal::array_size<Strides>::value;

  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets and sizes.
    IsAligned = false,
    PacketAccess = false,
    BlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_device(device), m_strides(op.strides())
  {
    // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero
    DSizes<Index,NumDims> startIndicesClamped, stopIndicesClamped;
    for (size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
      eigen_assert(m_strides[i] != 0 && "0 stride is invalid");
      if (m_strides[i] > 0) {
        startIndicesClamped[i] = clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]);
        stopIndicesClamped[i] = clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]);
      } else {
        /* implies m_strides[i]<0 by assert */
        startIndicesClamped[i] = clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1);
        stopIndicesClamped[i] = clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1);
      }
      m_startIndices[i] = startIndicesClamped[i];
    }

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();

    // check for degenerate intervals and compute output tensor shape
    bool degenerate = false;
    for (int i = 0; i < NumDims; ++i) {
      Index interval = stopIndicesClamped[i] - startIndicesClamped[i];
      if (interval == 0 || ((interval < 0) != (m_strides[i] < 0))) {
        m_dimensions[i] = 0;
        degenerate = true;
      } else {
        m_dimensions[i] = interval / m_strides[i]
                          + (interval % m_strides[i] != 0 ? 1 : 0);
        eigen_assert(m_dimensions[i] >= 0);
      }
    }
    Strides output_dims = m_dimensions;

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = m_strides[0];
      m_offsets[0] = startIndicesClamped[0];
      Index previousDimProduct = 1;
      for (int i = 1; i < NumDims; ++i) {
        previousDimProduct *= input_dims[i-1];
        m_inputStrides[i] = previousDimProduct * m_strides[i];
        m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
      }

      // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
        // NOTE: if the tensor is degenerate, pass 1 to avoid crashing the TensorIntDivisor constructor.
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);
      }
    } else {
      m_inputStrides[NumDims-1] = m_strides[NumDims-1];
      m_offsets[NumDims-1] = startIndicesClamped[NumDims-1];
      Index previousDimProduct = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        previousDimProduct *= input_dims[i+1];
        m_inputStrides[i] = previousDimProduct * m_strides[i];
        m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
      }

      m_outputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
        // NOTE: if the tensor is degenerate, pass 1 to avoid crashing the TensorIntDivisor constructor.
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);
      }
    }
    m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1),
                                          device.lastLevelCacheSize() /
                                          sizeof(Scalar));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::remove_const<Scalar>::type ScalarNonConst;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Strides Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }


  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const {
    return NULL;
  }

 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i >= 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i] + m_offsets[i];
        index -= idx * m_outputStrides[i];
      }
    } else {
      for (int i = 0; i < NumDims; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i] + m_offsets[i];
        index -= idx * m_outputStrides[i];
      }
    }
    return inputIndex;
  }

  static EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) {
    return numext::maxi(min, numext::mini(max, value));
  }

  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  const Device& m_device;
  DSizes<Index, NumDims> m_startIndices; // clamped startIndices
  DSizes<Index, NumDims> m_dimensions;
  DSizes<Index, NumDims> m_offsets; // offset in a flattened shape
  const Strides m_strides;
  std::size_t m_block_total_size_max;
};

// Eval as lvalue
template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
  : public TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> Base;
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
  static const int NumDims = internal::array_size<Strides>::value;

  enum {
    IsAligned = false,
    PacketAccess = false,
    BlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
    { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::remove_const<Scalar>::type ScalarNonConst;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Strides Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }
};


} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H