// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
#define EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H

namespace Eigen {

/** \class TensorChippingOp
  * \ingroup CXX11_Tensor_Module
  *
  * \brief A chip is a thin slice of a tensor: one dimension is fixed at a given
  * offset, reducing the rank by one. For a 2-d tensor a chip is a row or a column.
  *
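  * A minimal usage sketch (illustrative only, not part of the original
  * documentation; assumes a rank-3 float tensor):
  * \code
  * Eigen::Tensor<float, 3> input(4, 5, 6);
  * input.setRandom();
  * // Fix dimension 1 at offset 2: the result is a rank-2, 4x6 slice.
  * Eigen::Tensor<float, 2> slice = input.chip<1>(2);
  * // The chipped dimension can also be chosen at runtime (DimId == Dynamic).
  * Eigen::Tensor<float, 2> other = input.chip(2, 1);
  * \endcode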
  */

namespace internal {
template<DenseIndex DimId, typename XprType>
struct traits<TensorChippingOp<DimId, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions - 1;
  static const int Layout = XprTraits::Layout;
};

template<DenseIndex DimId, typename XprType>
struct eval<TensorChippingOp<DimId, XprType>, Eigen::Dense>
{
  typedef const TensorChippingOp<DimId, XprType>& type;
};

template<DenseIndex DimId, typename XprType>
struct nested<TensorChippingOp<DimId, XprType>, 1, typename eval<TensorChippingOp<DimId, XprType> >::type>
{
  typedef TensorChippingOp<DimId, XprType> type;
};

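// DimensionId records which dimension is being chipped. When DimId is known at
// compile time the index is carried in the type and needs no storage; the
// specialization for Dynamic below keeps the runtime value in a member instead.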
template <DenseIndex DimId>
struct DimensionId
{
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) {
    eigen_assert(dim == DimId);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const {
    return DimId;
  }
};
template <>
struct DimensionId<Dynamic>
{
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) : actual_dim(dim) {
    eigen_assert(dim >= 0);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const {
    return actual_dim;
  }
 private:
  const DenseIndex actual_dim;
};


}  // end namespace internal



template<DenseIndex DimId, typename XprType>
class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> >
{
  public:
  typedef typename Eigen::internal::traits<TensorChippingOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorChippingOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorChippingOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorChippingOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp(const XprType& expr, const Index offset, const Index dim)
      : m_xpr(expr), m_offset(offset), m_dim(dim) {
  }

  EIGEN_DEVICE_FUNC
  const Index offset() const { return m_offset; }
  EIGEN_DEVICE_FUNC
  const Index dim() const { return m_dim.actualDim(); }

  EIGEN_DEVICE_FUNC
  const typename internal::remove_all<typename XprType::Nested>::type&
  expression() const { return m_xpr; }

  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE TensorChippingOp& operator = (const TensorChippingOp& other)
  {
    typedef TensorAssignOp<TensorChippingOp, const TensorChippingOp> Assign;
    Assign assign(*this, other);
    internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
    return *this;
  }

  template<typename OtherDerived>
  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE TensorChippingOp& operator = (const OtherDerived& other)
  {
    typedef TensorAssignOp<TensorChippingOp, const OtherDerived> Assign;
    Assign assign(*this, other);
    internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
    return *this;
  }

  protected:
    typename XprType::Nested m_xpr;
    const Index m_offset;
    const internal::DimensionId<DimId> m_dim;
};


// Eval as rvalue
template<DenseIndex DimId, typename ArgType, typename Device>
struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
{
  typedef TensorChippingOp<DimId, ArgType> XprType;
  static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  static const int NumDims = NumInputDims-1;
  typedef typename XprType::Index Index;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;


  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets.
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_dim(op.dim()), m_device(device)
  {
    EIGEN_STATIC_ASSERT((NumInputDims >= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(NumInputDims > m_dim.actualDim());

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    eigen_assert(op.offset() < input_dims[m_dim.actualDim()]);

    // The output dimensions are the input dimensions with the chipped one removed.
    int j = 0;
    for (int i = 0; i < NumInputDims; ++i) {
      if (i != m_dim.actualDim()) {
        m_dimensions[j] = input_dims[i];
        ++j;
      }
    }

    // m_stride is the input stride of the chipped dimension (the product of the
    // sizes of the dimensions that vary faster than it), m_inputStride is the
    // stride of the next slower input dimension, and m_inputOffset is the linear
    // offset of the chip's first coefficient within the input.
    m_stride = 1;
    m_inputStride = 1;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = 0; i < m_dim.actualDim(); ++i) {
        m_stride *= input_dims[i];
        m_inputStride *= input_dims[i];
      }
    } else {
      for (int i = NumInputDims-1; i > m_dim.actualDim(); --i) {
        m_stride *= input_dims[i];
        m_inputStride *= input_dims[i];
      }
    }
    m_inputStride *= input_dims[m_dim.actualDim()];
    m_inputOffset = m_stride * op.offset();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == 0) ||
        (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims-1)) {
      // m_stride is equal to 1, so let's avoid the integer division.
      eigen_assert(m_stride == 1);
      Index inputIndex = index * m_inputStride + m_inputOffset;
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      for (int i = 0; i < PacketSize; ++i) {
        values[i] = m_impl.coeff(inputIndex);
        inputIndex += m_inputStride;
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims - 1) ||
               (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) {
      // m_stride is always greater than index, so let's avoid the integer division.
      eigen_assert(m_stride > index);
      return m_impl.template packet<LoadMode>(index + m_inputOffset);
    } else {
      const Index idx = index / m_stride;
      const Index rem = index - idx * m_stride;
      if (rem + PacketSize <= m_stride) {
        Index inputIndex = idx * m_inputStride + m_inputOffset + rem;
        return m_impl.template packet<LoadMode>(inputIndex);
      } else {
        // The packet crosses a stride boundary. Fall back to the slow scalar path.
        EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
        for (int i = 0; i < PacketSize; ++i) {
          values[i] = coeff(index);
          ++index;
        }
        PacketReturnType rslt = internal::pload<PacketReturnType>(values);
        return rslt;
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    double cost = 0;
    if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
         m_dim.actualDim() == 0) ||
        (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
         m_dim.actualDim() == NumInputDims - 1)) {
      cost += TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
                m_dim.actualDim() == NumInputDims - 1) ||
               (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
                m_dim.actualDim() == 0)) {
      cost += TensorOpCost::AddCost<Index>();
    } else {
      cost += 3 * TensorOpCost::MulCost<Index>() + TensorOpCost::DivCost<Index>() +
              3 * TensorOpCost::AddCost<Index>();
    }

    return m_impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, cost, vectorized, PacketSize);
  }

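  // Raw data access is only possible when the chip is a contiguous block of the
  // input, i.e. when the chipped dimension is the slowest-varying one (the last
  // dimension in ColMajor, the first in RowMajor). Otherwise NULL is returned.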
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const {
    CoeffReturnType* result = const_cast<CoeffReturnType*>(m_impl.data());
    if (((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumDims) ||
         (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) &&
        result) {
      return result + m_inputOffset;
    } else {
      return NULL;
    }
  }

 protected:
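  // Maps a linear coefficient index of the (rank NumDims) chip to the
  // corresponding linear index in the (rank NumInputDims) input expression.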
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex;
    if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == 0) ||
        (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims-1)) {
      // m_stride is equal to 1, so let's avoid the integer division.
      eigen_assert(m_stride == 1);
      inputIndex = index * m_inputStride + m_inputOffset;
    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims-1) ||
               (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) {
      // m_stride is always greater than index, so let's avoid the integer division.
      eigen_assert(m_stride > index);
      inputIndex = index + m_inputOffset;
    } else {
      const Index idx = index / m_stride;
      inputIndex = idx * m_inputStride + m_inputOffset;
      index -= idx * m_stride;
      inputIndex += index;
    }
    return inputIndex;
  }

  Dimensions m_dimensions;
  Index m_stride;
  Index m_inputOffset;
  Index m_inputStride;
  TensorEvaluator<ArgType, Device> m_impl;
  const internal::DimensionId<DimId> m_dim;
  const Device& m_device;
};


// Eval as lvalue
template<DenseIndex DimId, typename ArgType, typename Device>
struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
  : public TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> Base;
  typedef TensorChippingOp<DimId, ArgType> XprType;
  static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  static const int NumDims = NumInputDims-1;
  typedef typename XprType::Index Index;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;

  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
    { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)

    if ((static_cast<int>(this->Layout) == static_cast<int>(ColMajor) && this->m_dim.actualDim() == 0) ||
        (static_cast<int>(this->Layout) == static_cast<int>(RowMajor) && this->m_dim.actualDim() == NumInputDims-1)) {
      // m_stride is equal to 1, so let's avoid the integer division.
      eigen_assert(this->m_stride == 1);
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
      Index inputIndex = index * this->m_inputStride + this->m_inputOffset;
      for (int i = 0; i < PacketSize; ++i) {
        this->m_impl.coeffRef(inputIndex) = values[i];
        inputIndex += this->m_inputStride;
      }
    } else if ((static_cast<int>(this->Layout) == static_cast<int>(ColMajor) && this->m_dim.actualDim() == NumInputDims-1) ||
               (static_cast<int>(this->Layout) == static_cast<int>(RowMajor) && this->m_dim.actualDim() == 0)) {
      // m_stride is always greater than index, so let's avoid the integer division.
      eigen_assert(this->m_stride > index);
      this->m_impl.template writePacket<StoreMode>(index + this->m_inputOffset, x);
    } else {
      const Index idx = index / this->m_stride;
      const Index rem = index - idx * this->m_stride;
      if (rem + PacketSize <= this->m_stride) {
        const Index inputIndex = idx * this->m_inputStride + this->m_inputOffset + rem;
        this->m_impl.template writePacket<StoreMode>(inputIndex, x);
      } else {
        // The packet crosses a stride boundary. Fall back to the slow scalar path.
        EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
        internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
        for (int i = 0; i < PacketSize; ++i) {
          this->coeffRef(index) = values[i];
          ++index;
        }
      }
    }
  }
};


} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
