1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18package org.apache.commons.math.linear;
19
20import java.io.Serializable;
21
22import org.apache.commons.math.Field;
23import org.apache.commons.math.FieldElement;
24import org.apache.commons.math.MathRuntimeException;
25import org.apache.commons.math.linear.MatrixVisitorException;
26import org.apache.commons.math.exception.util.LocalizedFormats;
27import org.apache.commons.math.util.FastMath;
28
29/**
30 * Cache-friendly implementation of FieldMatrix using a flat arrays to store
31 * square blocks of the matrix.
32 * <p>
33 * This implementation is specially designed to be cache-friendly. Square blocks are
34 * stored as small arrays and allow efficient traversal of data both in row major direction
35 * and columns major direction, one block at a time. This greatly increases performances
36 * for algorithms that use crossed directions loops like multiplication or transposition.
37 * </p>
38 * <p>
39 * The size of square blocks is a static parameter. It may be tuned according to the cache
40 * size of the target computer processor. As a rule of thumbs, it should be the largest
41 * value that allows three blocks to be simultaneously cached (this is necessary for example
42 * for matrix multiplication). The default value is to use 36x36 blocks.
43 * </p>
44 * <p>
45 * The regular blocks represent {@link #BLOCK_SIZE} x {@link #BLOCK_SIZE} squares. Blocks
46 * at right hand side and bottom side which may be smaller to fit matrix dimensions. The square
47 * blocks are flattened in row major order in single dimension arrays which are therefore
48 * {@link #BLOCK_SIZE}<sup>2</sup> elements long for regular blocks. The blocks are themselves
49 * organized in row major order.
50 * </p>
51 * <p>
52 * As an example, for a block size of 36x36, a 100x60 matrix would be stored in 6 blocks.
53 * Block 0 would be a Field[1296] array holding the upper left 36x36 square, block 1 would be
54 * a Field[1296] array holding the upper center 36x36 square, block 2 would be a Field[1008]
55 * array holding the upper right 36x28 rectangle, block 3 would be a Field[864] array holding
56 * the lower left 24x36 rectangle, block 4 would be a Field[864] array holding the lower center
57 * 24x36 rectangle and block 5 would be a Field[672] array holding the lower right 24x28
58 * rectangle.
59 * </p>
60 * <p>
61 * The layout complexity overhead versus simple mapping of matrices to java
62 * arrays is negligible for small matrices (about 1%). The gain from cache efficiency leads
63 * to up to 3-fold improvements for matrices of moderate to large size.
64 * </p>
65 * @param <T> the type of the field elements
66 * @version $Revision: 1073158 $ $Date: 2011-02-21 22:46:52 +0100 (lun. 21 févr. 2011) $
67 * @since 2.0
68 */
69public class BlockFieldMatrix<T extends FieldElement<T>> extends AbstractFieldMatrix<T> implements Serializable {
70
71    /** Block size. */
72    public static final int BLOCK_SIZE = 36;
73
74    /** Serializable version identifier */
75    private static final long serialVersionUID = -4602336630143123183L;
76
77    /** Blocks of matrix entries. */
78    private final T blocks[][];
79
80    /** Number of rows of the matrix. */
81    private final int rows;
82
83    /** Number of columns of the matrix. */
84    private final int columns;
85
86    /** Number of block rows of the matrix. */
87    private final int blockRows;
88
89    /** Number of block columns of the matrix. */
90    private final int blockColumns;
91
92    /**
93     * Create a new matrix with the supplied row and column dimensions.
94     *
95     * @param field field to which the elements belong
96     * @param rows  the number of rows in the new matrix
97     * @param columns  the number of columns in the new matrix
98     * @throws IllegalArgumentException if row or column dimension is not
99     *  positive
100     */
101    public BlockFieldMatrix(final Field<T> field, final int rows, final int columns)
102        throws IllegalArgumentException {
103
104        super(field, rows, columns);
105        this.rows    = rows;
106        this.columns = columns;
107
108        // number of blocks
109        blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
110        blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
111
112        // allocate storage blocks, taking care of smaller ones at right and bottom
113        blocks = createBlocksLayout(field, rows, columns);
114
115    }
116
117    /**
118     * Create a new dense matrix copying entries from raw layout data.
119     * <p>The input array <em>must</em> already be in raw layout.</p>
120     * <p>Calling this constructor is equivalent to call:
121     * <pre>matrix = new BlockFieldMatrix<T>(getField(), rawData.length, rawData[0].length,
122     *                                   toBlocksLayout(rawData), false);</pre>
123     * </p>
124     * @param rawData data for new matrix, in raw layout
125     *
126     * @exception IllegalArgumentException if <code>blockData</code> shape is
127     * inconsistent with block layout
128     * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
129     */
130    public BlockFieldMatrix(final T[][] rawData)
131        throws IllegalArgumentException {
132        this(rawData.length, rawData[0].length, toBlocksLayout(rawData), false);
133    }
134
135    /**
136     * Create a new dense matrix copying entries from block layout data.
137     * <p>The input array <em>must</em> already be in blocks layout.</p>
138     * @param rows  the number of rows in the new matrix
139     * @param columns  the number of columns in the new matrix
140     * @param blockData data for new matrix
141     * @param copyArray if true, the input array will be copied, otherwise
142     * it will be referenced
143     *
144     * @exception IllegalArgumentException if <code>blockData</code> shape is
145     * inconsistent with block layout
146     * @see #createBlocksLayout(Field, int, int)
147     * @see #toBlocksLayout(FieldElement[][])
148     * @see #BlockFieldMatrix(FieldElement[][])
149     */
150    public BlockFieldMatrix(final int rows, final int columns,
151                            final T[][] blockData, final boolean copyArray)
152        throws IllegalArgumentException {
153
154        super(extractField(blockData), rows, columns);
155        this.rows    = rows;
156        this.columns = columns;
157
158        // number of blocks
159        blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
160        blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
161
162        if (copyArray) {
163            // allocate storage blocks, taking care of smaller ones at right and bottom
164            blocks = buildArray(getField(), blockRows * blockColumns, -1);
165        } else {
166            // reference existing array
167            blocks = blockData;
168        }
169
170        int index = 0;
171        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
172            final int iHeight = blockHeight(iBlock);
173            for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++index) {
174                if (blockData[index].length != iHeight * blockWidth(jBlock)) {
175                    throw MathRuntimeException.createIllegalArgumentException(
176                            LocalizedFormats.WRONG_BLOCK_LENGTH,
177                            blockData[index].length, iHeight * blockWidth(jBlock));
178                }
179                if (copyArray) {
180                    blocks[index] = blockData[index].clone();
181                }
182            }
183        }
184
185    }
186
187    /**
188     * Convert a data array from raw layout to blocks layout.
189     * <p>
190     * Raw layout is the straightforward layout where element at row i and
191     * column j is in array element <code>rawData[i][j]</code>. Blocks layout
192     * is the layout used in {@link BlockFieldMatrix} instances, where the matrix
193     * is split in square blocks (except at right and bottom side where blocks may
194     * be rectangular to fit matrix size) and each block is stored in a flattened
195     * one-dimensional array.
196     * </p>
197     * <p>
198     * This method creates an array in blocks layout from an input array in raw layout.
199     * It can be used to provide the array argument of the {@link
200     * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
201     * constructor.
202     * </p>
203     * @param <T> the type of the field elements
204     * @param rawData data array in raw layout
205     * @return a new data array containing the same entries but in blocks layout
206     * @exception IllegalArgumentException if <code>rawData</code> is not rectangular
207     *  (not all rows have the same length)
208     * @see #createBlocksLayout(Field, int, int)
209     * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
210     */
211    public static <T extends FieldElement<T>> T[][] toBlocksLayout(final T[][] rawData)
212        throws IllegalArgumentException {
213
214        final int rows         = rawData.length;
215        final int columns      = rawData[0].length;
216        final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
217        final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
218
219        // safety checks
220        for (int i = 0; i < rawData.length; ++i) {
221            final int length = rawData[i].length;
222            if (length != columns) {
223                throw MathRuntimeException.createIllegalArgumentException(
224                        LocalizedFormats.DIFFERENT_ROWS_LENGTHS,
225                        columns, length);
226            }
227        }
228
229        // convert array
230        final Field<T> field = extractField(rawData);
231        final T[][] blocks = buildArray(field, blockRows * blockColumns, -1);
232        int blockIndex = 0;
233        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
234            final int pStart  = iBlock * BLOCK_SIZE;
235            final int pEnd    = FastMath.min(pStart + BLOCK_SIZE, rows);
236            final int iHeight = pEnd - pStart;
237            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
238                final int qStart = jBlock * BLOCK_SIZE;
239                final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
240                final int jWidth = qEnd - qStart;
241
242                // allocate new block
243                final T[] block = buildArray(field, iHeight * jWidth);
244                blocks[blockIndex] = block;
245
246                // copy data
247                int index = 0;
248                for (int p = pStart; p < pEnd; ++p) {
249                    System.arraycopy(rawData[p], qStart, block, index, jWidth);
250                    index += jWidth;
251                }
252
253                ++blockIndex;
254
255            }
256        }
257
258        return blocks;
259
260    }
261
262    /**
263     * Create a data array in blocks layout.
264     * <p>
265     * This method can be used to create the array argument of the {@link
266     * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
267     * constructor.
268     * </p>
269     * @param <T> the type of the field elements
270     * @param field field to which the elements belong
271     * @param rows  the number of rows in the new matrix
272     * @param columns  the number of columns in the new matrix
273     * @return a new data array in blocks layout
274     * @see #toBlocksLayout(FieldElement[][])
275     * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
276     */
277    public static <T extends FieldElement<T>> T[][] createBlocksLayout(final Field<T> field,
278                                                                       final int rows, final int columns) {
279
280        final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
281        final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
282
283        final T[][] blocks = buildArray(field, blockRows * blockColumns, -1);
284        int blockIndex = 0;
285        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
286            final int pStart  = iBlock * BLOCK_SIZE;
287            final int pEnd    = FastMath.min(pStart + BLOCK_SIZE, rows);
288            final int iHeight = pEnd - pStart;
289            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
290                final int qStart = jBlock * BLOCK_SIZE;
291                final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
292                final int jWidth = qEnd - qStart;
293                blocks[blockIndex] = buildArray(field, iHeight * jWidth);
294                ++blockIndex;
295            }
296        }
297
298        return blocks;
299
300    }
301
302    /** {@inheritDoc} */
303    @Override
304    public FieldMatrix<T> createMatrix(final int rowDimension, final int columnDimension)
305        throws IllegalArgumentException {
306        return new BlockFieldMatrix<T>(getField(), rowDimension, columnDimension);
307    }
308
309    /** {@inheritDoc} */
310    @Override
311    public FieldMatrix<T> copy() {
312
313        // create an empty matrix
314        BlockFieldMatrix<T> copied = new BlockFieldMatrix<T>(getField(), rows, columns);
315
316        // copy the blocks
317        for (int i = 0; i < blocks.length; ++i) {
318            System.arraycopy(blocks[i], 0, copied.blocks[i], 0, blocks[i].length);
319        }
320
321        return copied;
322
323    }
324
325    /** {@inheritDoc} */
326    @Override
327    public FieldMatrix<T> add(final FieldMatrix<T> m)
328        throws IllegalArgumentException {
329        try {
330            return add((BlockFieldMatrix<T>) m);
331        } catch (ClassCastException cce) {
332
333            // safety check
334            checkAdditionCompatible(m);
335
336            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
337
338            // perform addition block-wise, to ensure good cache behavior
339            int blockIndex = 0;
340            for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
341                for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
342
343                    // perform addition on the current block
344                    final T[] outBlock = out.blocks[blockIndex];
345                    final T[] tBlock   = blocks[blockIndex];
346                    final int      pStart   = iBlock * BLOCK_SIZE;
347                    final int      pEnd     = FastMath.min(pStart + BLOCK_SIZE, rows);
348                    final int      qStart   = jBlock * BLOCK_SIZE;
349                    final int      qEnd     = FastMath.min(qStart + BLOCK_SIZE, columns);
350                    int k = 0;
351                    for (int p = pStart; p < pEnd; ++p) {
352                        for (int q = qStart; q < qEnd; ++q) {
353                            outBlock[k] = tBlock[k].add(m.getEntry(p, q));
354                            ++k;
355                        }
356                    }
357
358                    // go to next block
359                    ++blockIndex;
360
361                }
362            }
363
364            return out;
365
366        }
367    }
368
369    /**
370     * Compute the sum of this and <code>m</code>.
371     *
372     * @param m    matrix to be added
373     * @return     this + m
374     * @throws  IllegalArgumentException if m is not the same size as this
375     */
376    public BlockFieldMatrix<T> add(final BlockFieldMatrix<T> m)
377        throws IllegalArgumentException {
378
379        // safety check
380        checkAdditionCompatible(m);
381
382        final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
383
384        // perform addition block-wise, to ensure good cache behavior
385        for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
386            final T[] outBlock = out.blocks[blockIndex];
387            final T[] tBlock   = blocks[blockIndex];
388            final T[] mBlock   = m.blocks[blockIndex];
389            for (int k = 0; k < outBlock.length; ++k) {
390                outBlock[k] = tBlock[k].add(mBlock[k]);
391            }
392        }
393
394        return out;
395
396    }
397
398    /** {@inheritDoc} */
399    @Override
400    public FieldMatrix<T> subtract(final FieldMatrix<T> m)
401        throws IllegalArgumentException {
402        try {
403            return subtract((BlockFieldMatrix<T>) m);
404        } catch (ClassCastException cce) {
405
406            // safety check
407            checkSubtractionCompatible(m);
408
409            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
410
411            // perform subtraction block-wise, to ensure good cache behavior
412            int blockIndex = 0;
413            for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
414                for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
415
416                    // perform subtraction on the current block
417                    final T[] outBlock = out.blocks[blockIndex];
418                    final T[] tBlock   = blocks[blockIndex];
419                    final int      pStart   = iBlock * BLOCK_SIZE;
420                    final int      pEnd     = FastMath.min(pStart + BLOCK_SIZE, rows);
421                    final int      qStart   = jBlock * BLOCK_SIZE;
422                    final int      qEnd     = FastMath.min(qStart + BLOCK_SIZE, columns);
423                    int k = 0;
424                    for (int p = pStart; p < pEnd; ++p) {
425                        for (int q = qStart; q < qEnd; ++q) {
426                            outBlock[k] = tBlock[k].subtract(m.getEntry(p, q));
427                            ++k;
428                        }
429                    }
430
431                    // go to next block
432                    ++blockIndex;
433
434                }
435            }
436
437            return out;
438
439        }
440    }
441
442    /**
443     * Compute this minus <code>m</code>.
444     *
445     * @param m    matrix to be subtracted
446     * @return     this - m
447     * @throws  IllegalArgumentException if m is not the same size as this
448     */
449    public BlockFieldMatrix<T> subtract(final BlockFieldMatrix<T> m)
450        throws IllegalArgumentException {
451
452        // safety check
453        checkSubtractionCompatible(m);
454
455        final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
456
457        // perform subtraction block-wise, to ensure good cache behavior
458        for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
459            final T[] outBlock = out.blocks[blockIndex];
460            final T[] tBlock   = blocks[blockIndex];
461            final T[] mBlock   = m.blocks[blockIndex];
462            for (int k = 0; k < outBlock.length; ++k) {
463                outBlock[k] = tBlock[k].subtract(mBlock[k]);
464            }
465        }
466
467        return out;
468
469    }
470
471    /** {@inheritDoc} */
472    @Override
473    public FieldMatrix<T> scalarAdd(final T d)
474        throws IllegalArgumentException {
475
476        final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
477
478        // perform subtraction block-wise, to ensure good cache behavior
479        for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
480            final T[] outBlock = out.blocks[blockIndex];
481            final T[] tBlock   = blocks[blockIndex];
482            for (int k = 0; k < outBlock.length; ++k) {
483                outBlock[k] = tBlock[k].add(d);
484            }
485        }
486
487        return out;
488
489    }
490
491    /** {@inheritDoc} */
492    @Override
493    public FieldMatrix<T> scalarMultiply(final T d)
494        throws IllegalArgumentException {
495
496        final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
497
498        // perform subtraction block-wise, to ensure good cache behavior
499        for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
500            final T[] outBlock = out.blocks[blockIndex];
501            final T[] tBlock   = blocks[blockIndex];
502            for (int k = 0; k < outBlock.length; ++k) {
503                outBlock[k] = tBlock[k].multiply(d);
504            }
505        }
506
507        return out;
508
509    }
510
511    /** {@inheritDoc} */
512    @Override
513    public FieldMatrix<T> multiply(final FieldMatrix<T> m)
514        throws IllegalArgumentException {
515        try {
516            return multiply((BlockFieldMatrix<T>) m);
517        } catch (ClassCastException cce) {
518
519            // safety check
520            checkMultiplicationCompatible(m);
521
522            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, m.getColumnDimension());
523            final T zero = getField().getZero();
524
525            // perform multiplication block-wise, to ensure good cache behavior
526            int blockIndex = 0;
527            for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
528
529                final int pStart = iBlock * BLOCK_SIZE;
530                final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
531
532                for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
533
534                    final int qStart = jBlock * BLOCK_SIZE;
535                    final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, m.getColumnDimension());
536
537                    // select current block
538                    final T[] outBlock = out.blocks[blockIndex];
539
540                    // perform multiplication on current block
541                    for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
542                        final int kWidth      = blockWidth(kBlock);
543                        final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
544                        final int rStart      = kBlock * BLOCK_SIZE;
545                        int k = 0;
546                        for (int p = pStart; p < pEnd; ++p) {
547                            final int lStart = (p - pStart) * kWidth;
548                            final int lEnd   = lStart + kWidth;
549                            for (int q = qStart; q < qEnd; ++q) {
550                                T sum = zero;
551                                int r = rStart;
552                                for (int l = lStart; l < lEnd; ++l) {
553                                    sum = sum.add(tBlock[l].multiply(m.getEntry(r, q)));
554                                    ++r;
555                                }
556                                outBlock[k] = outBlock[k].add(sum);
557                                ++k;
558                            }
559                        }
560                    }
561
562                    // go to next block
563                    ++blockIndex;
564
565                }
566            }
567
568            return out;
569
570        }
571    }
572
573    /**
574     * Returns the result of postmultiplying this by m.
575     *
576     * @param m    matrix to postmultiply by
577     * @return     this * m
578     * @throws     IllegalArgumentException
579     *             if columnDimension(this) != rowDimension(m)
580     */
581    public BlockFieldMatrix<T> multiply(BlockFieldMatrix<T> m) throws IllegalArgumentException {
582
583        // safety check
584        checkMultiplicationCompatible(m);
585
586        final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, m.columns);
587        final T zero = getField().getZero();
588
589        // perform multiplication block-wise, to ensure good cache behavior
590        int blockIndex = 0;
591        for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
592
593            final int pStart = iBlock * BLOCK_SIZE;
594            final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
595
596            for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
597                final int jWidth = out.blockWidth(jBlock);
598                final int jWidth2 = jWidth  + jWidth;
599                final int jWidth3 = jWidth2 + jWidth;
600                final int jWidth4 = jWidth3 + jWidth;
601
602                // select current block
603                final T[] outBlock = out.blocks[blockIndex];
604
605                // perform multiplication on current block
606                for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
607                    final int kWidth = blockWidth(kBlock);
608                    final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
609                    final T[] mBlock = m.blocks[kBlock * m.blockColumns + jBlock];
610                    int k = 0;
611                    for (int p = pStart; p < pEnd; ++p) {
612                        final int lStart = (p - pStart) * kWidth;
613                        final int lEnd   = lStart + kWidth;
614                        for (int nStart = 0; nStart < jWidth; ++nStart) {
615                            T sum = zero;
616                            int l = lStart;
617                            int n = nStart;
618                            while (l < lEnd - 3) {
619                                sum = sum.
620                                      add(tBlock[l].multiply(mBlock[n])).
621                                      add(tBlock[l + 1].multiply(mBlock[n + jWidth])).
622                                      add(tBlock[l + 2].multiply(mBlock[n + jWidth2])).
623                                      add(tBlock[l + 3].multiply(mBlock[n + jWidth3]));
624                                l += 4;
625                                n += jWidth4;
626                            }
627                            while (l < lEnd) {
628                                sum = sum.add(tBlock[l++].multiply(mBlock[n]));
629                                n += jWidth;
630                            }
631                            outBlock[k] = outBlock[k].add(sum);
632                            ++k;
633                        }
634                    }
635                }
636
637                // go to next block
638                ++blockIndex;
639
640            }
641        }
642
643        return out;
644
645    }
646
647    /** {@inheritDoc} */
648    @Override
649    public T[][] getData() {
650
651        final T[][] data = buildArray(getField(), getRowDimension(), getColumnDimension());
652        final int lastColumns = columns - (blockColumns - 1) * BLOCK_SIZE;
653
654        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
655            final int pStart = iBlock * BLOCK_SIZE;
656            final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
657            int regularPos   = 0;
658            int lastPos      = 0;
659            for (int p = pStart; p < pEnd; ++p) {
660                final T[] dataP = data[p];
661                int blockIndex = iBlock * blockColumns;
662                int dataPos    = 0;
663                for (int jBlock = 0; jBlock < blockColumns - 1; ++jBlock) {
664                    System.arraycopy(blocks[blockIndex++], regularPos, dataP, dataPos, BLOCK_SIZE);
665                    dataPos += BLOCK_SIZE;
666                }
667                System.arraycopy(blocks[blockIndex], lastPos, dataP, dataPos, lastColumns);
668                regularPos += BLOCK_SIZE;
669                lastPos    += lastColumns;
670            }
671        }
672
673        return data;
674
675    }
676
677    /** {@inheritDoc} */
678    @Override
679    public FieldMatrix<T> getSubMatrix(final int startRow, final int endRow,
680                                   final int startColumn, final int endColumn)
681        throws MatrixIndexException {
682
683        // safety checks
684        checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
685
686        // create the output matrix
687        final BlockFieldMatrix<T> out =
688            new BlockFieldMatrix<T>(getField(), endRow - startRow + 1, endColumn - startColumn + 1);
689
690        // compute blocks shifts
691        final int blockStartRow    = startRow    / BLOCK_SIZE;
692        final int rowsShift        = startRow    % BLOCK_SIZE;
693        final int blockStartColumn = startColumn / BLOCK_SIZE;
694        final int columnsShift     = startColumn % BLOCK_SIZE;
695
696        // perform extraction block-wise, to ensure good cache behavior
697        int pBlock = blockStartRow;
698        for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
699            final int iHeight = out.blockHeight(iBlock);
700            int qBlock = blockStartColumn;
701            for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
702                final int jWidth = out.blockWidth(jBlock);
703
704                // handle one block of the output matrix
705                final int      outIndex = iBlock * out.blockColumns + jBlock;
706                final T[] outBlock = out.blocks[outIndex];
707                final int      index    = pBlock * blockColumns + qBlock;
708                final int      width    = blockWidth(qBlock);
709
710                final int heightExcess = iHeight + rowsShift - BLOCK_SIZE;
711                final int widthExcess  = jWidth + columnsShift - BLOCK_SIZE;
712                if (heightExcess > 0) {
713                    // the submatrix block spans on two blocks rows from the original matrix
714                    if (widthExcess > 0) {
715                        // the submatrix block spans on two blocks columns from the original matrix
716                        final int width2 = blockWidth(qBlock + 1);
717                        copyBlockPart(blocks[index], width,
718                                      rowsShift, BLOCK_SIZE,
719                                      columnsShift, BLOCK_SIZE,
720                                      outBlock, jWidth, 0, 0);
721                        copyBlockPart(blocks[index + 1], width2,
722                                      rowsShift, BLOCK_SIZE,
723                                      0, widthExcess,
724                                      outBlock, jWidth, 0, jWidth - widthExcess);
725                        copyBlockPart(blocks[index + blockColumns], width,
726                                      0, heightExcess,
727                                      columnsShift, BLOCK_SIZE,
728                                      outBlock, jWidth, iHeight - heightExcess, 0);
729                        copyBlockPart(blocks[index + blockColumns + 1], width2,
730                                      0, heightExcess,
731                                      0, widthExcess,
732                                      outBlock, jWidth, iHeight - heightExcess, jWidth - widthExcess);
733                    } else {
734                        // the submatrix block spans on one block column from the original matrix
735                        copyBlockPart(blocks[index], width,
736                                      rowsShift, BLOCK_SIZE,
737                                      columnsShift, jWidth + columnsShift,
738                                      outBlock, jWidth, 0, 0);
739                        copyBlockPart(blocks[index + blockColumns], width,
740                                      0, heightExcess,
741                                      columnsShift, jWidth + columnsShift,
742                                      outBlock, jWidth, iHeight - heightExcess, 0);
743                    }
744                } else {
745                    // the submatrix block spans on one block row from the original matrix
746                    if (widthExcess > 0) {
747                        // the submatrix block spans on two blocks columns from the original matrix
748                        final int width2 = blockWidth(qBlock + 1);
749                        copyBlockPart(blocks[index], width,
750                                      rowsShift, iHeight + rowsShift,
751                                      columnsShift, BLOCK_SIZE,
752                                      outBlock, jWidth, 0, 0);
753                        copyBlockPart(blocks[index + 1], width2,
754                                      rowsShift, iHeight + rowsShift,
755                                      0, widthExcess,
756                                      outBlock, jWidth, 0, jWidth - widthExcess);
757                    } else {
758                        // the submatrix block spans on one block column from the original matrix
759                        copyBlockPart(blocks[index], width,
760                                      rowsShift, iHeight + rowsShift,
761                                      columnsShift, jWidth + columnsShift,
762                                      outBlock, jWidth, 0, 0);
763                    }
764               }
765
766                ++qBlock;
767            }
768
769            ++pBlock;
770
771        }
772
773        return out;
774
775    }
776
777    /**
778     * Copy a part of a block into another one
779     * <p>This method can be called only when the specified part fits in both
780     * blocks, no verification is done here.</p>
781     * @param srcBlock source block
782     * @param srcWidth source block width ({@link #BLOCK_SIZE} or smaller)
783     * @param srcStartRow start row in the source block
784     * @param srcEndRow end row (exclusive) in the source block
785     * @param srcStartColumn start column in the source block
786     * @param srcEndColumn end column (exclusive) in the source block
787     * @param dstBlock destination block
788     * @param dstWidth destination block width ({@link #BLOCK_SIZE} or smaller)
789     * @param dstStartRow start row in the destination block
790     * @param dstStartColumn start column in the destination block
791     */
792    private void copyBlockPart(final T[] srcBlock, final int srcWidth,
793                               final int srcStartRow, final int srcEndRow,
794                               final int srcStartColumn, final int srcEndColumn,
795                               final T[] dstBlock, final int dstWidth,
796                               final int dstStartRow, final int dstStartColumn) {
797        final int length = srcEndColumn - srcStartColumn;
798        int srcPos = srcStartRow * srcWidth + srcStartColumn;
799        int dstPos = dstStartRow * dstWidth + dstStartColumn;
800        for (int srcRow = srcStartRow; srcRow < srcEndRow; ++srcRow) {
801            System.arraycopy(srcBlock, srcPos, dstBlock, dstPos, length);
802            srcPos += srcWidth;
803            dstPos += dstWidth;
804        }
805    }
806
807    /** {@inheritDoc} */
808    @Override
809    public void setSubMatrix(final T[][] subMatrix, final int row, final int column)
810        throws MatrixIndexException {
811
812        // safety checks
813        final int refLength = subMatrix[0].length;
814        if (refLength < 1) {
815            throw MathRuntimeException.createIllegalArgumentException(LocalizedFormats.AT_LEAST_ONE_COLUMN);
816        }
817        final int endRow    = row + subMatrix.length - 1;
818        final int endColumn = column + refLength - 1;
819        checkSubMatrixIndex(row, endRow, column, endColumn);
820        for (final T[] subRow : subMatrix) {
821            if (subRow.length != refLength) {
822                throw MathRuntimeException.createIllegalArgumentException(
823                        LocalizedFormats.DIFFERENT_ROWS_LENGTHS,
824                        refLength, subRow.length);
825            }
826        }
827
828        // compute blocks bounds
829        final int blockStartRow    = row / BLOCK_SIZE;
830        final int blockEndRow      = (endRow + BLOCK_SIZE) / BLOCK_SIZE;
831        final int blockStartColumn = column / BLOCK_SIZE;
832        final int blockEndColumn   = (endColumn + BLOCK_SIZE) / BLOCK_SIZE;
833
834        // perform copy block-wise, to ensure good cache behavior
835        for (int iBlock = blockStartRow; iBlock < blockEndRow; ++iBlock) {
836            final int iHeight  = blockHeight(iBlock);
837            final int firstRow = iBlock * BLOCK_SIZE;
838            final int iStart   = FastMath.max(row,    firstRow);
839            final int iEnd     = FastMath.min(endRow + 1, firstRow + iHeight);
840
841            for (int jBlock = blockStartColumn; jBlock < blockEndColumn; ++jBlock) {
842                final int jWidth      = blockWidth(jBlock);
843                final int firstColumn = jBlock * BLOCK_SIZE;
844                final int jStart      = FastMath.max(column,    firstColumn);
845                final int jEnd        = FastMath.min(endColumn + 1, firstColumn + jWidth);
846                final int jLength     = jEnd - jStart;
847
848                // handle one block, row by row
849                final T[] block = blocks[iBlock * blockColumns + jBlock];
850                for (int i = iStart; i < iEnd; ++i) {
851                    System.arraycopy(subMatrix[i - row], jStart - column,
852                                     block, (i - firstRow) * jWidth + (jStart - firstColumn),
853                                     jLength);
854                }
855
856            }
857        }
858    }
859
860    /** {@inheritDoc} */
861    @Override
862    public FieldMatrix<T> getRowMatrix(final int row)
863        throws MatrixIndexException {
864
865        checkRowIndex(row);
866        final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), 1, columns);
867
868        // perform copy block-wise, to ensure good cache behavior
869        final int iBlock  = row / BLOCK_SIZE;
870        final int iRow    = row - iBlock * BLOCK_SIZE;
871        int outBlockIndex = 0;
872        int outIndex      = 0;
873        T[] outBlock = out.blocks[outBlockIndex];
874        for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
875            final int jWidth     = blockWidth(jBlock);
876            final T[] block = blocks[iBlock * blockColumns + jBlock];
877            final int available  = outBlock.length - outIndex;
878            if (jWidth > available) {
879                System.arraycopy(block, iRow * jWidth, outBlock, outIndex, available);
880                outBlock = out.blocks[++outBlockIndex];
881                System.arraycopy(block, iRow * jWidth, outBlock, 0, jWidth - available);
882                outIndex = jWidth - available;
883            } else {
884                System.arraycopy(block, iRow * jWidth, outBlock, outIndex, jWidth);
885                outIndex += jWidth;
886            }
887        }
888
889        return out;
890
891    }
892
893    /** {@inheritDoc} */
894    @Override
895    public void setRowMatrix(final int row, final FieldMatrix<T> matrix)
896        throws MatrixIndexException, InvalidMatrixException {
897        try {
898            setRowMatrix(row, (BlockFieldMatrix<T>) matrix);
899        } catch (ClassCastException cce) {
900            super.setRowMatrix(row, matrix);
901        }
902    }
903
904    /**
905     * Sets the entries in row number <code>row</code>
906     * as a row matrix.  Row indices start at 0.
907     *
908     * @param row the row to be set
909     * @param matrix row matrix (must have one row and the same number of columns
910     * as the instance)
911     * @throws MatrixIndexException if the specified row index is invalid
912     * @throws InvalidMatrixException if the matrix dimensions do not match one
913     * instance row
914     */
915    public void setRowMatrix(final int row, final BlockFieldMatrix<T> matrix)
916        throws MatrixIndexException, InvalidMatrixException {
917
918        checkRowIndex(row);
919        final int nCols = getColumnDimension();
920        if ((matrix.getRowDimension() != 1) ||
921            (matrix.getColumnDimension() != nCols)) {
922            throw new InvalidMatrixException(
923                    LocalizedFormats.DIMENSIONS_MISMATCH_2x2,
924                    matrix.getRowDimension(), matrix.getColumnDimension(),
925                    1, nCols);
926        }
927
928        // perform copy block-wise, to ensure good cache behavior
929        final int iBlock = row / BLOCK_SIZE;
930        final int iRow   = row - iBlock * BLOCK_SIZE;
931        int mBlockIndex  = 0;
932        int mIndex       = 0;
933        T[] mBlock  = matrix.blocks[mBlockIndex];
934        for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
935            final int jWidth     = blockWidth(jBlock);
936            final T[] block = blocks[iBlock * blockColumns + jBlock];
937            final int available  = mBlock.length - mIndex;
938            if (jWidth > available) {
939                System.arraycopy(mBlock, mIndex, block, iRow * jWidth, available);
940                mBlock = matrix.blocks[++mBlockIndex];
941                System.arraycopy(mBlock, 0, block, iRow * jWidth, jWidth - available);
942                mIndex = jWidth - available;
943            } else {
944                System.arraycopy(mBlock, mIndex, block, iRow * jWidth, jWidth);
945                mIndex += jWidth;
946           }
947        }
948
949    }
950
951    /** {@inheritDoc} */
952    @Override
953    public FieldMatrix<T> getColumnMatrix(final int column)
954        throws MatrixIndexException {
955
956        checkColumnIndex(column);
957        final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, 1);
958
959        // perform copy block-wise, to ensure good cache behavior
960        final int jBlock  = column / BLOCK_SIZE;
961        final int jColumn = column - jBlock * BLOCK_SIZE;
962        final int jWidth  = blockWidth(jBlock);
963        int outBlockIndex = 0;
964        int outIndex      = 0;
965        T[] outBlock = out.blocks[outBlockIndex];
966        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
967            final int iHeight = blockHeight(iBlock);
968            final T[] block = blocks[iBlock * blockColumns + jBlock];
969            for (int i = 0; i < iHeight; ++i) {
970                if (outIndex >= outBlock.length) {
971                    outBlock = out.blocks[++outBlockIndex];
972                    outIndex = 0;
973                }
974                outBlock[outIndex++] = block[i * jWidth + jColumn];
975            }
976        }
977
978        return out;
979
980    }
981
982    /** {@inheritDoc} */
983    @Override
984    public void setColumnMatrix(final int column, final FieldMatrix<T> matrix)
985        throws MatrixIndexException, InvalidMatrixException {
986        try {
987            setColumnMatrix(column, (BlockFieldMatrix<T>) matrix);
988        } catch (ClassCastException cce) {
989            super.setColumnMatrix(column, matrix);
990        }
991    }
992
993    /**
994     * Sets the entries in column number <code>column</code>
995     * as a column matrix.  Column indices start at 0.
996     *
997     * @param column the column to be set
998     * @param matrix column matrix (must have one column and the same number of rows
999     * as the instance)
1000     * @throws MatrixIndexException if the specified column index is invalid
1001     * @throws InvalidMatrixException if the matrix dimensions do not match one
1002     * instance column
1003     */
1004    void setColumnMatrix(final int column, final BlockFieldMatrix<T> matrix)
1005        throws MatrixIndexException, InvalidMatrixException {
1006
1007        checkColumnIndex(column);
1008        final int nRows = getRowDimension();
1009        if ((matrix.getRowDimension() != nRows) ||
1010            (matrix.getColumnDimension() != 1)) {
1011            throw new InvalidMatrixException(
1012                    LocalizedFormats.DIMENSIONS_MISMATCH_2x2,
1013                    matrix.getRowDimension(), matrix.getColumnDimension(),
1014                    nRows, 1);
1015        }
1016
1017        // perform copy block-wise, to ensure good cache behavior
1018        final int jBlock  = column / BLOCK_SIZE;
1019        final int jColumn = column - jBlock * BLOCK_SIZE;
1020        final int jWidth  = blockWidth(jBlock);
1021        int mBlockIndex = 0;
1022        int mIndex      = 0;
1023        T[] mBlock = matrix.blocks[mBlockIndex];
1024        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1025            final int iHeight = blockHeight(iBlock);
1026            final T[] block = blocks[iBlock * blockColumns + jBlock];
1027            for (int i = 0; i < iHeight; ++i) {
1028                if (mIndex >= mBlock.length) {
1029                    mBlock = matrix.blocks[++mBlockIndex];
1030                    mIndex = 0;
1031                }
1032                block[i * jWidth + jColumn] = mBlock[mIndex++];
1033            }
1034        }
1035
1036    }
1037
1038    /** {@inheritDoc} */
1039    @Override
1040    public FieldVector<T> getRowVector(final int row)
1041        throws MatrixIndexException {
1042
1043        checkRowIndex(row);
1044        final T[] outData = buildArray(getField(), columns);
1045
1046        // perform copy block-wise, to ensure good cache behavior
1047        final int iBlock  = row / BLOCK_SIZE;
1048        final int iRow    = row - iBlock * BLOCK_SIZE;
1049        int outIndex      = 0;
1050        for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1051            final int jWidth     = blockWidth(jBlock);
1052            final T[] block = blocks[iBlock * blockColumns + jBlock];
1053            System.arraycopy(block, iRow * jWidth, outData, outIndex, jWidth);
1054            outIndex += jWidth;
1055        }
1056
1057        return new ArrayFieldVector<T>(outData, false);
1058
1059    }
1060
1061    /** {@inheritDoc} */
1062    @Override
1063    public void setRowVector(final int row, final FieldVector<T> vector)
1064        throws MatrixIndexException, InvalidMatrixException {
1065        try {
1066            setRow(row, ((ArrayFieldVector<T>) vector).getDataRef());
1067        } catch (ClassCastException cce) {
1068            super.setRowVector(row, vector);
1069        }
1070    }
1071
1072    /** {@inheritDoc} */
1073    @Override
1074    public FieldVector<T> getColumnVector(final int column)
1075        throws MatrixIndexException {
1076
1077        checkColumnIndex(column);
1078        final T[] outData = buildArray(getField(), rows);
1079
1080        // perform copy block-wise, to ensure good cache behavior
1081        final int jBlock  = column / BLOCK_SIZE;
1082        final int jColumn = column - jBlock * BLOCK_SIZE;
1083        final int jWidth  = blockWidth(jBlock);
1084        int outIndex      = 0;
1085        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1086            final int iHeight = blockHeight(iBlock);
1087            final T[] block = blocks[iBlock * blockColumns + jBlock];
1088            for (int i = 0; i < iHeight; ++i) {
1089                outData[outIndex++] = block[i * jWidth + jColumn];
1090            }
1091        }
1092
1093        return new ArrayFieldVector<T>(outData, false);
1094
1095    }
1096
1097    /** {@inheritDoc} */
1098    @Override
1099    public void setColumnVector(final int column, final FieldVector<T> vector)
1100        throws MatrixIndexException, InvalidMatrixException {
1101        try {
1102            setColumn(column, ((ArrayFieldVector<T>) vector).getDataRef());
1103        } catch (ClassCastException cce) {
1104            super.setColumnVector(column, vector);
1105        }
1106    }
1107
1108    /** {@inheritDoc} */
1109    @Override
1110    public T[] getRow(final int row)
1111        throws MatrixIndexException {
1112
1113        checkRowIndex(row);
1114        final T[] out = buildArray(getField(), columns);
1115
1116        // perform copy block-wise, to ensure good cache behavior
1117        final int iBlock  = row / BLOCK_SIZE;
1118        final int iRow    = row - iBlock * BLOCK_SIZE;
1119        int outIndex      = 0;
1120        for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1121            final int jWidth     = blockWidth(jBlock);
1122            final T[] block = blocks[iBlock * blockColumns + jBlock];
1123            System.arraycopy(block, iRow * jWidth, out, outIndex, jWidth);
1124            outIndex += jWidth;
1125        }
1126
1127        return out;
1128
1129    }
1130
1131    /** {@inheritDoc} */
1132    @Override
1133    public void setRow(final int row, final T[] array)
1134        throws MatrixIndexException, InvalidMatrixException {
1135
1136        checkRowIndex(row);
1137        final int nCols = getColumnDimension();
1138        if (array.length != nCols) {
1139            throw new InvalidMatrixException(
1140                    LocalizedFormats.DIMENSIONS_MISMATCH_2x2,
1141                    1, array.length, 1, nCols);
1142        }
1143
1144        // perform copy block-wise, to ensure good cache behavior
1145        final int iBlock  = row / BLOCK_SIZE;
1146        final int iRow    = row - iBlock * BLOCK_SIZE;
1147        int outIndex      = 0;
1148        for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1149            final int jWidth     = blockWidth(jBlock);
1150            final T[] block = blocks[iBlock * blockColumns + jBlock];
1151            System.arraycopy(array, outIndex, block, iRow * jWidth, jWidth);
1152            outIndex += jWidth;
1153        }
1154
1155    }
1156
1157    /** {@inheritDoc} */
1158    @Override
1159    public T[] getColumn(final int column)
1160        throws MatrixIndexException {
1161
1162        checkColumnIndex(column);
1163        final T[] out = buildArray(getField(), rows);
1164
1165        // perform copy block-wise, to ensure good cache behavior
1166        final int jBlock  = column / BLOCK_SIZE;
1167        final int jColumn = column - jBlock * BLOCK_SIZE;
1168        final int jWidth  = blockWidth(jBlock);
1169        int outIndex      = 0;
1170        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1171            final int iHeight = blockHeight(iBlock);
1172            final T[] block = blocks[iBlock * blockColumns + jBlock];
1173            for (int i = 0; i < iHeight; ++i) {
1174                out[outIndex++] = block[i * jWidth + jColumn];
1175            }
1176        }
1177
1178        return out;
1179
1180    }
1181
1182    /** {@inheritDoc} */
1183    @Override
1184    public void setColumn(final int column, final T[] array)
1185        throws MatrixIndexException, InvalidMatrixException {
1186
1187        checkColumnIndex(column);
1188        final int nRows = getRowDimension();
1189        if (array.length != nRows) {
1190            throw new InvalidMatrixException(
1191                    LocalizedFormats.DIMENSIONS_MISMATCH_2x2,
1192                    array.length, 1, nRows, 1);
1193        }
1194
1195        // perform copy block-wise, to ensure good cache behavior
1196        final int jBlock  = column / BLOCK_SIZE;
1197        final int jColumn = column - jBlock * BLOCK_SIZE;
1198        final int jWidth  = blockWidth(jBlock);
1199        int outIndex      = 0;
1200        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1201            final int iHeight = blockHeight(iBlock);
1202            final T[] block = blocks[iBlock * blockColumns + jBlock];
1203            for (int i = 0; i < iHeight; ++i) {
1204                block[i * jWidth + jColumn] = array[outIndex++];
1205            }
1206        }
1207
1208    }
1209
1210    /** {@inheritDoc} */
1211    @Override
1212    public T getEntry(final int row, final int column)
1213        throws MatrixIndexException {
1214        try {
1215            final int iBlock = row    / BLOCK_SIZE;
1216            final int jBlock = column / BLOCK_SIZE;
1217            final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1218                               (column - jBlock * BLOCK_SIZE);
1219            return blocks[iBlock * blockColumns + jBlock][k];
1220        } catch (ArrayIndexOutOfBoundsException e) {
1221            throw new MatrixIndexException(
1222                    LocalizedFormats.NO_SUCH_MATRIX_ENTRY,
1223                    row, column, getRowDimension(), getColumnDimension());
1224        }
1225    }
1226
1227    /** {@inheritDoc} */
1228    @Override
1229    public void setEntry(final int row, final int column, final T value)
1230        throws MatrixIndexException {
1231        try {
1232            final int iBlock = row    / BLOCK_SIZE;
1233            final int jBlock = column / BLOCK_SIZE;
1234            final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1235                               (column - jBlock * BLOCK_SIZE);
1236            blocks[iBlock * blockColumns + jBlock][k] = value;
1237        } catch (ArrayIndexOutOfBoundsException e) {
1238            throw new MatrixIndexException(
1239                    LocalizedFormats.NO_SUCH_MATRIX_ENTRY,
1240                    row, column, getRowDimension(), getColumnDimension());
1241        }
1242    }
1243
1244    /** {@inheritDoc} */
1245    @Override
1246    public void addToEntry(final int row, final int column, final T increment)
1247        throws MatrixIndexException {
1248        try {
1249            final int iBlock = row    / BLOCK_SIZE;
1250            final int jBlock = column / BLOCK_SIZE;
1251            final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1252                               (column - jBlock * BLOCK_SIZE);
1253            final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1254            blockIJ[k] = blockIJ[k].add(increment);
1255        } catch (ArrayIndexOutOfBoundsException e) {
1256            throw new MatrixIndexException(
1257                    LocalizedFormats.NO_SUCH_MATRIX_ENTRY,
1258                    row, column, getRowDimension(), getColumnDimension());
1259        }
1260    }
1261
1262    /** {@inheritDoc} */
1263    @Override
1264    public void multiplyEntry(final int row, final int column, final T factor)
1265        throws MatrixIndexException {
1266        try {
1267            final int iBlock = row    / BLOCK_SIZE;
1268            final int jBlock = column / BLOCK_SIZE;
1269            final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1270                               (column - jBlock * BLOCK_SIZE);
1271            final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1272            blockIJ[k] = blockIJ[k].multiply(factor);
1273        } catch (ArrayIndexOutOfBoundsException e) {
1274            throw new MatrixIndexException(
1275                    LocalizedFormats.NO_SUCH_MATRIX_ENTRY,
1276                    row, column, getRowDimension(), getColumnDimension());
1277        }
1278    }
1279
1280    /** {@inheritDoc} */
1281    @Override
1282    public FieldMatrix<T> transpose() {
1283
1284        final int nRows = getRowDimension();
1285        final int nCols = getColumnDimension();
1286        final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), nCols, nRows);
1287
1288        // perform transpose block-wise, to ensure good cache behavior
1289        int blockIndex = 0;
1290        for (int iBlock = 0; iBlock < blockColumns; ++iBlock) {
1291            for (int jBlock = 0; jBlock < blockRows; ++jBlock) {
1292
1293                // transpose current block
1294                final T[] outBlock = out.blocks[blockIndex];
1295                final T[] tBlock   = blocks[jBlock * blockColumns + iBlock];
1296                final int      pStart   = iBlock * BLOCK_SIZE;
1297                final int      pEnd     = FastMath.min(pStart + BLOCK_SIZE, columns);
1298                final int      qStart   = jBlock * BLOCK_SIZE;
1299                final int      qEnd     = FastMath.min(qStart + BLOCK_SIZE, rows);
1300                int k = 0;
1301                for (int p = pStart; p < pEnd; ++p) {
1302                    final int lInc = pEnd - pStart;
1303                    int l = p - pStart;
1304                    for (int q = qStart; q < qEnd; ++q) {
1305                        outBlock[k] = tBlock[l];
1306                        ++k;
1307                        l+= lInc;
1308                    }
1309                }
1310
1311                // go to next block
1312                ++blockIndex;
1313
1314            }
1315        }
1316
1317        return out;
1318
1319    }
1320
1321    /** {@inheritDoc} */
1322    @Override
1323    public int getRowDimension() {
1324        return rows;
1325    }
1326
1327    /** {@inheritDoc} */
1328    @Override
1329    public int getColumnDimension() {
1330        return columns;
1331    }
1332
1333    /** {@inheritDoc} */
1334    @Override
1335    public T[] operate(final T[] v)
1336        throws IllegalArgumentException {
1337
1338        if (v.length != columns) {
1339            throw MathRuntimeException.createIllegalArgumentException(
1340                    LocalizedFormats.VECTOR_LENGTH_MISMATCH,
1341                    v.length, columns);
1342        }
1343        final T[] out = buildArray(getField(), rows);
1344        final T zero = getField().getZero();
1345
1346        // perform multiplication block-wise, to ensure good cache behavior
1347        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1348            final int pStart = iBlock * BLOCK_SIZE;
1349            final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1350            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1351                final T[] block  = blocks[iBlock * blockColumns + jBlock];
1352                final int      qStart = jBlock * BLOCK_SIZE;
1353                final int      qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1354                int k = 0;
1355                for (int p = pStart; p < pEnd; ++p) {
1356                    T sum = zero;
1357                    int q = qStart;
1358                    while (q < qEnd - 3) {
1359                        sum = sum.
1360                              add(block[k].multiply(v[q])).
1361                              add(block[k + 1].multiply(v[q + 1])).
1362                              add(block[k + 2].multiply(v[q + 2])).
1363                              add(block[k + 3].multiply(v[q + 3]));
1364                        k += 4;
1365                        q += 4;
1366                    }
1367                    while (q < qEnd) {
1368                        sum = sum.add(block[k++].multiply(v[q++]));
1369                    }
1370                    out[p] = out[p].add(sum);
1371                }
1372            }
1373        }
1374
1375        return out;
1376
1377    }
1378
1379    /** {@inheritDoc} */
1380    @Override
1381    public T[] preMultiply(final T[] v)
1382        throws IllegalArgumentException {
1383
1384        if (v.length != rows) {
1385            throw MathRuntimeException.createIllegalArgumentException(
1386                    LocalizedFormats.VECTOR_LENGTH_MISMATCH,
1387                    v.length, rows);
1388        }
1389        final T[] out = buildArray(getField(), columns);
1390        final T zero = getField().getZero();
1391
1392        // perform multiplication block-wise, to ensure good cache behavior
1393        for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1394            final int jWidth  = blockWidth(jBlock);
1395            final int jWidth2 = jWidth  + jWidth;
1396            final int jWidth3 = jWidth2 + jWidth;
1397            final int jWidth4 = jWidth3 + jWidth;
1398            final int qStart = jBlock * BLOCK_SIZE;
1399            final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1400            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1401                final T[] block  = blocks[iBlock * blockColumns + jBlock];
1402                final int      pStart = iBlock * BLOCK_SIZE;
1403                final int      pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1404                for (int q = qStart; q < qEnd; ++q) {
1405                    int k = q - qStart;
1406                    T sum = zero;
1407                    int p = pStart;
1408                    while (p < pEnd - 3) {
1409                        sum = sum.
1410                              add(block[k].multiply(v[p])).
1411                              add(block[k + jWidth].multiply(v[p + 1])).
1412                              add(block[k + jWidth2].multiply(v[p + 2])).
1413                              add(block[k + jWidth3].multiply(v[p + 3]));
1414                        k += jWidth4;
1415                        p += 4;
1416                    }
1417                    while (p < pEnd) {
1418                        sum = sum.add(block[k].multiply(v[p++]));
1419                        k += jWidth;
1420                    }
1421                    out[q] = out[q].add(sum);
1422                }
1423            }
1424        }
1425
1426        return out;
1427
1428    }
1429
1430    /** {@inheritDoc} */
1431    @Override
1432    public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor)
1433        throws MatrixVisitorException {
1434        visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1435        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1436            final int pStart = iBlock * BLOCK_SIZE;
1437            final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1438            for (int p = pStart; p < pEnd; ++p) {
1439                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1440                    final int jWidth = blockWidth(jBlock);
1441                    final int qStart = jBlock * BLOCK_SIZE;
1442                    final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1443                    final T[] block = blocks[iBlock * blockColumns + jBlock];
1444                    int k = (p - pStart) * jWidth;
1445                    for (int q = qStart; q < qEnd; ++q) {
1446                        block[k] = visitor.visit(p, q, block[k]);
1447                        ++k;
1448                    }
1449                }
1450             }
1451        }
1452        return visitor.end();
1453    }
1454
1455    /** {@inheritDoc} */
1456    @Override
1457    public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor)
1458        throws MatrixVisitorException {
1459        visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1460        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1461            final int pStart = iBlock * BLOCK_SIZE;
1462            final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1463            for (int p = pStart; p < pEnd; ++p) {
1464                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1465                    final int jWidth = blockWidth(jBlock);
1466                    final int qStart = jBlock * BLOCK_SIZE;
1467                    final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1468                    final T[] block = blocks[iBlock * blockColumns + jBlock];
1469                    int k = (p - pStart) * jWidth;
1470                    for (int q = qStart; q < qEnd; ++q) {
1471                        visitor.visit(p, q, block[k]);
1472                        ++k;
1473                    }
1474                }
1475             }
1476        }
1477        return visitor.end();
1478    }
1479
1480    /** {@inheritDoc} */
1481    @Override
1482    public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor,
1483                                 final int startRow, final int endRow,
1484                                 final int startColumn, final int endColumn)
1485        throws MatrixIndexException, MatrixVisitorException {
1486        checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1487        visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1488        for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1489            final int p0     = iBlock * BLOCK_SIZE;
1490            final int pStart = FastMath.max(startRow, p0);
1491            final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1492            for (int p = pStart; p < pEnd; ++p) {
1493                for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1494                    final int jWidth = blockWidth(jBlock);
1495                    final int q0     = jBlock * BLOCK_SIZE;
1496                    final int qStart = FastMath.max(startColumn, q0);
1497                    final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1498                    final T[] block = blocks[iBlock * blockColumns + jBlock];
1499                    int k = (p - p0) * jWidth + qStart - q0;
1500                    for (int q = qStart; q < qEnd; ++q) {
1501                        block[k] = visitor.visit(p, q, block[k]);
1502                        ++k;
1503                    }
1504                }
1505             }
1506        }
1507        return visitor.end();
1508    }
1509
1510    /** {@inheritDoc} */
1511    @Override
1512    public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor,
1513                                 final int startRow, final int endRow,
1514                                 final int startColumn, final int endColumn)
1515        throws MatrixIndexException, MatrixVisitorException {
1516        checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1517        visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1518        for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1519            final int p0     = iBlock * BLOCK_SIZE;
1520            final int pStart = FastMath.max(startRow, p0);
1521            final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1522            for (int p = pStart; p < pEnd; ++p) {
1523                for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1524                    final int jWidth = blockWidth(jBlock);
1525                    final int q0     = jBlock * BLOCK_SIZE;
1526                    final int qStart = FastMath.max(startColumn, q0);
1527                    final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1528                    final T[] block = blocks[iBlock * blockColumns + jBlock];
1529                    int k = (p - p0) * jWidth + qStart - q0;
1530                    for (int q = qStart; q < qEnd; ++q) {
1531                        visitor.visit(p, q, block[k]);
1532                        ++k;
1533                    }
1534                }
1535             }
1536        }
1537        return visitor.end();
1538    }
1539
1540    /** {@inheritDoc} */
1541    @Override
1542    public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor)
1543        throws MatrixVisitorException {
1544        visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1545        int blockIndex = 0;
1546        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1547            final int pStart = iBlock * BLOCK_SIZE;
1548            final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1549            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1550                final int qStart = jBlock * BLOCK_SIZE;
1551                final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1552                final T[] block = blocks[blockIndex];
1553                int k = 0;
1554                for (int p = pStart; p < pEnd; ++p) {
1555                    for (int q = qStart; q < qEnd; ++q) {
1556                        block[k] = visitor.visit(p, q, block[k]);
1557                        ++k;
1558                    }
1559                }
1560                ++blockIndex;
1561            }
1562        }
1563        return visitor.end();
1564    }
1565
1566    /** {@inheritDoc} */
1567    @Override
1568    public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor)
1569        throws MatrixVisitorException {
1570        visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1571        int blockIndex = 0;
1572        for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1573            final int pStart = iBlock * BLOCK_SIZE;
1574            final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1575            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1576                final int qStart = jBlock * BLOCK_SIZE;
1577                final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1578                final T[] block = blocks[blockIndex];
1579                int k = 0;
1580                for (int p = pStart; p < pEnd; ++p) {
1581                    for (int q = qStart; q < qEnd; ++q) {
1582                        visitor.visit(p, q, block[k]);
1583                        ++k;
1584                    }
1585                }
1586                ++blockIndex;
1587            }
1588        }
1589        return visitor.end();
1590    }
1591
1592    /** {@inheritDoc} */
1593    @Override
1594    public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor,
1595                                       final int startRow, final int endRow,
1596                                       final int startColumn, final int endColumn)
1597        throws MatrixIndexException, MatrixVisitorException {
1598        checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1599        visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1600        for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1601            final int p0     = iBlock * BLOCK_SIZE;
1602            final int pStart = FastMath.max(startRow, p0);
1603            final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1604            for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1605                final int jWidth = blockWidth(jBlock);
1606                final int q0     = jBlock * BLOCK_SIZE;
1607                final int qStart = FastMath.max(startColumn, q0);
1608                final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1609                final T[] block = blocks[iBlock * blockColumns + jBlock];
1610                for (int p = pStart; p < pEnd; ++p) {
1611                    int k = (p - p0) * jWidth + qStart - q0;
1612                    for (int q = qStart; q < qEnd; ++q) {
1613                        block[k] = visitor.visit(p, q, block[k]);
1614                        ++k;
1615                    }
1616                }
1617            }
1618        }
1619        return visitor.end();
1620    }
1621
1622    /** {@inheritDoc} */
1623    @Override
1624    public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor,
1625                                       final int startRow, final int endRow,
1626                                       final int startColumn, final int endColumn)
1627        throws MatrixIndexException, MatrixVisitorException {
1628        checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1629        visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1630        for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1631            final int p0     = iBlock * BLOCK_SIZE;
1632            final int pStart = FastMath.max(startRow, p0);
1633            final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1634            for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1635                final int jWidth = blockWidth(jBlock);
1636                final int q0     = jBlock * BLOCK_SIZE;
1637                final int qStart = FastMath.max(startColumn, q0);
1638                final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1639                final T[] block = blocks[iBlock * blockColumns + jBlock];
1640                for (int p = pStart; p < pEnd; ++p) {
1641                    int k = (p - p0) * jWidth + qStart - q0;
1642                    for (int q = qStart; q < qEnd; ++q) {
1643                        visitor.visit(p, q, block[k]);
1644                        ++k;
1645                    }
1646                }
1647            }
1648        }
1649        return visitor.end();
1650    }
1651
1652    /**
1653     * Get the height of a block.
1654     * @param blockRow row index (in block sense) of the block
1655     * @return height (number of rows) of the block
1656     */
1657    private int blockHeight(final int blockRow) {
1658        return (blockRow == blockRows - 1) ? rows - blockRow * BLOCK_SIZE : BLOCK_SIZE;
1659    }
1660
1661    /**
1662     * Get the width of a block.
1663     * @param blockColumn column index (in block sense) of the block
1664     * @return width (number of columns) of the block
1665     */
1666    private int blockWidth(final int blockColumn) {
1667        return (blockColumn == blockColumns - 1) ? columns - blockColumn * BLOCK_SIZE : BLOCK_SIZE;
1668    }
1669
1670}
1671