android_text_StaticLayout.cpp revision b8082603b60b50b42cf44e3d14f25ccb596fc069
1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "StaticLayout"
18
19#include "ScopedIcuLocale.h"
20#include "unicode/locid.h"
21#include "unicode/brkiter.h"
22#include "utils/misc.h"
23#include "utils/Log.h"
24#include "ScopedPrimitiveArray.h"
25#include "JNIHelp.h"
26#include <android_runtime/AndroidRuntime.h>
27#include <cstdint>
28#include <vector>
29#include <list>
30#include <algorithm>
31
32namespace android {
33
34struct JLineBreaksID {
35    jfieldID breaks;
36    jfieldID widths;
37    jfieldID flags;
38};
39
40static jclass gLineBreaks_class;
41static JLineBreaksID gLineBreaks_fieldID;
42
43static const int CHAR_SPACE = 0x20;
44static const int CHAR_TAB = 0x09;
45static const int CHAR_NEWLINE = 0x0a;
46static const int CHAR_ZWSP = 0x200b;
47
48class TabStops {
49    public:
50        // specified stops must be a sorted array (allowed to be null)
51        TabStops(JNIEnv* env, jintArray stops, jint defaultTabWidth) :
52            mStops(env), mTabWidth(defaultTabWidth) {
53                if (stops != NULL) {
54                    mStops.reset(stops);
55                    mNumStops = mStops.size();
56                } else {
57                    mNumStops = 0;
58                }
59            }
60        float width(float widthSoFar) const {
61            const jint* mStopsArray = mStops.get();
62            for (int i = 0; i < mNumStops; i++) {
63                if (mStopsArray[i] > widthSoFar) {
64                    return mStopsArray[i];
65                }
66            }
67            // find the next tabstop after widthSoFar
68            return static_cast<int>((widthSoFar + mTabWidth) / mTabWidth) * mTabWidth;
69        }
70    private:
71        ScopedIntArrayRO mStops;
72        const int mTabWidth;
73        int mNumStops;
74
75        // disable copying and assignment
76        TabStops(const TabStops&);
77        void operator=(const TabStops&);
78};
79
80enum PrimitiveType {
81    kPrimitiveType_Box,
82    kPrimitiveType_Glue,
83    kPrimitiveType_Penalty,
84    kPrimitiveType_Variable,
85    kPrimitiveType_Wordbreak
86};
87
88static const float PENALTY_INFINITY = 1e7; // forced non-break, negative infinity is forced break
89
90struct Primitive {
91    PrimitiveType type;
92    int location;
93    // 'Box' has width
94    // 'Glue' has width
95    // 'Penalty' has width and penalty
96    // 'Variable' has tabStop
97    // 'Wordbreak' has penalty
98    union {
99        struct {
100            float width;
101            float penalty;
102        };
103        const TabStops* tabStop;
104    };
105};
106
107class LineWidth {
108    public:
109        LineWidth(float firstWidth, int firstWidthLineCount, float restWidth) :
110                mFirstWidth(firstWidth), mFirstWidthLineCount(firstWidthLineCount),
111                mRestWidth(restWidth) {}
112        float getLineWidth(int line) const {
113            return (line < mFirstWidthLineCount) ? mFirstWidth : mRestWidth;
114        }
115    private:
116        const float mFirstWidth;
117        const int mFirstWidthLineCount;
118        const float mRestWidth;
119};
120
121class LineBreaker {
122    public:
123        LineBreaker(const std::vector<Primitive>& primitives,
124                    const LineWidth& lineWidth) :
125                mPrimitives(primitives), mLineWidth(lineWidth) {}
126        virtual ~LineBreaker() {}
127        virtual void computeBreaks(std::vector<int>* breaks, std::vector<float>* widths,
128                                   std::vector<unsigned char>* flags) const = 0;
129    protected:
130        const std::vector<Primitive>& mPrimitives;
131        const LineWidth& mLineWidth;
132};
133
134class OptimizingLineBreaker : public LineBreaker {
135    public:
136        OptimizingLineBreaker(const std::vector<Primitive>& primitives, const LineWidth& lineWidth) :
137                LineBreaker(primitives, lineWidth) {}
138        void computeBreaks(std::vector<int>* breaks, std::vector<float>* widths,
139                           std::vector<unsigned char>* flags) const {
140            int numBreaks = mPrimitives.size();
141            Node* opt = new Node[numBreaks];
142            opt[0].prev = -1;
143            opt[0].prevCount = 0;
144            opt[0].width = 0;
145            opt[0].demerits = 0;
146            opt[0].flags = false;
147            opt[numBreaks - 1].prev = -1;
148            opt[numBreaks - 1].prevCount = 0;
149
150            std::list<int> active;
151            active.push_back(0);
152            int lastBreak = 0;
153            for (int i = 0; i < numBreaks; i++) {
154                const Primitive& p = mPrimitives[i];
155                if (p.type == kPrimitiveType_Penalty) {
156                    const bool finalBreak = (i + 1 == numBreaks);
157                    bool breakFound = false;
158                    Node bestBreak;
159                    for (std::list<int>::iterator it = active.begin(); it != active.end(); /* incrementing done in loop */) {
160                        const int pos = *it;
161                        bool flags;
162                        float width, printedWidth;
163                        const int lines = opt[pos].prevCount;
164                        const float maxWidth = mLineWidth.getLineWidth(lines);
165                        // we have to compute metrics every time --
166                        // we can't really precompute this stuff and just deal with breaks
167                        // because of the way tab characters work, this makes it computationally
168                        // harder, but this way, we can still optimize while treating tab characters
169                        // correctly
170                        computeMetrics(pos, i, &width, &printedWidth, &flags);
171                        if (printedWidth <= maxWidth) {
172                            float demerits = computeDemerits(maxWidth, printedWidth,
173                                    finalBreak, p.penalty) + opt[pos].demerits;
174                            if (!breakFound || demerits < bestBreak.demerits) {
175                                bestBreak.prev = pos;
176                                bestBreak.prevCount = opt[pos].prevCount + 1;
177                                bestBreak.demerits = demerits;
178                                bestBreak.width = printedWidth;
179                                bestBreak.flags = flags;
180                                breakFound = true;
181                            }
182                            ++it;
183                        } else {
184                            active.erase(it++); // safe to delete like this
185                        }
186                    }
187                    if (p.penalty == -PENALTY_INFINITY) {
188                        active.clear();
189                    }
190                    if (breakFound) {
191                        opt[i] = bestBreak;
192                        active.push_back(i);
193                        lastBreak = i;
194                    }
195                    if (active.empty()) {
196                        // we can't give up!
197                        float width, printedWidth;
198                        bool flags;
199                        const int lines = opt[lastBreak].prevCount;
200                        const float maxWidth = mLineWidth.getLineWidth(lines);
201                        const int breakIndex = desperateBreak(lastBreak, numBreaks, maxWidth, &width, &printedWidth, &flags);
202
203                        opt[breakIndex].prev = lastBreak;
204                        opt[breakIndex].prevCount = lines + 1;
205                        opt[breakIndex].demerits = 0; // doesn't matter, it's the only one
206                        opt[breakIndex].width = width;
207                        opt[breakIndex].flags = flags;
208
209                        active.push_back(breakIndex);
210                        lastBreak = breakIndex;
211                        i = breakIndex; // incremented by i++
212                    }
213                }
214            }
215
216            int idx = numBreaks - 1;
217            int count = opt[idx].prevCount;
218            breaks->resize(count);
219            widths->resize(count);
220            flags->resize(count);
221            while (opt[idx].prev != -1) {
222                --count;
223
224                (*breaks)[count] = mPrimitives[idx].location;
225                (*widths)[count] = opt[idx].width;
226                (*flags)[count] = opt[idx].flags;
227
228                idx = opt[idx].prev;
229            }
230            delete[] opt;
231        }
232    private:
233        inline void computeMetrics(int start, int end, float* width, float* printedWidth, bool* flags) const {
234            bool f = false;
235            float w = 0, pw = 0;
236            for (int i = start; i < end; i++) {
237                const Primitive& p = mPrimitives[i];
238                if (p.type == kPrimitiveType_Box || p.type == kPrimitiveType_Glue) {
239                    w += p.width;
240                    if (p.type == kPrimitiveType_Box) {
241                        pw = w;
242                    }
243                } else if (p.type == kPrimitiveType_Variable) {
244                    w = p.tabStop->width(w);
245                    f = true;
246                }
247            }
248            *width = w;
249            *printedWidth = pw;
250            *flags = f;
251        }
252
253        inline float computeDemerits(float maxWidth, float width, bool finalBreak, float penalty) const {
254            float deviation = finalBreak ? 0 : maxWidth - width;
255            return (deviation * deviation) + penalty;
256        }
257
258        // returns end pos (chosen break), -1 if fail
259        inline int desperateBreak(int start, int limit, float maxWidth, float* width, float* printedWidth, bool* flags) const {
260            float w = 0, pw = 0;
261            bool breakFound = false;
262            int breakIndex = 0, firstTabIndex = INT_MAX;
263            float breakWidth, breakPrintedWidth;
264            for (int i = start; i < limit; i++) {
265                const Primitive& p = mPrimitives[i];
266
267                if (p.type == kPrimitiveType_Box || p.type == kPrimitiveType_Glue) {
268                    w += p.width;
269                    if (p.type == kPrimitiveType_Box) {
270                        pw = w;
271                    }
272                } else if (p.type == kPrimitiveType_Variable) {
273                    w = p.tabStop->width(w);
274                    firstTabIndex = std::min(firstTabIndex, i);
275                }
276
277                if (pw > maxWidth) {
278                    if (breakFound) {
279                        break;
280                    } else {
281                        // no choice, keep going
282                    }
283                }
284
285                // must make progress
286                if (i > start && (p.type == kPrimitiveType_Penalty || p.type == kPrimitiveType_Wordbreak)) {
287                    breakFound = true;
288                    breakIndex = i;
289                    breakWidth = w;
290                    breakPrintedWidth = pw;
291                }
292            }
293
294            if (breakFound) {
295                *width = w;
296                *printedWidth = pw;
297                *flags = (start <= firstTabIndex && firstTabIndex < breakIndex);
298                return breakIndex;
299            } else {
300                return -1;
301            }
302        }
303
304        struct Node {
305            int prev; // set to sentinel value (-1) for initial node
306            int prevCount; // number of breaks so far
307            float demerits;
308            float width;
309            bool flags;
310        };
311};
312
313class GreedyLineBreaker : public LineBreaker {
314    public:
315        GreedyLineBreaker(const std::vector<Primitive>& primitives, const LineWidth& lineWidth) :
316            LineBreaker(primitives, lineWidth) {}
317        void computeBreaks(std::vector<int>* breaks, std::vector<float>* widths,
318                           std::vector<unsigned char>* flags) const {
319            int lineNum = 0;
320            float width = 0, printedWidth = 0;
321            bool breakFound = false, goodBreakFound = false;
322            int breakIndex = 0, goodBreakIndex = 0;
323            float breakWidth = 0, goodBreakWidth = 0;
324            int firstTabIndex = INT_MAX;
325
326            float maxWidth = mLineWidth.getLineWidth(lineNum);
327
328            const int numPrimitives = mPrimitives.size();
329            // greedily fit as many characters as possible on each line
330            // loop over all primitives, and choose the best break point
331            // (if possible, a break point without splitting a word)
332            // after going over the maximum length
333            for (int i = 0; i < numPrimitives; i++) {
334                const Primitive& p = mPrimitives[i];
335
336                // update the current line width
337                if (p.type == kPrimitiveType_Box || p.type == kPrimitiveType_Glue) {
338                    width += p.width;
339                    if (p.type == kPrimitiveType_Box) {
340                        printedWidth = width;
341                    }
342                } else if (p.type == kPrimitiveType_Variable) {
343                    width = p.tabStop->width(width);
344                    // keep track of first tab character in the region we are examining
345                    // so we can determine whether or not a line contains a tab
346                    firstTabIndex = std::min(firstTabIndex, i);
347                }
348
349                // find the best break point for the characters examined so far
350                if (printedWidth > maxWidth) {
351                    if (breakFound || goodBreakFound) {
352                        if (goodBreakFound) {
353                            // a true line break opportunity existed in the characters examined so far,
354                            // so there is no need to split a word
355                            i = goodBreakIndex; // no +1 because of i++
356                            lineNum++;
357                            maxWidth = mLineWidth.getLineWidth(lineNum);
358                            breaks->push_back(mPrimitives[goodBreakIndex].location);
359                            widths->push_back(goodBreakWidth);
360                            flags->push_back(firstTabIndex < goodBreakIndex);
361                            firstTabIndex = SIZE_MAX;
362                        } else {
363                            // must split a word because there is no other option
364                            i = breakIndex; // no +1 because of i++
365                            lineNum++;
366                            maxWidth = mLineWidth.getLineWidth(lineNum);
367                            breaks->push_back(mPrimitives[breakIndex].location);
368                            widths->push_back(breakWidth);
369                            flags->push_back(firstTabIndex < breakIndex);
370                            firstTabIndex = SIZE_MAX;
371                        }
372                        printedWidth = width = 0;
373                        goodBreakFound = breakFound = false;
374                        goodBreakWidth = breakWidth = 0;
375                        continue;
376                    } else {
377                        // no choice, keep going... must make progress by putting at least one
378                        // character on a line, even if part of that character is cut off --
379                        // there is no other option
380                    }
381                }
382
383                // update possible break points
384                if (p.type == kPrimitiveType_Penalty && p.penalty < PENALTY_INFINITY) {
385                    // this does not handle penalties with width
386
387                    // handle forced line break
388                    if (p.penalty == -PENALTY_INFINITY) {
389                        lineNum++;
390                        maxWidth = mLineWidth.getLineWidth(lineNum);
391                        breaks->push_back(p.location);
392                        widths->push_back(printedWidth);
393                        flags->push_back(firstTabIndex < i);
394                        firstTabIndex = SIZE_MAX;
395                        printedWidth = width = 0;
396                        goodBreakFound = breakFound = false;
397                        goodBreakWidth = breakWidth = 0;
398                        continue;
399                    }
400                    if (i > breakIndex && (printedWidth <= maxWidth || breakFound == false)) {
401                        breakFound = true;
402                        breakIndex = i;
403                        breakWidth = printedWidth;
404                    }
405                    if (i > goodBreakIndex && printedWidth <= maxWidth) {
406                        goodBreakFound = true;
407                        goodBreakIndex = i;
408                        goodBreakWidth = printedWidth;
409                    }
410                } else if (p.type == kPrimitiveType_Wordbreak) {
411                    // only do this if necessary -- we don't want to break words
412                    // when possible, but sometimes it is unavoidable
413                    if (i > breakIndex && (printedWidth <= maxWidth || breakFound == false)) {
414                        breakFound = true;
415                        breakIndex = i;
416                        breakWidth = printedWidth;
417                    }
418                }
419            }
420
421            if (breakFound || goodBreakFound) {
422                // output last break if there are more characters to output
423                if (goodBreakFound) {
424                    breaks->push_back(mPrimitives[goodBreakIndex].location);
425                    widths->push_back(goodBreakWidth);
426                    flags->push_back(firstTabIndex < goodBreakIndex);
427                } else {
428                    breaks->push_back(mPrimitives[breakIndex].location);
429                    widths->push_back(breakWidth);
430                    flags->push_back(firstTabIndex < breakIndex);
431                }
432            }
433        }
434};
435
436class ScopedBreakIterator {
437    public:
438        ScopedBreakIterator(JNIEnv* env, BreakIterator* breakIterator, const jchar* inputText,
439                            jint length) : mBreakIterator(breakIterator), mChars(inputText) {
440            UErrorCode status = U_ZERO_ERROR;
441            mUText = utext_openUChars(NULL, mChars, length, &status);
442            if (mUText == NULL) {
443                return;
444            }
445
446            mBreakIterator->setText(mUText, status);
447        }
448
449        inline BreakIterator* operator->() {
450            return mBreakIterator;
451        }
452
453        ~ScopedBreakIterator() {
454            utext_close(mUText);
455            delete mBreakIterator;
456        }
457    private:
458        BreakIterator* mBreakIterator;
459        const jchar* mChars;
460        UText* mUText;
461
462        // disable copying and assignment
463        ScopedBreakIterator(const ScopedBreakIterator&);
464        void operator=(const ScopedBreakIterator&);
465};
466
467static jint recycleCopy(JNIEnv* env, jobject recycle, jintArray recycleBreaks,
468                        jfloatArray recycleWidths, jbooleanArray recycleFlags,
469                        jint recycleLength, const std::vector<jint>& breaks,
470                        const std::vector<jfloat>& widths, const std::vector<jboolean>& flags) {
471    int bufferLength = breaks.size();
472    if (recycleLength < bufferLength) {
473        // have to reallocate buffers
474        recycleBreaks = env->NewIntArray(bufferLength);
475        recycleWidths = env->NewFloatArray(bufferLength);
476        recycleFlags = env->NewBooleanArray(bufferLength);
477
478        env->SetObjectField(recycle, gLineBreaks_fieldID.breaks, recycleBreaks);
479        env->SetObjectField(recycle, gLineBreaks_fieldID.widths, recycleWidths);
480        env->SetObjectField(recycle, gLineBreaks_fieldID.flags, recycleFlags);
481    }
482    // copy data
483    env->SetIntArrayRegion(recycleBreaks, 0, breaks.size(), &breaks.front());
484    env->SetFloatArrayRegion(recycleWidths, 0, widths.size(), &widths.front());
485    env->SetBooleanArrayRegion(recycleFlags, 0, flags.size(), &flags.front());
486
487    return bufferLength;
488}
489
490void computePrimitives(const jchar* textArr, const jfloat* widthsArr, jint length, const std::vector<int>& breaks,
491                       const TabStops& tabStopCalculator, std::vector<Primitive>* primitives) {
492    int breaksSize = breaks.size();
493    int breakIndex = 0;
494    Primitive p;
495    for (int i = 0; i < length; i++) {
496        p.location = i;
497        jchar c = textArr[i];
498        if (c == CHAR_SPACE || c == CHAR_ZWSP) {
499            p.type = kPrimitiveType_Glue;
500            p.width = widthsArr[i];
501            primitives->push_back(p);
502        } else if (c == CHAR_TAB) {
503            p.type = kPrimitiveType_Variable;
504            p.tabStop = &tabStopCalculator; // shared between all variable primitives
505            primitives->push_back(p);
506        } else if (c != CHAR_NEWLINE) {
507            while (breakIndex < breaksSize && breaks[breakIndex] < i) breakIndex++;
508            p.width = 0;
509            if (breakIndex < breaksSize && breaks[breakIndex] == i) {
510                p.type = kPrimitiveType_Penalty;
511                p.penalty = 0;
512            } else {
513                p.type = kPrimitiveType_Wordbreak;
514            }
515            if (widthsArr[i] != 0) {
516                primitives->push_back(p);
517            }
518
519            p.type = kPrimitiveType_Box;
520            p.width = widthsArr[i];
521            primitives->push_back(p);
522        }
523    }
524    // final break at end of everything
525    p.location = length;
526    p.type = kPrimitiveType_Penalty;
527    p.width = 0;
528    p.penalty = -PENALTY_INFINITY;
529    primitives->push_back(p);
530}
531
532static jint nComputeLineBreaks(JNIEnv* env, jclass, jstring javaLocaleName,
533                               jcharArray inputText, jfloatArray widths, jint length,
534                               jfloat firstWidth, jint firstWidthLineLimit, jfloat restWidth,
535                               jintArray variableTabStops, jint defaultTabStop, jboolean optimize,
536                               jobject recycle, jintArray recycleBreaks,
537                               jfloatArray recycleWidths, jbooleanArray recycleFlags,
538                               jint recycleLength) {
539    jintArray ret;
540    std::vector<int> breaks;
541
542    ScopedCharArrayRO textScopedArr(env, inputText);
543    ScopedFloatArrayRO widthsScopedArr(env, widths);
544
545    ScopedIcuLocale icuLocale(env, javaLocaleName);
546    if (icuLocale.valid()) {
547        UErrorCode status = U_ZERO_ERROR;
548        BreakIterator* it = BreakIterator::createLineInstance(icuLocale.locale(), status);
549        if (!U_SUCCESS(status) || it == NULL) {
550            if (it) {
551                delete it;
552            }
553        } else {
554            ScopedBreakIterator breakIterator(env, it, textScopedArr.get(), length);
555            int loc = breakIterator->first();
556            while ((loc = breakIterator->next()) != BreakIterator::DONE) {
557                breaks.push_back(loc);
558            }
559        }
560    }
561
562    std::vector<Primitive> primitives;
563    TabStops tabStops(env, variableTabStops, defaultTabStop);
564    computePrimitives(textScopedArr.get(), widthsScopedArr.get(), length, breaks, tabStops, &primitives);
565
566    LineWidth lineWidth(firstWidth, firstWidthLineLimit, restWidth);
567    std::vector<int> computedBreaks;
568    std::vector<float> computedWidths;
569    std::vector<unsigned char> computedFlags;
570
571    if (optimize) {
572        OptimizingLineBreaker breaker(primitives, lineWidth);
573        breaker.computeBreaks(&computedBreaks, &computedWidths, &computedFlags);
574    } else {
575        GreedyLineBreaker breaker(primitives, lineWidth);
576        breaker.computeBreaks(&computedBreaks, &computedWidths, &computedFlags);
577    }
578
579    return recycleCopy(env, recycle, recycleBreaks, recycleWidths, recycleFlags, recycleLength,
580            computedBreaks, computedWidths, computedFlags);
581}
582
583static JNINativeMethod gMethods[] = {
584    {"nComputeLineBreaks", "(Ljava/lang/String;[C[FIFIF[IIZLandroid/text/StaticLayout$LineBreaks;[I[F[ZI)I", (void*) nComputeLineBreaks}
585};
586
587int register_android_text_StaticLayout(JNIEnv* env)
588{
589    gLineBreaks_class = reinterpret_cast<jclass>(env->NewGlobalRef(
590            env->FindClass("android/text/StaticLayout$LineBreaks")));
591
592    gLineBreaks_fieldID.breaks = env->GetFieldID(gLineBreaks_class, "breaks", "[I");
593    gLineBreaks_fieldID.widths = env->GetFieldID(gLineBreaks_class, "widths", "[F");
594    gLineBreaks_fieldID.flags = env->GetFieldID(gLineBreaks_class, "flags", "[Z");
595
596    return AndroidRuntime::registerNativeMethods(env, "android/text/StaticLayout",
597            gMethods, NELEM(gMethods));
598}
599
600}
601