ComplexTextControllerLinux.cpp revision cad810f21b803229eb11403f9209855525a25d57
1/*
2 * Copyright (c) 2010 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 *     * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *     * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 *     * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include "config.h"
32#include "ComplexTextControllerLinux.h"
33
34#include "Font.h"
35
36#include <unicode/normlzr.h>
37
38namespace WebCore {
39
40// Harfbuzz uses 26.6 fixed point values for pixel offsets. However, we don't
41// handle subpixel positioning so this function is used to truncate Harfbuzz
42// values to a number of pixels.
43static int truncateFixedPointToInteger(HB_Fixed value)
44{
45    return value >> 6;
46}
47
48ComplexTextController::ComplexTextController(const TextRun& run, unsigned startingX, const Font* font)
49    : m_font(font)
50    , m_startingX(startingX)
51    , m_offsetX(m_startingX)
52    , m_run(getNormalizedTextRun(run, m_normalizedRun, m_normalizedBuffer))
53    , m_iterateBackwards(m_run.rtl())
54    , m_wordSpacingAdjustment(0)
55    , m_padding(0)
56    , m_padPerWordBreak(0)
57    , m_padError(0)
58    , m_letterSpacing(0)
59{
60    // Do not use |run| inside this constructor. Use |m_run| instead.
61
62    memset(&m_item, 0, sizeof(m_item));
63    // We cannot know, ahead of time, how many glyphs a given script run
64    // will produce. We take a guess that script runs will not produce more
65    // than twice as many glyphs as there are code points plus a bit of
66    // padding and fallback if we find that we are wrong.
67    createGlyphArrays((m_run.length() + 2) * 2);
68
69    m_item.log_clusters = new unsigned short[m_run.length()];
70
71    m_item.face = 0;
72    m_item.font = allocHarfbuzzFont();
73
74    m_item.item.bidiLevel = m_run.rtl();
75
76    m_item.string = m_run.characters();
77    m_item.stringLength = m_run.length();
78
79    reset();
80}
81
82ComplexTextController::~ComplexTextController()
83{
84    fastFree(m_item.font);
85    deleteGlyphArrays();
86    delete[] m_item.log_clusters;
87}
88
89bool ComplexTextController::isWordBreak(unsigned index)
90{
91    return index && isCodepointSpace(m_item.string[index]) && !isCodepointSpace(m_item.string[index - 1]);
92}
93
94int ComplexTextController::determineWordBreakSpacing(unsigned logClustersIndex)
95{
96    int wordBreakSpacing = 0;
97    // The first half of the conjunction works around the case where
98    // output glyphs aren't associated with any codepoints by the
99    // clusters log.
100    if (logClustersIndex < m_item.item.length
101        && isWordBreak(m_item.item.pos + logClustersIndex)) {
102        wordBreakSpacing = m_wordSpacingAdjustment;
103
104        if (m_padding > 0) {
105            int toPad = roundf(m_padPerWordBreak + m_padError);
106            m_padError += m_padPerWordBreak - toPad;
107
108            if (m_padding < toPad)
109                toPad = m_padding;
110            m_padding -= toPad;
111            wordBreakSpacing += toPad;
112        }
113    }
114    return wordBreakSpacing;
115}
116
117// setPadding sets a number of pixels to be distributed across the TextRun.
118// WebKit uses this to justify text.
119void ComplexTextController::setPadding(int padding)
120{
121    m_padding = padding;
122    if (!m_padding)
123        return;
124
125    // If we have padding to distribute, then we try to give an equal
126    // amount to each space. The last space gets the smaller amount, if
127    // any.
128    unsigned numWordBreaks = 0;
129
130    for (unsigned i = 0; i < m_item.stringLength; i++) {
131        if (isWordBreak(i))
132            numWordBreaks++;
133    }
134
135    if (numWordBreaks)
136        m_padPerWordBreak = m_padding / numWordBreaks;
137    else
138        m_padPerWordBreak = 0;
139}
140
141void ComplexTextController::reset()
142{
143    if (m_iterateBackwards)
144        m_indexOfNextScriptRun = m_run.length() - 1;
145    else
146        m_indexOfNextScriptRun = 0;
147    m_offsetX = m_startingX;
148}
149
150void ComplexTextController::setBackwardsIteration(bool isBackwards)
151{
152    m_iterateBackwards = isBackwards;
153    reset();
154}
155
156// Advance to the next script run, returning false when the end of the
157// TextRun has been reached.
158bool ComplexTextController::nextScriptRun()
159{
160    if (m_iterateBackwards) {
161        // In right-to-left mode we need to render the shaped glyph backwards and
162        // also render the script runs themselves backwards. So given a TextRun:
163        //    AAAAAAACTTTTTTT   (A = Arabic, C = Common, T = Thai)
164        // we render:
165        //    TTTTTTCAAAAAAA
166        // (and the glyphs in each A, C and T section are backwards too)
167        if (!hb_utf16_script_run_prev(&m_numCodePoints, &m_item.item, m_run.characters(), m_run.length(), &m_indexOfNextScriptRun))
168            return false;
169        m_currentFontData = m_font->glyphDataForCharacter(m_item.string[m_item.item.pos], false).fontData;
170    } else {
171        if (!hb_utf16_script_run_next(&m_numCodePoints, &m_item.item, m_run.characters(), m_run.length(), &m_indexOfNextScriptRun))
172            return false;
173
174        // It is actually wrong to consider script runs at all in this code.
175        // Other WebKit code (e.g. Mac) segments complex text just by finding
176        // the longest span of text covered by a single font.
177        // But we currently need to call hb_utf16_script_run_next anyway to fill
178        // in the harfbuzz data structures to e.g. pick the correct script's shaper.
179        // So we allow that to run first, then do a second pass over the range it
180        // found and take the largest subregion that stays within a single font.
181        m_currentFontData = m_font->glyphDataForCharacter(m_item.string[m_item.item.pos], false).fontData;
182        unsigned endOfRun;
183        for (endOfRun = 1; endOfRun < m_item.item.length; ++endOfRun) {
184            const SimpleFontData* nextFontData = m_font->glyphDataForCharacter(m_item.string[m_item.item.pos + endOfRun], false).fontData;
185            if (nextFontData != m_currentFontData)
186                break;
187        }
188        m_item.item.length = endOfRun;
189        m_indexOfNextScriptRun = m_item.item.pos + endOfRun;
190    }
191
192    setupFontForScriptRun();
193    shapeGlyphs();
194    setGlyphXPositions(rtl());
195
196    return true;
197}
198
199float ComplexTextController::widthOfFullRun()
200{
201    float widthSum = 0;
202    while (nextScriptRun())
203        widthSum += width();
204
205    return widthSum;
206}
207
208void ComplexTextController::setupFontForScriptRun()
209{
210    const FontData* fontData = m_font->glyphDataForCharacter(m_item.string[m_item.item.pos], false).fontData;
211    const FontPlatformData& platformData = fontData->fontDataForCharacter(' ')->platformData();
212    m_item.face = platformData.harfbuzzFace();
213    void* opaquePlatformData = const_cast<FontPlatformData*>(&platformData);
214    m_item.font->userData = opaquePlatformData;
215}
216
217HB_FontRec* ComplexTextController::allocHarfbuzzFont()
218{
219    HB_FontRec* font = reinterpret_cast<HB_FontRec*>(fastMalloc(sizeof(HB_FontRec)));
220    memset(font, 0, sizeof(HB_FontRec));
221    font->klass = &harfbuzzSkiaClass;
222    font->userData = 0;
223    // The values which harfbuzzSkiaClass returns are already scaled to
224    // pixel units, so we just set all these to one to disable further
225    // scaling.
226    font->x_ppem = 1;
227    font->y_ppem = 1;
228    font->x_scale = 1;
229    font->y_scale = 1;
230
231    return font;
232}
233
234void ComplexTextController::deleteGlyphArrays()
235{
236    delete[] m_item.glyphs;
237    delete[] m_item.attributes;
238    delete[] m_item.advances;
239    delete[] m_item.offsets;
240    delete[] m_glyphs16;
241    delete[] m_xPositions;
242}
243
244void ComplexTextController::createGlyphArrays(int size)
245{
246    m_item.glyphs = new HB_Glyph[size];
247    m_item.attributes = new HB_GlyphAttributes[size];
248    m_item.advances = new HB_Fixed[size];
249    m_item.offsets = new HB_FixedPoint[size];
250
251    m_glyphs16 = new uint16_t[size];
252    m_xPositions = new SkScalar[size];
253
254    m_item.num_glyphs = size;
255    m_glyphsArrayCapacity = size; // Save the GlyphArrays size.
256    resetGlyphArrays();
257}
258
259void ComplexTextController::resetGlyphArrays()
260{
261    int size = m_item.num_glyphs;
262    // All the types here don't have pointers. It is safe to reset to
263    // zero unless Harfbuzz breaks the compatibility in the future.
264    memset(m_item.glyphs, 0, size * sizeof(HB_Glyph));
265    memset(m_item.attributes, 0, size * sizeof(HB_GlyphAttributes));
266    memset(m_item.advances, 0, size * sizeof(HB_Fixed));
267    memset(m_item.offsets, 0, size * sizeof(HB_FixedPoint));
268    memset(m_glyphs16, 0, size * sizeof(uint16_t));
269    memset(m_xPositions, 0, size * sizeof(SkScalar));
270}
271
272void ComplexTextController::shapeGlyphs()
273{
274    // HB_ShapeItem() resets m_item.num_glyphs. If the previous call to
275    // HB_ShapeItem() used less space than was available, the capacity of
276    // the array may be larger than the current value of m_item.num_glyphs.
277    // So, we need to reset the num_glyphs to the capacity of the array.
278    m_item.num_glyphs = m_glyphsArrayCapacity;
279    resetGlyphArrays();
280    while (!HB_ShapeItem(&m_item)) {
281        // We overflowed our arrays. Resize and retry.
282        // HB_ShapeItem fills in m_item.num_glyphs with the needed size.
283        deleteGlyphArrays();
284        // The |+ 1| here is a workaround for a bug in Harfbuzz: the Khmer
285        // shaper (at least) can fail because of insufficient glyph buffers
286        // and request 0 additional glyphs: throwing us into an infinite
287        // loop.
288        createGlyphArrays(m_item.num_glyphs + 1);
289    }
290}
291
292void ComplexTextController::setGlyphXPositions(bool isRTL)
293{
294    double position = 0;
295    // logClustersIndex indexes logClusters for the first (or last when
296    // RTL) codepoint of the current glyph.  Each time we advance a glyph,
297    // we skip over all the codepoints that contributed to the current
298    // glyph.
299    int logClustersIndex = 0;
300
301    if (isRTL) {
302        logClustersIndex = m_item.num_glyphs - 1;
303
304        // Glyphs are stored in logical order, but for layout purposes we
305        // always go left to right.
306        for (int i = m_item.num_glyphs - 1; i >= 0; --i) {
307            if (!m_currentFontData->isZeroWidthSpaceGlyph(m_glyphs16[i])) {
308                // Whitespace must be laid out in logical order, so when inserting
309                // spaces in RTL (but iterating in LTR order) we must insert spaces
310                // _before_ the next glyph.
311                if (static_cast<unsigned>(i + 1) >= m_item.num_glyphs || m_item.attributes[i + 1].clusterStart)
312                    position += m_letterSpacing;
313
314                position += determineWordBreakSpacing(logClustersIndex);
315            }
316
317            m_glyphs16[i] = m_item.glyphs[i];
318            double offsetX = truncateFixedPointToInteger(m_item.offsets[i].x);
319            m_xPositions[i] = m_offsetX + position + offsetX;
320
321            while (logClustersIndex > 0 && logClusters()[logClustersIndex] == i)
322                logClustersIndex--;
323
324            if (!m_currentFontData->isZeroWidthSpaceGlyph(m_glyphs16[i]))
325                position += truncateFixedPointToInteger(m_item.advances[i]);
326        }
327    } else {
328        for (size_t i = 0; i < m_item.num_glyphs; ++i) {
329            m_glyphs16[i] = m_item.glyphs[i];
330            double offsetX = truncateFixedPointToInteger(m_item.offsets[i].x);
331            m_xPositions[i] = m_offsetX + position + offsetX;
332
333            if (m_currentFontData->isZeroWidthSpaceGlyph(m_glyphs16[i]))
334                continue;
335
336            double advance = truncateFixedPointToInteger(m_item.advances[i]);
337
338            advance += determineWordBreakSpacing(logClustersIndex);
339
340            if (m_item.attributes[i].clusterStart)
341                advance += m_letterSpacing;
342
343            while (static_cast<unsigned>(logClustersIndex) < m_item.item.length && logClusters()[logClustersIndex] == i)
344                logClustersIndex++;
345
346            position += advance;
347        }
348    }
349    m_pixelWidth = std::max(position, 0.0);
350    m_offsetX += m_pixelWidth;
351}
352
353void ComplexTextController::normalizeSpacesAndMirrorChars(const UChar* source, bool rtl, UChar* destination, int length)
354{
355    int position = 0;
356    bool error = false;
357    // Iterate characters in source and mirror character if needed.
358    while (position < length) {
359        UChar32 character;
360        int nextPosition = position;
361        U16_NEXT(source, nextPosition, length, character);
362        if (Font::treatAsSpace(character))
363            character = ' ';
364        else if (Font::treatAsZeroWidthSpace(character))
365            character = zeroWidthSpace;
366        else if (rtl)
367            character = u_charMirror(character);
368        U16_APPEND(destination, position, length, character, error);
369        ASSERT(!error);
370        position = nextPosition;
371    }
372}
373
374const TextRun& ComplexTextController::getNormalizedTextRun(const TextRun& originalRun, OwnPtr<TextRun>& normalizedRun, OwnArrayPtr<UChar>& normalizedBuffer)
375{
376    // Normalize the text run in three ways:
377    // 1) Convert the |originalRun| to NFC normalized form if combining diacritical marks
378    // (U+0300..) are used in the run. This conversion is necessary since most OpenType
379    // fonts (e.g., Arial) don't have substitution rules for the diacritical marks in
380    // their GSUB tables.
381    //
382    // Note that we don't use the icu::Normalizer::isNormalized(UNORM_NFC) API here since
383    // the API returns FALSE (= not normalized) for complex runs that don't require NFC
384    // normalization (e.g., Arabic text). Unless the run contains the diacritical marks,
385    // Harfbuzz will do the same thing for us using the GSUB table.
386    // 2) Convert spacing characters into plain spaces, as some fonts will provide glyphs
387    // for characters like '\n' otherwise.
388    // 3) Convert mirrored characters such as parenthesis for rtl text.
389
390    // Convert to NFC form if the text has diacritical marks.
391    icu::UnicodeString normalizedString;
392    UErrorCode error = U_ZERO_ERROR;
393
394    for (int16_t i = 0; i < originalRun.length(); ++i) {
395        UChar ch = originalRun[i];
396        if (::ublock_getCode(ch) == UBLOCK_COMBINING_DIACRITICAL_MARKS) {
397            icu::Normalizer::normalize(icu::UnicodeString(originalRun.characters(),
398                                       originalRun.length()), UNORM_NFC, 0 /* no options */,
399                                       normalizedString, error);
400            if (U_FAILURE(error))
401                return originalRun;
402            break;
403        }
404    }
405
406    // Normalize space and mirror parenthesis for rtl text.
407    int normalizedBufferLength;
408    const UChar* sourceText;
409    if (normalizedString.isEmpty()) {
410        normalizedBufferLength = originalRun.length();
411        sourceText = originalRun.characters();
412    } else {
413        normalizedBufferLength = normalizedString.length();
414        sourceText = normalizedString.getBuffer();
415    }
416
417    normalizedBuffer.set(new UChar[normalizedBufferLength + 1]);
418
419    normalizeSpacesAndMirrorChars(sourceText, originalRun.rtl(), normalizedBuffer.get(), normalizedBufferLength);
420
421    normalizedRun.set(new TextRun(originalRun));
422    normalizedRun->setText(normalizedBuffer.get(), normalizedBufferLength);
423    return *normalizedRun;
424}
425
426} // namespace WebCore
427