ComplexTextControllerLinux.cpp revision 65f03d4f644ce73618e5f4f50dd694b26f55ae12
1/*
2 * Copyright (c) 2010 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 *     * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *     * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 *     * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include "config.h"
32#include "ComplexTextControllerLinux.h"
33
34#include "Font.h"
35
36#include <unicode/normlzr.h>
37
38namespace WebCore {
39
40// Harfbuzz uses 26.6 fixed point values for pixel offsets. However, we don't
41// handle subpixel positioning so this function is used to truncate Harfbuzz
42// values to a number of pixels.
43static int truncateFixedPointToInteger(HB_Fixed value)
44{
45    return value >> 6;
46}
47
48ComplexTextController::ComplexTextController(const TextRun& run, unsigned startingX, const Font* font)
49    : m_font(font)
50    , m_startingX(startingX)
51    , m_offsetX(m_startingX)
52    , m_run(getNormalizedTextRun(run, m_normalizedRun, m_normalizedBuffer))
53    , m_wordSpacingAdjustment(0)
54    , m_padding(0)
55    , m_padPerWordBreak(0)
56    , m_padError(0)
57    , m_letterSpacing(0)
58{
59    // Do not use |run| inside this constructor. Use |m_run| instead.
60
61    memset(&m_item, 0, sizeof(m_item));
62    // We cannot know, ahead of time, how many glyphs a given script run
63    // will produce. We take a guess that script runs will not produce more
64    // than twice as many glyphs as there are code points plus a bit of
65    // padding and fallback if we find that we are wrong.
66    createGlyphArrays((m_run.length() + 2) * 2);
67
68    m_item.log_clusters = new unsigned short[m_run.length()];
69
70    m_item.face = 0;
71    m_item.font = allocHarfbuzzFont();
72
73    m_item.item.bidiLevel = m_run.rtl();
74
75    m_item.string = m_run.characters();
76    m_item.stringLength = m_run.length();
77
78    reset();
79}
80
81ComplexTextController::~ComplexTextController()
82{
83    fastFree(m_item.font);
84    deleteGlyphArrays();
85    delete[] m_item.log_clusters;
86}
87
88bool ComplexTextController::isWordBreak(unsigned index)
89{
90    return index && isCodepointSpace(m_item.string[index]) && !isCodepointSpace(m_item.string[index - 1]);
91}
92
93int ComplexTextController::determineWordBreakSpacing(unsigned logClustersIndex)
94{
95    int wordBreakSpacing = 0;
96    // The first half of the conjunction works around the case where
97    // output glyphs aren't associated with any codepoints by the
98    // clusters log.
99    if (logClustersIndex < m_item.item.length
100        && isWordBreak(m_item.item.pos + logClustersIndex)) {
101        wordBreakSpacing = m_wordSpacingAdjustment;
102
103        if (m_padding > 0) {
104            int toPad = roundf(m_padPerWordBreak + m_padError);
105            m_padError += m_padPerWordBreak - toPad;
106
107            if (m_padding < toPad)
108                toPad = m_padding;
109            m_padding -= toPad;
110            wordBreakSpacing += toPad;
111        }
112    }
113    return wordBreakSpacing;
114}
115
116// setPadding sets a number of pixels to be distributed across the TextRun.
117// WebKit uses this to justify text.
118void ComplexTextController::setPadding(int padding)
119{
120    m_padding = padding;
121    if (!m_padding)
122        return;
123
124    // If we have padding to distribute, then we try to give an equal
125    // amount to each space. The last space gets the smaller amount, if
126    // any.
127    unsigned numWordBreaks = 0;
128
129    for (unsigned i = 0; i < m_item.stringLength; i++) {
130        if (isWordBreak(i))
131            numWordBreaks++;
132    }
133
134    if (numWordBreaks)
135        m_padPerWordBreak = m_padding / numWordBreaks;
136    else
137        m_padPerWordBreak = 0;
138}
139
140void ComplexTextController::reset()
141{
142    m_indexOfNextScriptRun = 0;
143    m_offsetX = m_startingX;
144}
145
146// Advance to the next script run, returning false when the end of the
147// TextRun has been reached.
148bool ComplexTextController::nextScriptRun()
149{
150    if (!hb_utf16_script_run_next(&m_numCodePoints, &m_item.item, m_run.characters(), m_run.length(), &m_indexOfNextScriptRun))
151        return false;
152
153    // It is actually wrong to consider script runs at all in this code.
154    // Other WebKit code (e.g. Mac) segments complex text just by finding
155    // the longest span of text covered by a single font.
156    // But we currently need to call hb_utf16_script_run_next anyway to fill
157    // in the harfbuzz data structures to e.g. pick the correct script's shaper.
158    // So we allow that to run first, then do a second pass over the range it
159    // found and take the largest subregion that stays within a single font.
160    m_currentFontData = m_font->glyphDataForCharacter(m_item.string[m_item.item.pos], false).fontData;
161    unsigned endOfRun;
162    for (endOfRun = 1; endOfRun < m_item.item.length; ++endOfRun) {
163        const SimpleFontData* nextFontData = m_font->glyphDataForCharacter(m_item.string[m_item.item.pos + endOfRun], false).fontData;
164        if (nextFontData != m_currentFontData)
165            break;
166    }
167    m_item.item.length = endOfRun;
168    m_indexOfNextScriptRun = m_item.item.pos + endOfRun;
169
170    setupFontForScriptRun();
171    shapeGlyphs();
172    setGlyphXPositions(rtl());
173
174    return true;
175}
176
177float ComplexTextController::widthOfFullRun()
178{
179    float widthSum = 0;
180    while (nextScriptRun())
181        widthSum += width();
182
183    return widthSum;
184}
185
186void ComplexTextController::setupFontForScriptRun()
187{
188    const FontData* fontData = m_font->glyphDataForCharacter(m_item.string[m_item.item.pos], false).fontData;
189    const FontPlatformData& platformData = fontData->fontDataForCharacter(' ')->platformData();
190    m_item.face = platformData.harfbuzzFace();
191    void* opaquePlatformData = const_cast<FontPlatformData*>(&platformData);
192    m_item.font->userData = opaquePlatformData;
193}
194
195HB_FontRec* ComplexTextController::allocHarfbuzzFont()
196{
197    HB_FontRec* font = reinterpret_cast<HB_FontRec*>(fastMalloc(sizeof(HB_FontRec)));
198    memset(font, 0, sizeof(HB_FontRec));
199    font->klass = &harfbuzzSkiaClass;
200    font->userData = 0;
201    // The values which harfbuzzSkiaClass returns are already scaled to
202    // pixel units, so we just set all these to one to disable further
203    // scaling.
204    font->x_ppem = 1;
205    font->y_ppem = 1;
206    font->x_scale = 1;
207    font->y_scale = 1;
208
209    return font;
210}
211
212void ComplexTextController::deleteGlyphArrays()
213{
214    delete[] m_item.glyphs;
215    delete[] m_item.attributes;
216    delete[] m_item.advances;
217    delete[] m_item.offsets;
218    delete[] m_glyphs16;
219    delete[] m_xPositions;
220}
221
222void ComplexTextController::createGlyphArrays(int size)
223{
224    m_item.glyphs = new HB_Glyph[size];
225    m_item.attributes = new HB_GlyphAttributes[size];
226    m_item.advances = new HB_Fixed[size];
227    m_item.offsets = new HB_FixedPoint[size];
228
229    m_glyphs16 = new uint16_t[size];
230    m_xPositions = new SkScalar[size];
231
232    m_item.num_glyphs = size;
233    m_glyphsArrayCapacity = size; // Save the GlyphArrays size.
234    resetGlyphArrays();
235}
236
237void ComplexTextController::resetGlyphArrays()
238{
239    int size = m_item.num_glyphs;
240    // All the types here don't have pointers. It is safe to reset to
241    // zero unless Harfbuzz breaks the compatibility in the future.
242    memset(m_item.glyphs, 0, size * sizeof(HB_Glyph));
243    memset(m_item.attributes, 0, size * sizeof(HB_GlyphAttributes));
244    memset(m_item.advances, 0, size * sizeof(HB_Fixed));
245    memset(m_item.offsets, 0, size * sizeof(HB_FixedPoint));
246    memset(m_glyphs16, 0, size * sizeof(uint16_t));
247    memset(m_xPositions, 0, size * sizeof(SkScalar));
248}
249
250void ComplexTextController::shapeGlyphs()
251{
252    // HB_ShapeItem() resets m_item.num_glyphs. If the previous call to
253    // HB_ShapeItem() used less space than was available, the capacity of
254    // the array may be larger than the current value of m_item.num_glyphs.
255    // So, we need to reset the num_glyphs to the capacity of the array.
256    m_item.num_glyphs = m_glyphsArrayCapacity;
257    resetGlyphArrays();
258    while (!HB_ShapeItem(&m_item)) {
259        // We overflowed our arrays. Resize and retry.
260        // HB_ShapeItem fills in m_item.num_glyphs with the needed size.
261        deleteGlyphArrays();
262        // The |+ 1| here is a workaround for a bug in Harfbuzz: the Khmer
263        // shaper (at least) can fail because of insufficient glyph buffers
264        // and request 0 additional glyphs: throwing us into an infinite
265        // loop.
266        createGlyphArrays(m_item.num_glyphs + 1);
267    }
268}
269
270void ComplexTextController::setGlyphXPositions(bool isRTL)
271{
272    const double rtlFlip = isRTL ? -1 : 1;
273    double position = 0;
274
275    // logClustersIndex indexes logClusters for the first codepoint of the current glyph.
276    // Each time we advance a glyph, we skip over all the codepoints that contributed to the current glyph.
277    int logClustersIndex = 0;
278
279    // Iterate through the glyphs in logical order, flipping for RTL where necessary.
280    // In RTL mode all variables are positive except m_xPositions, which starts from m_offsetX and runs negative.
281    // It is fixed up in a second pass below.
282    for (size_t i = 0; i < m_item.num_glyphs; ++i) {
283        while (static_cast<unsigned>(logClustersIndex) < m_item.item.length && logClusters()[logClustersIndex] < i)
284            logClustersIndex++;
285
286        // If the current glyph is just after a space, add in the word spacing.
287        position += determineWordBreakSpacing(logClustersIndex);
288
289        m_glyphs16[i] = m_item.glyphs[i];
290        double offsetX = truncateFixedPointToInteger(m_item.offsets[i].x);
291        double advance = truncateFixedPointToInteger(m_item.advances[i]);
292        if (isRTL)
293            offsetX -= advance;
294
295        m_xPositions[i] = m_offsetX + (position * rtlFlip) + offsetX;
296
297        if (m_currentFontData->isZeroWidthSpaceGlyph(m_glyphs16[i]))
298            continue;
299
300        // At the end of each cluster, add in the letter spacing.
301        if (i + 1 == m_item.num_glyphs || m_item.attributes[i + 1].clusterStart)
302            position += m_letterSpacing;
303
304        position += advance;
305    }
306    const double width = position;
307
308    // Now that we've computed the total width, do another pass to fix positioning for RTL.
309    if (isRTL) {
310        for (size_t i = 0; i < m_item.num_glyphs; ++i)
311            m_xPositions[i] += width;
312    }
313
314    m_pixelWidth = std::max(width, 0.0);
315    m_offsetX += m_pixelWidth;
316}
317
318void ComplexTextController::normalizeSpacesAndMirrorChars(const UChar* source, bool rtl, UChar* destination, int length)
319{
320    int position = 0;
321    bool error = false;
322    // Iterate characters in source and mirror character if needed.
323    while (position < length) {
324        UChar32 character;
325        int nextPosition = position;
326        U16_NEXT(source, nextPosition, length, character);
327        if (Font::treatAsSpace(character))
328            character = ' ';
329        else if (Font::treatAsZeroWidthSpace(character))
330            character = zeroWidthSpace;
331        else if (rtl)
332            character = u_charMirror(character);
333        U16_APPEND(destination, position, length, character, error);
334        ASSERT(!error);
335        position = nextPosition;
336    }
337}
338
339const TextRun& ComplexTextController::getNormalizedTextRun(const TextRun& originalRun, OwnPtr<TextRun>& normalizedRun, OwnArrayPtr<UChar>& normalizedBuffer)
340{
341    // Normalize the text run in three ways:
342    // 1) Convert the |originalRun| to NFC normalized form if combining diacritical marks
343    // (U+0300..) are used in the run. This conversion is necessary since most OpenType
344    // fonts (e.g., Arial) don't have substitution rules for the diacritical marks in
345    // their GSUB tables.
346    //
347    // Note that we don't use the icu::Normalizer::isNormalized(UNORM_NFC) API here since
348    // the API returns FALSE (= not normalized) for complex runs that don't require NFC
349    // normalization (e.g., Arabic text). Unless the run contains the diacritical marks,
350    // Harfbuzz will do the same thing for us using the GSUB table.
351    // 2) Convert spacing characters into plain spaces, as some fonts will provide glyphs
352    // for characters like '\n' otherwise.
353    // 3) Convert mirrored characters such as parenthesis for rtl text.
354
355    // Convert to NFC form if the text has diacritical marks.
356    icu::UnicodeString normalizedString;
357    UErrorCode error = U_ZERO_ERROR;
358
359    for (int16_t i = 0; i < originalRun.length(); ++i) {
360        UChar ch = originalRun[i];
361        if (::ublock_getCode(ch) == UBLOCK_COMBINING_DIACRITICAL_MARKS) {
362            icu::Normalizer::normalize(icu::UnicodeString(originalRun.characters(),
363                                       originalRun.length()), UNORM_NFC, 0 /* no options */,
364                                       normalizedString, error);
365            if (U_FAILURE(error))
366                return originalRun;
367            break;
368        }
369    }
370
371    // Normalize space and mirror parenthesis for rtl text.
372    int normalizedBufferLength;
373    const UChar* sourceText;
374    if (normalizedString.isEmpty()) {
375        normalizedBufferLength = originalRun.length();
376        sourceText = originalRun.characters();
377    } else {
378        normalizedBufferLength = normalizedString.length();
379        sourceText = normalizedString.getBuffer();
380    }
381
382    normalizedBuffer.set(new UChar[normalizedBufferLength + 1]);
383
384    normalizeSpacesAndMirrorChars(sourceText, originalRun.rtl(), normalizedBuffer.get(), normalizedBufferLength);
385
386    normalizedRun.set(new TextRun(originalRun));
387    normalizedRun->setText(normalizedBuffer.get(), normalizedBufferLength);
388    return *normalizedRun;
389}
390
391} // namespace WebCore
392