WidthIterator.cpp revision ab9e7a118cf1ea2e3a93dce683b2ded3e7291ddb
1/*
2 * Copyright (C) 2003, 2006, 2008, 2009, 2010 Apple Inc. All rights reserved.
3 * Copyright (C) 2008 Holger Hans Peter Freyther
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB.  If not, write to
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 *
20 */
21
22#include "config.h"
23#include "WidthIterator.h"
24
25#include "Font.h"
26#include "GlyphBuffer.h"
27#include "SimpleFontData.h"
28#include "TextRun.h"
29#include <wtf/MathExtras.h>
30
31#if USE(ICU_UNICODE)
32#include <unicode/unorm.h>
33#endif
34
35using namespace WTF;
36using namespace Unicode;
37using namespace std;
38
39namespace WebCore {
40
// Canonical combining class value that normalizeVoicingMarks() compares against
// to recognize a Hiragana/Katakana voiced or semi-voiced sound mark following a
// base character. The value comes from
// http://www.unicode.org/Public/UNIDATA/UCD.html#Canonical_Combining_Class_Values
static const uint8_t hiraganaKatakanaVoicingMarksCombiningClass = 8;
43
44WidthIterator::WidthIterator(const Font* font, const TextRun& run, HashSet<const SimpleFontData*>* fallbackFonts, bool accountForGlyphBounds, bool forTextEmphasis)
45    : m_font(font)
46    , m_run(run)
47    , m_end(run.length())
48    , m_currentCharacter(0)
49    , m_runWidthSoFar(0)
50    , m_finalRoundingWidth(0)
51    , m_fallbackFonts(fallbackFonts)
52    , m_accountForGlyphBounds(accountForGlyphBounds)
53    , m_maxGlyphBoundingBoxY(numeric_limits<float>::min())
54    , m_minGlyphBoundingBoxY(numeric_limits<float>::max())
55    , m_firstGlyphOverflow(0)
56    , m_lastGlyphOverflow(0)
57    , m_forTextEmphasis(forTextEmphasis)
58{
59    // If the padding is non-zero, count the number of spaces in the run
60    // and divide that by the padding for per space addition.
61    m_padding = m_run.padding();
62    if (!m_padding)
63        m_padPerSpace = 0;
64    else {
65        int numSpaces = 0;
66        for (int i = 0; i < run.length(); i++) {
67            if (Font::treatAsSpace(m_run[i]))
68                numSpaces++;
69        }
70
71        if (!numSpaces)
72            m_padPerSpace = 0;
73        else
74            m_padPerSpace = m_padding / numSpaces;
75    }
76}
77
78void WidthIterator::advance(int offset, GlyphBuffer* glyphBuffer)
79{
80    if (offset > m_end)
81        offset = m_end;
82
83    int currentCharacter = m_currentCharacter;
84    const UChar* cp = m_run.data(currentCharacter);
85
86    bool rtl = m_run.rtl();
87    bool hasExtraSpacing = (m_font->letterSpacing() || m_font->wordSpacing() || m_padding) && !m_run.spacingDisabled();
88
89    float widthSinceLastRounding = m_runWidthSoFar;
90    m_runWidthSoFar = floorf(m_runWidthSoFar);
91    widthSinceLastRounding -= m_runWidthSoFar;
92
93    float lastRoundingWidth = m_finalRoundingWidth;
94    FloatRect bounds;
95
96    const SimpleFontData* primaryFont = m_font->primaryFont();
97    const SimpleFontData* lastFontData = primaryFont;
98
99    while (currentCharacter < offset) {
100        UChar32 c = *cp;
101        unsigned clusterLength = 1;
102        if (c >= 0x3041) {
103            if (c <= 0x30FE) {
104                // Deal with Hiragana and Katakana voiced and semi-voiced syllables.
105                // Normalize into composed form, and then look for glyph with base + combined mark.
106                // Check above for character range to minimize performance impact.
107                UChar32 normalized = normalizeVoicingMarks(currentCharacter);
108                if (normalized) {
109                    c = normalized;
110                    clusterLength = 2;
111                }
112            } else if (U16_IS_SURROGATE(c)) {
113                if (!U16_IS_SURROGATE_LEAD(c))
114                    break;
115
116                // Do we have a surrogate pair?  If so, determine the full Unicode (32 bit)
117                // code point before glyph lookup.
118                // Make sure we have another character and it's a low surrogate.
119                if (currentCharacter + 1 >= m_run.length())
120                    break;
121                UChar low = cp[1];
122                if (!U16_IS_TRAIL(low))
123                    break;
124                c = U16_GET_SUPPLEMENTARY(c, low);
125                clusterLength = 2;
126            }
127        }
128
129        const GlyphData& glyphData = m_font->glyphDataForCharacter(c, rtl);
130        Glyph glyph = glyphData.glyph;
131        const SimpleFontData* fontData = glyphData.fontData;
132
133        ASSERT(fontData);
134
135        // Now that we have a glyph and font data, get its width.
136        float width;
137        if (c == '\t' && m_run.allowTabs()) {
138            float tabWidth = m_font->tabWidth(*fontData);
139            width = tabWidth - fmodf(m_run.xPos() + m_runWidthSoFar + widthSinceLastRounding, tabWidth);
140        } else {
141            width = fontData->widthForGlyph(glyph);
142
143#if ENABLE(SVG)
144            // SVG uses horizontalGlyphStretch(), when textLength is used to stretch/squeeze text.
145            width *= m_run.horizontalGlyphStretch();
146#endif
147
148            // We special case spaces in two ways when applying word rounding.
149            // First, we round spaces to an adjusted width in all fonts.
150            // Second, in fixed-pitch fonts we ensure that all characters that
151            // match the width of the space character have the same width as the space character.
152            if (width == fontData->spaceWidth() && (fontData->pitch() == FixedPitch || glyph == fontData->spaceGlyph()) && m_run.applyWordRounding())
153                width = fontData->adjustedSpaceWidth();
154        }
155
156        if (fontData != lastFontData && width) {
157            lastFontData = fontData;
158            if (m_fallbackFonts && fontData != primaryFont) {
159                // FIXME: This does a little extra work that could be avoided if
160                // glyphDataForCharacter() returned whether it chose to use a small caps font.
161                if (!m_font->isSmallCaps() || c == toUpper(c))
162                    m_fallbackFonts->add(fontData);
163                else {
164                    const GlyphData& uppercaseGlyphData = m_font->glyphDataForCharacter(toUpper(c), rtl);
165                    if (uppercaseGlyphData.fontData != primaryFont)
166                        m_fallbackFonts->add(uppercaseGlyphData.fontData);
167                }
168            }
169        }
170
171        if (hasExtraSpacing) {
172            // Account for letter-spacing.
173            if (width && m_font->letterSpacing())
174                width += m_font->letterSpacing();
175
176            if (Font::treatAsSpace(c)) {
177                // Account for padding. WebCore uses space padding to justify text.
178                // We distribute the specified padding over the available spaces in the run.
179                if (m_padding) {
180                    // Use left over padding if not evenly divisible by number of spaces.
181                    if (m_padding < m_padPerSpace) {
182                        width += m_padding;
183                        m_padding = 0;
184                    } else {
185                        float previousPadding = m_padding;
186                        m_padding -= m_padPerSpace;
187                        width += roundf(previousPadding) - roundf(m_padding);
188                    }
189                }
190
191                // Account for word spacing.
192                // We apply additional space between "words" by adding width to the space character.
193                if (currentCharacter != 0 && !Font::treatAsSpace(cp[-1]) && m_font->wordSpacing())
194                    width += m_font->wordSpacing();
195            }
196        }
197
198        if (m_accountForGlyphBounds) {
199            bounds = fontData->boundsForGlyph(glyph);
200            if (!currentCharacter)
201                m_firstGlyphOverflow = max<float>(0, -bounds.x());
202        }
203
204        if (m_forTextEmphasis && !Font::canReceiveTextEmphasis(c))
205            glyph = 0;
206
207        // Advance past the character we just dealt with.
208        cp += clusterLength;
209        currentCharacter += clusterLength;
210
211        // Account for float/integer impedance mismatch between CG and KHTML. "Words" (characters
212        // followed by a character defined by isRoundingHackCharacter()) are always an integer width.
213        // We adjust the width of the last character of a "word" to ensure an integer width.
214        // If we move KHTML to floats we can remove this (and related) hacks.
215
216        float oldWidth = width;
217
218        // Force characters that are used to determine word boundaries for the rounding hack
219        // to be integer width, so following words will start on an integer boundary.
220        if (m_run.applyWordRounding() && Font::isRoundingHackCharacter(c)) {
221            width = ceilf(width);
222
223            // Since widthSinceLastRounding can lose precision if we include measurements for
224            // preceding whitespace, we bypass it here.
225            m_runWidthSoFar += width;
226
227            // Since this is a rounding hack character, we should have reset this sum on the previous
228            // iteration.
229            ASSERT(!widthSinceLastRounding);
230        } else {
231            // Check to see if the next character is a "rounding hack character", if so, adjust
232            // width so that the total run width will be on an integer boundary.
233            if ((m_run.applyWordRounding() && currentCharacter < m_run.length() && Font::isRoundingHackCharacter(*cp))
234                    || (m_run.applyRunRounding() && currentCharacter >= m_end)) {
235                float totalWidth = widthSinceLastRounding + width;
236                widthSinceLastRounding = ceilf(totalWidth);
237                width += widthSinceLastRounding - totalWidth;
238                m_runWidthSoFar += widthSinceLastRounding;
239                widthSinceLastRounding = 0;
240            } else
241                widthSinceLastRounding += width;
242        }
243
244        if (glyphBuffer)
245            glyphBuffer->add(glyph, fontData, (rtl ? oldWidth + lastRoundingWidth : width));
246
247        lastRoundingWidth = width - oldWidth;
248
249        if (m_accountForGlyphBounds) {
250            m_maxGlyphBoundingBoxY = max(m_maxGlyphBoundingBoxY, bounds.bottom());
251            m_minGlyphBoundingBoxY = min(m_minGlyphBoundingBoxY, bounds.y());
252            m_lastGlyphOverflow = max<float>(0, bounds.right() - width);
253        }
254    }
255
256    m_currentCharacter = currentCharacter;
257    m_runWidthSoFar += widthSinceLastRounding;
258    m_finalRoundingWidth = lastRoundingWidth;
259}
260
261bool WidthIterator::advanceOneCharacter(float& width, GlyphBuffer* glyphBuffer)
262{
263    glyphBuffer->clear();
264    advance(m_currentCharacter + 1, glyphBuffer);
265    float w = 0;
266    for (int i = 0; i < glyphBuffer->size(); ++i)
267        w += glyphBuffer->advanceAt(i);
268    width = w;
269    return !glyphBuffer->isEmpty();
270}
271
272UChar32 WidthIterator::normalizeVoicingMarks(int currentCharacter)
273{
274    if (currentCharacter + 1 < m_end) {
275        if (combiningClass(m_run[currentCharacter + 1]) == hiraganaKatakanaVoicingMarksCombiningClass) {
276#if USE(ICU_UNICODE)
277            // Normalize into composed form using 3.2 rules.
278            UChar normalizedCharacters[2] = { 0, 0 };
279            UErrorCode uStatus = U_ZERO_ERROR;
280            int32_t resultLength = unorm_normalize(m_run.data(currentCharacter), 2,
281                UNORM_NFC, UNORM_UNICODE_3_2, &normalizedCharacters[0], 2, &uStatus);
282            if (resultLength == 1 && uStatus == 0)
283                return normalizedCharacters[0];
284#elif USE(QT4_UNICODE)
285            QString tmp(reinterpret_cast<const QChar*>(m_run.data(currentCharacter)), 2);
286            QString res = tmp.normalized(QString::NormalizationForm_C, QChar::Unicode_3_2);
287            if (res.length() == 1)
288                return res.at(0).unicode();
289#endif
290        }
291    }
292    return 0;
293}
294
295}
296