WidthIterator.cpp revision ab9e7a118cf1ea2e3a93dce683b2ded3e7291ddb
1/* 2 * Copyright (C) 2003, 2006, 2008, 2009, 2010 Apple Inc. All rights reserved. 3 * Copyright (C) 2008 Holger Hans Peter Freyther 4 * 5 * This library is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either 8 * version 2 of the License, or (at your option) any later version. 9 * 10 * This library is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public License 16 * along with this library; see the file COPYING.LIB. If not, write to 17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 * Boston, MA 02110-1301, USA. 19 * 20 */ 21 22#include "config.h" 23#include "WidthIterator.h" 24 25#include "Font.h" 26#include "GlyphBuffer.h" 27#include "SimpleFontData.h" 28#include "TextRun.h" 29#include <wtf/MathExtras.h> 30 31#if USE(ICU_UNICODE) 32#include <unicode/unorm.h> 33#endif 34 35using namespace WTF; 36using namespace Unicode; 37using namespace std; 38 39namespace WebCore { 40 41// According to http://www.unicode.org/Public/UNIDATA/UCD.html#Canonical_Combining_Class_Values 42static const uint8_t hiraganaKatakanaVoicingMarksCombiningClass = 8; 43 44WidthIterator::WidthIterator(const Font* font, const TextRun& run, HashSet<const SimpleFontData*>* fallbackFonts, bool accountForGlyphBounds, bool forTextEmphasis) 45 : m_font(font) 46 , m_run(run) 47 , m_end(run.length()) 48 , m_currentCharacter(0) 49 , m_runWidthSoFar(0) 50 , m_finalRoundingWidth(0) 51 , m_fallbackFonts(fallbackFonts) 52 , m_accountForGlyphBounds(accountForGlyphBounds) 53 , m_maxGlyphBoundingBoxY(numeric_limits<float>::min()) 54 , m_minGlyphBoundingBoxY(numeric_limits<float>::max()) 55 , m_firstGlyphOverflow(0) 56 , m_lastGlyphOverflow(0) 57 , m_forTextEmphasis(forTextEmphasis) 58{ 59 // If the padding is non-zero, count the number of spaces in the run 60 // and divide that by the padding for per space addition. 61 m_padding = m_run.padding(); 62 if (!m_padding) 63 m_padPerSpace = 0; 64 else { 65 int numSpaces = 0; 66 for (int i = 0; i < run.length(); i++) { 67 if (Font::treatAsSpace(m_run[i])) 68 numSpaces++; 69 } 70 71 if (!numSpaces) 72 m_padPerSpace = 0; 73 else 74 m_padPerSpace = m_padding / numSpaces; 75 } 76} 77 78void WidthIterator::advance(int offset, GlyphBuffer* glyphBuffer) 79{ 80 if (offset > m_end) 81 offset = m_end; 82 83 int currentCharacter = m_currentCharacter; 84 const UChar* cp = m_run.data(currentCharacter); 85 86 bool rtl = m_run.rtl(); 87 bool hasExtraSpacing = (m_font->letterSpacing() || m_font->wordSpacing() || m_padding) && !m_run.spacingDisabled(); 88 89 float widthSinceLastRounding = m_runWidthSoFar; 90 m_runWidthSoFar = floorf(m_runWidthSoFar); 91 widthSinceLastRounding -= m_runWidthSoFar; 92 93 float lastRoundingWidth = m_finalRoundingWidth; 94 FloatRect bounds; 95 96 const SimpleFontData* primaryFont = m_font->primaryFont(); 97 const SimpleFontData* lastFontData = primaryFont; 98 99 while (currentCharacter < offset) { 100 UChar32 c = *cp; 101 unsigned clusterLength = 1; 102 if (c >= 0x3041) { 103 if (c <= 0x30FE) { 104 // Deal with Hiragana and Katakana voiced and semi-voiced syllables. 105 // Normalize into composed form, and then look for glyph with base + combined mark. 106 // Check above for character range to minimize performance impact. 107 UChar32 normalized = normalizeVoicingMarks(currentCharacter); 108 if (normalized) { 109 c = normalized; 110 clusterLength = 2; 111 } 112 } else if (U16_IS_SURROGATE(c)) { 113 if (!U16_IS_SURROGATE_LEAD(c)) 114 break; 115 116 // Do we have a surrogate pair? If so, determine the full Unicode (32 bit) 117 // code point before glyph lookup. 118 // Make sure we have another character and it's a low surrogate. 119 if (currentCharacter + 1 >= m_run.length()) 120 break; 121 UChar low = cp[1]; 122 if (!U16_IS_TRAIL(low)) 123 break; 124 c = U16_GET_SUPPLEMENTARY(c, low); 125 clusterLength = 2; 126 } 127 } 128 129 const GlyphData& glyphData = m_font->glyphDataForCharacter(c, rtl); 130 Glyph glyph = glyphData.glyph; 131 const SimpleFontData* fontData = glyphData.fontData; 132 133 ASSERT(fontData); 134 135 // Now that we have a glyph and font data, get its width. 136 float width; 137 if (c == '\t' && m_run.allowTabs()) { 138 float tabWidth = m_font->tabWidth(*fontData); 139 width = tabWidth - fmodf(m_run.xPos() + m_runWidthSoFar + widthSinceLastRounding, tabWidth); 140 } else { 141 width = fontData->widthForGlyph(glyph); 142 143#if ENABLE(SVG) 144 // SVG uses horizontalGlyphStretch(), when textLength is used to stretch/squeeze text. 145 width *= m_run.horizontalGlyphStretch(); 146#endif 147 148 // We special case spaces in two ways when applying word rounding. 149 // First, we round spaces to an adjusted width in all fonts. 150 // Second, in fixed-pitch fonts we ensure that all characters that 151 // match the width of the space character have the same width as the space character. 152 if (width == fontData->spaceWidth() && (fontData->pitch() == FixedPitch || glyph == fontData->spaceGlyph()) && m_run.applyWordRounding()) 153 width = fontData->adjustedSpaceWidth(); 154 } 155 156 if (fontData != lastFontData && width) { 157 lastFontData = fontData; 158 if (m_fallbackFonts && fontData != primaryFont) { 159 // FIXME: This does a little extra work that could be avoided if 160 // glyphDataForCharacter() returned whether it chose to use a small caps font. 161 if (!m_font->isSmallCaps() || c == toUpper(c)) 162 m_fallbackFonts->add(fontData); 163 else { 164 const GlyphData& uppercaseGlyphData = m_font->glyphDataForCharacter(toUpper(c), rtl); 165 if (uppercaseGlyphData.fontData != primaryFont) 166 m_fallbackFonts->add(uppercaseGlyphData.fontData); 167 } 168 } 169 } 170 171 if (hasExtraSpacing) { 172 // Account for letter-spacing. 173 if (width && m_font->letterSpacing()) 174 width += m_font->letterSpacing(); 175 176 if (Font::treatAsSpace(c)) { 177 // Account for padding. WebCore uses space padding to justify text. 178 // We distribute the specified padding over the available spaces in the run. 179 if (m_padding) { 180 // Use left over padding if not evenly divisible by number of spaces. 181 if (m_padding < m_padPerSpace) { 182 width += m_padding; 183 m_padding = 0; 184 } else { 185 float previousPadding = m_padding; 186 m_padding -= m_padPerSpace; 187 width += roundf(previousPadding) - roundf(m_padding); 188 } 189 } 190 191 // Account for word spacing. 192 // We apply additional space between "words" by adding width to the space character. 193 if (currentCharacter != 0 && !Font::treatAsSpace(cp[-1]) && m_font->wordSpacing()) 194 width += m_font->wordSpacing(); 195 } 196 } 197 198 if (m_accountForGlyphBounds) { 199 bounds = fontData->boundsForGlyph(glyph); 200 if (!currentCharacter) 201 m_firstGlyphOverflow = max<float>(0, -bounds.x()); 202 } 203 204 if (m_forTextEmphasis && !Font::canReceiveTextEmphasis(c)) 205 glyph = 0; 206 207 // Advance past the character we just dealt with. 208 cp += clusterLength; 209 currentCharacter += clusterLength; 210 211 // Account for float/integer impedance mismatch between CG and KHTML. "Words" (characters 212 // followed by a character defined by isRoundingHackCharacter()) are always an integer width. 213 // We adjust the width of the last character of a "word" to ensure an integer width. 214 // If we move KHTML to floats we can remove this (and related) hacks. 215 216 float oldWidth = width; 217 218 // Force characters that are used to determine word boundaries for the rounding hack 219 // to be integer width, so following words will start on an integer boundary. 220 if (m_run.applyWordRounding() && Font::isRoundingHackCharacter(c)) { 221 width = ceilf(width); 222 223 // Since widthSinceLastRounding can lose precision if we include measurements for 224 // preceding whitespace, we bypass it here. 225 m_runWidthSoFar += width; 226 227 // Since this is a rounding hack character, we should have reset this sum on the previous 228 // iteration. 229 ASSERT(!widthSinceLastRounding); 230 } else { 231 // Check to see if the next character is a "rounding hack character", if so, adjust 232 // width so that the total run width will be on an integer boundary. 233 if ((m_run.applyWordRounding() && currentCharacter < m_run.length() && Font::isRoundingHackCharacter(*cp)) 234 || (m_run.applyRunRounding() && currentCharacter >= m_end)) { 235 float totalWidth = widthSinceLastRounding + width; 236 widthSinceLastRounding = ceilf(totalWidth); 237 width += widthSinceLastRounding - totalWidth; 238 m_runWidthSoFar += widthSinceLastRounding; 239 widthSinceLastRounding = 0; 240 } else 241 widthSinceLastRounding += width; 242 } 243 244 if (glyphBuffer) 245 glyphBuffer->add(glyph, fontData, (rtl ? oldWidth + lastRoundingWidth : width)); 246 247 lastRoundingWidth = width - oldWidth; 248 249 if (m_accountForGlyphBounds) { 250 m_maxGlyphBoundingBoxY = max(m_maxGlyphBoundingBoxY, bounds.bottom()); 251 m_minGlyphBoundingBoxY = min(m_minGlyphBoundingBoxY, bounds.y()); 252 m_lastGlyphOverflow = max<float>(0, bounds.right() - width); 253 } 254 } 255 256 m_currentCharacter = currentCharacter; 257 m_runWidthSoFar += widthSinceLastRounding; 258 m_finalRoundingWidth = lastRoundingWidth; 259} 260 261bool WidthIterator::advanceOneCharacter(float& width, GlyphBuffer* glyphBuffer) 262{ 263 glyphBuffer->clear(); 264 advance(m_currentCharacter + 1, glyphBuffer); 265 float w = 0; 266 for (int i = 0; i < glyphBuffer->size(); ++i) 267 w += glyphBuffer->advanceAt(i); 268 width = w; 269 return !glyphBuffer->isEmpty(); 270} 271 272UChar32 WidthIterator::normalizeVoicingMarks(int currentCharacter) 273{ 274 if (currentCharacter + 1 < m_end) { 275 if (combiningClass(m_run[currentCharacter + 1]) == hiraganaKatakanaVoicingMarksCombiningClass) { 276#if USE(ICU_UNICODE) 277 // Normalize into composed form using 3.2 rules. 278 UChar normalizedCharacters[2] = { 0, 0 }; 279 UErrorCode uStatus = U_ZERO_ERROR; 280 int32_t resultLength = unorm_normalize(m_run.data(currentCharacter), 2, 281 UNORM_NFC, UNORM_UNICODE_3_2, &normalizedCharacters[0], 2, &uStatus); 282 if (resultLength == 1 && uStatus == 0) 283 return normalizedCharacters[0]; 284#elif USE(QT4_UNICODE) 285 QString tmp(reinterpret_cast<const QChar*>(m_run.data(currentCharacter)), 2); 286 QString res = tmp.normalized(QString::NormalizationForm_C, QChar::Unicode_3_2); 287 if (res.length() == 1) 288 return res.at(0).unicode(); 289#endif 290 } 291 } 292 return 0; 293} 294 295} 296