UniscribeHelper.h revision 2daae5fd11344eaa88a0d92b0f6d65f8d2255c00
1/*
2 * Copyright (c) 2006, 2007, 2008, 2009, Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 *     * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *     * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 *     * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31// A wrapper around Uniscribe that provides a reasonable API.
32
33#ifndef UniscribeHelper_h
34#define UniscribeHelper_h
35
36#include <windows.h>
37#include <usp10.h>
38#include <map>
39
40#include <unicode/uchar.h>
41#include <wtf/Vector.h>
42
43class UniscribeTest_TooBig_Test;  // A gunit test for UniscribeHelper.
44
45namespace WebCore {
46
47class GraphicsContext;
48
// Sizes passed as the second template argument of the Vector members used by
// UniscribeHelper (presumably the inline capacity that lets short inputs avoid
// heap allocation -- confirm against wtf/Vector.h):
//   - UNISCRIBE_HELPER_STACK_RUNS sizes the per-run vectors (m_runs, m_shapes,
//     m_screenOrder).
//   - UNISCRIBE_HELPER_STACK_CHARS sizes the per-glyph / per-character vectors
//     inside UniscribeHelper::Shaping (m_glyphs, m_logs, m_advance, ...).
49#define UNISCRIBE_HELPER_STACK_RUNS 8
50#define UNISCRIBE_HELPER_STACK_CHARS 32
51
52// This object should be safe to create & destroy frequently, as long as the
53// caller preserves the script_cache when possible (this data may be slow to
54// compute).
55//
// This object is "kind of large" (~1K) because it reserves a lot of inline
// working space to avoid expensive heap operations. Therefore, not only should
// you not worry about creating and destroying instances, you should also try
// not to keep them around.
60class UniscribeHelper {
61public:
62    // Initializes this Uniscribe run with the text pointed to by |run| with
63    // |length|. The input is NOT null terminated.
64    //
65    // The is_rtl flag should be set if the input script is RTL. It is assumed
66    // that the caller has already divided up the input text (using ICU, for
67    // example) into runs of the same direction of script. This avoids
68    // disagreements between the caller and Uniscribe later (see FillItems).
69    //
70    // A script cache should be provided by the caller that is initialized to
71    // NULL. When the caller is done with the cache (it may be stored between
72    // runs as long as it is used consistently with the same HFONT), it should
73    // call ScriptFreeCache().
74    UniscribeHelper(const UChar* input,
75                    int inputLength,
76                    bool isRtl,
77                    HFONT,
78                    SCRIPT_CACHE*,
79                    SCRIPT_FONTPROPERTIES*,
80                    WORD);
81
82    virtual ~UniscribeHelper();
83
84    // Sets Uniscribe's directional override flag. False by default.
85    bool directionalOverride() const
86    {
87        return m_directionalOverride;
88    }
89    void setDirectionalOverride(bool override)
90    {
91        m_directionalOverride = override;
92    }
93
94    // Set's Uniscribe's no-ligate override flag. False by default.
95    bool inhibitLigate() const
96    {
97        return m_inhibitLigate;
98    }
99    void setInhibitLigate(bool inhibit)
100    {
101        m_inhibitLigate = inhibit;
102    }
103
104    // Set letter spacing. We will try to insert this much space between
105    // graphemes (one or more glyphs perceived as a single unit by ordinary
106    // users of a script). Positive values increase letter spacing, negative
107    // values decrease it. 0 by default.
108    int letterSpacing() const
109    {
110        return m_letterSpacing;
111    }
112    void setLetterSpacing(int letterSpacing)
113    {
114        m_letterSpacing = letterSpacing;
115    }
116
117    // Set the width of a standard space character. We use this to normalize
118    // space widths. Windows will make spaces after Hindi characters larger than
119    // other spaces. A space_width of 0 means to use the default space width.
120    //
121    // Must be set before Init() is called.
122    int spaceWidth() const
123    {
124        return m_spaceWidth;
125    }
126    void setSpaceWidth(int spaceWidth)
127    {
128        m_spaceWidth = spaceWidth;
129    }
130
131    // Set word spacing. We will try to insert this much extra space between
132    // each word in the input (beyond whatever whitespace character separates
133    // words). Positive values lead to increased letter spacing, negative values
134    // decrease it. 0 by default.
135    //
136    // Must be set before Init() is called.
137    int wordSpacing() const
138    {
139        return m_wordSpacing;
140    }
141    void setWordSpacing(int wordSpacing)
142    {
143        m_wordSpacing = wordSpacing;
144    }
145
146    void setAscent(int ascent)
147    {
148        m_ascent = ascent;
149    }
150
151    // When set to true, this class is used only to look up glyph
152    // indices for a range of Unicode characters without glyph placement.
153    // By default, it's false. This should be set to true when this
154    // class is used for glyph index look-up for non-BMP characters
155    // in GlyphPageNodeChromiumWin.cpp.
156    void setDisableFontFallback(bool disableFontFallback)
157    {
158        m_disableFontFallback = true;
159    }
160
161    // You must call this after setting any options but before doing any
162    // other calls like asking for widths or drawing.
163    void init()
164    {
165        initWithOptionalLengthProtection(true);
166    }
167
168    // Returns the total width in pixels of the text run.
169    int width() const;
170
171    // Call to justify the text, with the amount of space that should be ADDED
172    // to get the desired width that the column should be justified to.
173    // Normally, spaces are inserted, but for Arabic there will be kashidas
174    // (extra strokes) inserted instead.
175    //
176    // This function MUST be called AFTER Init().
177    void justify(int additionalSpace);
178
179    // Computes the given character offset into a pixel offset of the beginning
180    // of that character.
181    int characterToX(int offset) const;
182
183    // Converts the given pixel X position into a logical character offset into
184    // the run. For positions appearing before the first character, this will
185    // return -1.
186    int xToCharacter(int x) const;
187
188    // Draws the given characters to (x, y) in the given DC. The font will be
189    // handled by this function, but the font color and other attributes should
190    // be pre-set.
191    //
192    // The y position is the upper left corner, NOT the baseline.
193    void draw(GraphicsContext* graphicsContext, HDC dc, int x, int y, int from,
194              int to);
195
196    // Returns the first glyph assigned to the character at the given offset.
197    // This function is used to retrieve glyph information when Uniscribe is
198    // being used to generate glyphs for non-complex, non-BMP (above U+FFFF)
199    // characters. These characters are not otherwise special and have no
200    // complex shaping rules, so we don't otherwise need Uniscribe, except
201    // Uniscribe is the only way to get glyphs for non-BMP characters.
202    //
203    // Returns 0 if there is no glyph for the given character.
204    WORD firstGlyphForCharacter(int charOffset) const;
205
206protected:
207    // Backend for init. The flag allows the unit test to specify whether we
208    // should fail early for very long strings like normal, or try to pass the
209    // long string to Uniscribe. The latter provides a way to force failure of
210    // shaping.
211    void initWithOptionalLengthProtection(bool lengthProtection);
212
213    // Tries to preload the font when the it is not accessible.
214    // This is the default implementation and it does not do anything.
215    virtual void tryToPreloadFont(HFONT) {}
216
217private:
218    friend class UniscribeTest_TooBig_Test;
219
220    // An array corresponding to each item in runs_ containing information
221    // on each of the glyphs that were generated. Like runs_, this is in
222    // reading order. However, for rtl text, the characters within each
223    // item will be reversed.
224    struct Shaping {
225        Shaping()
226            : m_prePadding(0)
227            , m_hfont(NULL)
228            , m_scriptCache(NULL)
229            , m_ascentOffset(0)
230            , m_spaceGlyph(0)
231        {
232            m_abc.abcA = 0;
233            m_abc.abcB = 0;
234            m_abc.abcC = 0;
235        }
236
237        // Returns the number of glyphs (which will be drawn to the screen)
238        // in this run.
239        int glyphLength() const
240        {
241            return static_cast<int>(m_glyphs.size());
242        }
243
244        // Returns the number of characters (that we started with) in this run.
245        int charLength() const
246        {
247            return static_cast<int>(m_logs.size());
248        }
249
250        // Returns the advance array that should be used when measuring glyphs.
251        // The returned pointer will indicate an array with glyph_length()
252        // elements and the advance that should be used for each one. This is
253        // either the real advance, or the justified advances if there is one,
254        // and is the array we want to use for measurement.
255        const int* effectiveAdvances() const
256        {
257            if (m_advance.size() == 0)
258                return 0;
259            if (m_justify.size() == 0)
260                return &m_advance[0];
261            return &m_justify[0];
262        }
263
264        // This is the advance amount of space that we have added to the
265        // beginning of the run. It is like the ABC's |A| advance but one that
266        // we create and must handle internally whenever computing with pixel
267        // offsets.
268        int m_prePadding;
269
270        // Glyph indices in the font used to display this item. These indices
271        // are in screen order.
272        Vector<WORD, UNISCRIBE_HELPER_STACK_CHARS> m_glyphs;
273
274        // For each input character, this tells us the first glyph index it
275        // generated. This is the only array with size of the input chars.
276        //
277        // All offsets are from the beginning of this run. Multiple characters
278        // can generate one glyph, in which case there will be adjacent
279        // duplicates in this list. One character can also generate multiple
280        // glyphs, in which case there will be skipped indices in this list.
281        Vector<WORD, UNISCRIBE_HELPER_STACK_CHARS> m_logs;
282
283        // Flags and such for each glyph.
284        Vector<SCRIPT_VISATTR, UNISCRIBE_HELPER_STACK_CHARS> m_visualAttributes;
285
286        // Horizontal advances for each glyph listed above, this is basically
287        // how wide each glyph is.
288        Vector<int, UNISCRIBE_HELPER_STACK_CHARS> m_advance;
289
290        // This contains glyph offsets, from the nominal position of a glyph.
291        // It is used to adjust the positions of multiple combining characters
292        // around/above/below base characters in a context-sensitive manner so
293        // that they don't bump against each other and the base character.
294        Vector<GOFFSET, UNISCRIBE_HELPER_STACK_CHARS> m_offsets;
295
296        // Filled by a call to Justify, this is empty for nonjustified text.
297        // If nonempty, this contains the array of justify characters for each
298        // character as returned by ScriptJustify.
299        //
300        // This is the same as the advance array, but with extra space added
301        // for some characters. The difference between a glyph's |justify|
302        // width and it's |advance| width is the extra space added.
303        Vector<int, UNISCRIBE_HELPER_STACK_CHARS> m_justify;
304
305        // Sizing information for this run. This treats the entire run as a
306        // character with a preceeding advance, width, and ending advance.  The
307        // B width is the sum of the |advance| array, and the A and C widths
308        // are any extra spacing applied to each end.
309        //
310        // It is unclear from the documentation what this actually means. From
311        // experimentation, it seems that the sum of the character advances is
312        // always the sum of the ABC values, and I'm not sure what you're
313        // supposed to do with the ABC values.
314        ABC m_abc;
315
316        // Pointers to windows font data used to render this run.
317        HFONT m_hfont;
318        SCRIPT_CACHE* m_scriptCache;
319
320        // Ascent offset between the ascent of the primary font
321        // and that of the fallback font. The offset needs to be applied,
322        // when drawing a string, to align multiple runs rendered with
323        // different fonts.
324        int m_ascentOffset;
325
326        WORD m_spaceGlyph;
327    };
328
329    // Computes the runs_ array from the text run.
330    void fillRuns();
331
332    // Computes the shapes_ array given an runs_ array already filled in.
333    void fillShapes();
334
335    // Fills in the screen_order_ array (see below).
336    void fillScreenOrder();
337
338    // Called to update the glyph positions based on the current spacing
339    // options that are set.
340    void applySpacing();
341
342    // Normalizes all advances for spaces to the same width. This keeps windows
343    // from making spaces after Hindi characters larger, which is then
344    // inconsistent with our meaure of the width since WebKit doesn't include
345    // spaces in text-runs sent to uniscribe unless white-space:pre.
346    void adjustSpaceAdvances();
347
348    // Returns the total width of a single item.
349    int advanceForItem(int) const;
350
351    bool containsMissingGlyphs(const Shaping&,
352                               const SCRIPT_ITEM&,
353                               const SCRIPT_FONTPROPERTIES*) const;
354
355    // Shapes a run (pointed to by |input|) using |hfont| first.
356    // Tries a series of fonts specified retrieved with NextWinFontData
357    // and finally a font covering characters in |*input|. A string pointed
358    // by |input| comes from ScriptItemize and is supposed to contain
359    // characters belonging to a single script aside from characters common to
360    // all scripts (e.g. space).
361    bool shape(const UChar* input, int itemLength, int numGlyphs, SCRIPT_ITEM& run, Shaping&);
362
363    // Gets Windows font data for the next best font to try in the list
364    // of fonts. When there's no more font available, returns false
365    // without touching any of out params. Need to call ResetFontIndex
366    // to start scanning of the font list from the beginning.
367    virtual bool nextWinFontData(HFONT*, SCRIPT_CACHE**, SCRIPT_FONTPROPERTIES**, int* ascent)
368    {
369        return false;
370    }
371
372    // Resets the font index to the first in the list of fonts to try after the
373    // primaryFont turns out not to work. With fontIndex reset,
374    // NextWinFontData scans fallback fonts from the beginning.
375    virtual void resetFontIndex() {}
376
377    // The input data for this run of Uniscribe. See the constructor.
378    const UChar* m_input;
379    const int m_inputLength;
380    const bool m_isRtl;
381
382    // Windows font data for the primary font. In a sense, m_logfont and m_style
383    // are redundant because m_hfont contains all the information. However,
384    // invoking GetObject, everytime we need the height and the style, is rather
385    // expensive so that we cache them. Would it be better to add getter and
386    // (virtual) setter for the height and the style of the primary font,
387    // instead of m_logfont? Then, a derived class ctor can set m_ascent,
388    // m_height and m_style if they're known. Getters for them would have to
389    // 'infer' their values from m_hfont ONLY when they're not set.
390    HFONT m_hfont;
391    SCRIPT_CACHE* m_scriptCache;
392    SCRIPT_FONTPROPERTIES* m_fontProperties;
393    int m_ascent;
394    LOGFONT m_logfont;
395    int m_style;
396    WORD m_spaceGlyph;
397
398    // Options, see the getters/setters above.
399    bool m_directionalOverride;
400    bool m_inhibitLigate;
401    int m_letterSpacing;
402    int m_spaceWidth;
403    int m_wordSpacing;
404    bool m_disableFontFallback;
405
406    // Uniscribe breaks the text into Runs. These are one length of text that is
407    // in one script and one direction. This array is in reading order.
408    Vector<SCRIPT_ITEM, UNISCRIBE_HELPER_STACK_RUNS> m_runs;
409
410    Vector<Shaping, UNISCRIBE_HELPER_STACK_RUNS> m_shapes;
411
412    // This is a mapping between reading order and screen order for the items.
413    // Uniscribe's items array are in reading order. For right-to-left text,
414    // or mixed (although WebKit's |TextRun| should really be only one
415    // direction), this makes it very difficult to compute character offsets
416    // and positions. This list is in screen order from left to right, and
417    // gives the index into the |m_runs| and |m_shapes| arrays of each
418    // subsequent item.
419    Vector<int, UNISCRIBE_HELPER_STACK_RUNS> m_screenOrder;
420};
421
422}  // namespace WebCore
423
424#endif  // UniscribeHelper_h
425