1/*
2 * Copyright (C) 2005, 2007, 2010 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23 * THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "break_lines.h"
28
29#include "CharacterNames.h"
30#include "TextBreakIterator.h"
31
32#if PLATFORM(MAC)
33#include <CoreServices/CoreServices.h>
34#endif
35
36namespace WebCore {
37
38static inline bool isBreakableSpace(UChar ch, bool treatNoBreakSpaceAsBreak)
39{
40    switch (ch) {
41        case ' ':
42        case '\n':
43        case '\t':
44            return true;
45        case noBreakSpace:
46            return treatNoBreakSpaceAsBreak;
47        default:
48            return false;
49    }
50}
51
// Lookup table indexed by the ASCII character that follows a question mark.
// A non-zero entry means a line break is allowed between the '?' and that
// character; the characters with a zero entry are listed beside each row.
//
// This differs from the Unicode algorithm only in that Unicode does not break
// between a question mark and a vertical line (U+007C).
static const unsigned char internetExplorerLineBreaksAfterQuestionMarkTable[0x80] = {
    /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, // \t
    /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x20 */ 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, // ! " ' ) , . /
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, // : ; ?
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, // ]
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1  // }
};

// Number of entries in the table above; used to bounds-check lookups.
static const size_t internetExplorerLineBreaksAfterQuestionMarkTableSize =
    sizeof(internetExplorerLineBreaksAfterQuestionMarkTable) / sizeof(internetExplorerLineBreaksAfterQuestionMarkTable[0]);
66
67static inline bool shouldBreakAfter(UChar ch, UChar nextCh)
68{
69    switch (ch) {
70        // For a question mark preceding a non-ASCII characters, defer to the Unicode algorithm by returning false.
71        // For ASCII characters, use a lookup table for enhanced speed and for compatibility with Internet Explorer.
72        case '?':
73            return nextCh < internetExplorerLineBreaksAfterQuestionMarkTableSize && internetExplorerLineBreaksAfterQuestionMarkTable[nextCh];
74        // Internet Explorer always allows breaking after a hyphen.
75        case '-':
76        case softHyphen:
77        // FIXME: cases for ideographicComma and ideographicFullStop are a workaround for an issue in Unicode 5.0
78        // which is likely to be resolved in Unicode 5.1 <http://bugs.webkit.org/show_bug.cgi?id=17411>.
79        // We may want to remove or conditionalize this workaround at some point.
80        case ideographicComma:
81        case ideographicFullStop:
82#ifdef ANDROID_LAYOUT
83        // as '/' is used in uri which is always long, we would like to break it
84        case '/':
85#endif
86            return true;
87        default:
88            return false;
89    }
90}
91
92static inline bool needsLineBreakIterator(UChar ch)
93{
94    return ch > 0x7F && ch != noBreakSpace;
95}
96
97#if PLATFORM(MAC) && defined(BUILDING_ON_TIGER)
98static inline TextBreakLocatorRef lineBreakLocator()
99{
100    TextBreakLocatorRef locator = 0;
101    UCCreateTextBreakLocator(0, 0, kUCTextBreakLineMask, &locator);
102    return locator;
103}
104#endif
105
106int nextBreakablePosition(const UChar* str, int pos, int len, bool treatNoBreakSpaceAsBreak)
107{
108#if !PLATFORM(MAC) || !defined(BUILDING_ON_TIGER)
109    TextBreakIterator* breakIterator = 0;
110#endif
111    int nextBreak = -1;
112
113    UChar lastCh = pos > 0 ? str[pos - 1] : 0;
114    for (int i = pos; i < len; i++) {
115        UChar ch = str[i];
116
117        if (isBreakableSpace(ch, treatNoBreakSpaceAsBreak) || shouldBreakAfter(lastCh, ch))
118            return i;
119
120        if (needsLineBreakIterator(ch) || needsLineBreakIterator(lastCh)) {
121            if (nextBreak < i && i) {
122#if !PLATFORM(MAC) || !defined(BUILDING_ON_TIGER)
123                if (!breakIterator)
124                    breakIterator = lineBreakIterator(str, len);
125                if (breakIterator)
126                    nextBreak = textBreakFollowing(breakIterator, i - 1);
127#else
128                static TextBreakLocatorRef breakLocator = lineBreakLocator();
129                if (breakLocator) {
130                    UniCharArrayOffset nextUCBreak;
131                    if (UCFindTextBreak(breakLocator, kUCTextBreakLineMask, 0, str, len, i, &nextUCBreak) == 0)
132                        nextBreak = nextUCBreak;
133                }
134#endif
135            }
136            if (i == nextBreak && !isBreakableSpace(lastCh, treatNoBreakSpaceAsBreak))
137                return i;
138        }
139
140        lastCh = ch;
141    }
142
143    return len;
144}
145
146} // namespace WebCore
147