1/* 2 * Copyright (C) 2005, 2007, 2010 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' 14 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 15 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS 17 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 18 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 19 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 20 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 21 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 22 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 23 * THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26#include "config.h" 27#include "break_lines.h" 28 29#include "CharacterNames.h" 30#include "TextBreakIterator.h" 31 32#if PLATFORM(MAC) 33#include <CoreServices/CoreServices.h> 34#endif 35 36namespace WebCore { 37 38static inline bool isBreakableSpace(UChar ch, bool treatNoBreakSpaceAsBreak) 39{ 40 switch (ch) { 41 case ' ': 42 case '\n': 43 case '\t': 44 return true; 45 case noBreakSpace: 46 return treatNoBreakSpaceAsBreak; 47 default: 48 return false; 49 } 50} 51 52// This differs from the Unicode algorithm only in that Unicode does not break 53// between a question mark and a vertical line (U+007C). 54static const unsigned char internetExplorerLineBreaksAfterQuestionMarkTable[0x80] = { 55 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, // \t 56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 57 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, // ! " ' ) , . / 58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, // : ; ? 59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, // ] 61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1 // } 63}; 64 65static const size_t internetExplorerLineBreaksAfterQuestionMarkTableSize = sizeof(internetExplorerLineBreaksAfterQuestionMarkTable) / sizeof(*internetExplorerLineBreaksAfterQuestionMarkTable); 66 67static inline bool shouldBreakAfter(UChar ch, UChar nextCh) 68{ 69 switch (ch) { 70 // For a question mark preceding a non-ASCII characters, defer to the Unicode algorithm by returning false. 71 // For ASCII characters, use a lookup table for enhanced speed and for compatibility with Internet Explorer. 72 case '?': 73 return nextCh < internetExplorerLineBreaksAfterQuestionMarkTableSize && internetExplorerLineBreaksAfterQuestionMarkTable[nextCh]; 74 // Internet Explorer always allows breaking after a hyphen. 75 case '-': 76 case softHyphen: 77 // FIXME: cases for ideographicComma and ideographicFullStop are a workaround for an issue in Unicode 5.0 78 // which is likely to be resolved in Unicode 5.1 <http://bugs.webkit.org/show_bug.cgi?id=17411>. 79 // We may want to remove or conditionalize this workaround at some point. 80 case ideographicComma: 81 case ideographicFullStop: 82#ifdef ANDROID_LAYOUT 83 // as '/' is used in uri which is always long, we would like to break it 84 case '/': 85#endif 86 return true; 87 default: 88 return false; 89 } 90} 91 92static inline bool needsLineBreakIterator(UChar ch) 93{ 94 return ch > 0x7F && ch != noBreakSpace; 95} 96 97#if PLATFORM(MAC) && defined(BUILDING_ON_TIGER) 98static inline TextBreakLocatorRef lineBreakLocator() 99{ 100 TextBreakLocatorRef locator = 0; 101 UCCreateTextBreakLocator(0, 0, kUCTextBreakLineMask, &locator); 102 return locator; 103} 104#endif 105 106int nextBreakablePosition(const UChar* str, int pos, int len, bool treatNoBreakSpaceAsBreak) 107{ 108#if !PLATFORM(MAC) || !defined(BUILDING_ON_TIGER) 109 TextBreakIterator* breakIterator = 0; 110#endif 111 int nextBreak = -1; 112 113 UChar lastCh = pos > 0 ? str[pos - 1] : 0; 114 for (int i = pos; i < len; i++) { 115 UChar ch = str[i]; 116 117 if (isBreakableSpace(ch, treatNoBreakSpaceAsBreak) || shouldBreakAfter(lastCh, ch)) 118 return i; 119 120 if (needsLineBreakIterator(ch) || needsLineBreakIterator(lastCh)) { 121 if (nextBreak < i && i) { 122#if !PLATFORM(MAC) || !defined(BUILDING_ON_TIGER) 123 if (!breakIterator) 124 breakIterator = lineBreakIterator(str, len); 125 if (breakIterator) 126 nextBreak = textBreakFollowing(breakIterator, i - 1); 127#else 128 static TextBreakLocatorRef breakLocator = lineBreakLocator(); 129 if (breakLocator) { 130 UniCharArrayOffset nextUCBreak; 131 if (UCFindTextBreak(breakLocator, kUCTextBreakLineMask, 0, str, len, i, &nextUCBreak) == 0) 132 nextBreak = nextUCBreak; 133 } 134#endif 135 } 136 if (i == nextBreak && !isBreakableSpace(lastCh, treatNoBreakSpaceAsBreak)) 137 return i; 138 } 139 140 lastCh = ch; 141 } 142 143 return len; 144} 145 146} // namespace WebCore 147