/**
 ************************************************************************************
 * Copyright (C) 2006-2012, International Business Machines Corporation and others. *
 * All Rights Reserved.                                                             *
 ************************************************************************************
 */

86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#ifndef BRKENG_H
96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define BRKENG_H
106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h"
126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uobject.h"
136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utext.h"
146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uscript.h"
156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_BEGIN
176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgclass UnicodeSet;
196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgclass UStack;
206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgclass DictionaryMatcher;
216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*******************************************************************
236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * LanguageBreakEngine
246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <p>LanguageBreakEngines implement language-specific knowledge for
286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * finding text boundaries within a run of characters belonging to a
296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * specific set. The boundaries will be of a specific kind, e.g. word,
306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * line, etc.</p>
316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <p>LanguageBreakEngines should normally be implemented so as to
336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * be shared between threads without locking.</p>
346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgclass LanguageBreakEngine : public UMemory {
366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org public:
376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  /**
396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   * <p>Default constructor.</p>
406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   *
416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   */
426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  LanguageBreakEngine();
436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  /**
456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   * <p>Virtual destructor.</p>
466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   */
476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  virtual ~LanguageBreakEngine();
486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /**
506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * <p>Indicate whether this engine handles a particular character for
516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * a particular kind of break.</p>
526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  *
536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param c A character which begins a run that the engine might handle
546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param breakType The type of text break which the caller wants to determine
556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @return TRUE if this engine handles the particular character and break
566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * type.
576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  */
586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  virtual UBool handles(UChar32 c, int32_t breakType) const = 0;
596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /**
616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * <p>Find any breaks within a run in the supplied text.</p>
626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  *
636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param text A UText representing the text. The
646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * iterator is left at the end of the run of characters which the engine
656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * is capable of handling.
666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param startPos The start of the run within the supplied text.
676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param endPos The end of the run within the supplied text.
686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param reverse Whether the caller is looking for breaks in a reverse
696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * direction.
706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param breakType The type of break desired, or -1.
716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param foundBreaks An allocated C array of the breaks found, if any
726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @return The number of breaks found.
736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  */
746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  virtual int32_t findBreaks( UText *text,
756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                              int32_t startPos,
766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                              int32_t endPos,
776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                              UBool reverse,
786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                              int32_t breakType,
796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                              UStack &foundBreaks ) const = 0;
806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org};
826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*******************************************************************
846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * LanguageBreakFactory
856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <p>LanguageBreakFactorys find and return a LanguageBreakEngine
896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * that can determine breaks for characters in a specific set, if
906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * such an object can be found.</p>
916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <p>If a LanguageBreakFactory is to be shared between threads,
936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * appropriate synchronization must be used; there is none internal
946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * to the factory.</p>
956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <p>A LanguageBreakEngine returned by a LanguageBreakFactory can
976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * normally be shared between threads without synchronization, unless
986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the specific subclass of LanguageBreakFactory indicates otherwise.</p>
996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <p>A LanguageBreakFactory is responsible for deleting any LanguageBreakEngine
1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * it returns when it itself is deleted, unless the specific subclass of
1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * LanguageBreakFactory indicates otherwise. Naturally, the factory should
1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * not be deleted until the LanguageBreakEngines it has returned are no
1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * longer needed.</p>
1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgclass LanguageBreakFactory : public UMemory {
1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org public:
1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  /**
1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   * <p>Default constructor.</p>
1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   *
1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   */
1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  LanguageBreakFactory();
1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  /**
1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   * <p>Virtual destructor.</p>
1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   */
1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  virtual ~LanguageBreakFactory();
1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /**
1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * <p>Find and return a LanguageBreakEngine that can find the desired
1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * kind of break for the set of characters to which the supplied
1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * character belongs. It is up to the set of available engines to
1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * determine what the sets of characters are.</p>
1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  *
1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param c A character that begins a run for which a LanguageBreakEngine is
1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * sought.
1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param breakType The kind of text break for which a LanguageBreakEngine is
1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * sought.
1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @return A LanguageBreakEngine with the desired characteristics, or 0.
1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  */
1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType) = 0;
1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org};
1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*******************************************************************
1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * UnhandledEngine
1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <p>UnhandledEngine is a special subclass of LanguageBreakEngine that
1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * handles characters that no other LanguageBreakEngine is available to
1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * handle. It is told the character and the type of break; at its
1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * discretion it may handle more than the specified character (e.g.,
1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the entire script to which that character belongs.</p>
1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <p>UnhandledEngines may not be shared between threads without
1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * external synchronization.</p>
1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgclass UnhandledEngine : public LanguageBreakEngine {
1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org private:
1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * The sets of characters handled, for each break type
1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @internal
1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  UnicodeSet    *fHandled[4];
1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org public:
1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  /**
1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   * <p>Default constructor.</p>
1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   *
1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   */
1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  UnhandledEngine(UErrorCode &status);
1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  /**
1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   * <p>Virtual destructor.</p>
1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   */
1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  virtual ~UnhandledEngine();
1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /**
1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * <p>Indicate whether this engine handles a particular character for
1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * a particular kind of break.</p>
1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  *
1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param c A character which begins a run that the engine might handle
1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param breakType The type of text break which the caller wants to determine
1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @return TRUE if this engine handles the particular character and break
1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * type.
1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  */
1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  virtual UBool handles(UChar32 c, int32_t breakType) const;
1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /**
1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * <p>Find any breaks within a run in the supplied text.</p>
1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  *
1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param text A UText representing the text (TODO: UText). The
1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * iterator is left at the end of the run of characters which the engine
1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * is capable of handling.
1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param startPos The start of the run within the supplied text.
1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param endPos The end of the run within the supplied text.
1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param reverse Whether the caller is looking for breaks in a reverse
1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * direction.
1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param breakType The type of break desired, or -1.
1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param foundBreaks An allocated C array of the breaks found, if any
1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @return The number of breaks found.
1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  */
1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  virtual int32_t findBreaks( UText *text,
2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                              int32_t startPos,
2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                              int32_t endPos,
2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                              UBool reverse,
2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                              int32_t breakType,
2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                              UStack &foundBreaks ) const;
2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /**
2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * <p>Tell the engine to handle a particular character and break type.</p>
2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  *
2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param c A character which the engine should handle
2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param breakType The type of text break for which the engine should handle c
2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  */
2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  virtual void handleCharacter(UChar32 c, int32_t breakType);
2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org};
2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*******************************************************************
2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * ICULanguageBreakFactory
2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <p>ICULanguageBreakFactory is the default LanguageBreakFactory for
2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * ICU. It creates dictionary-based LanguageBreakEngines from dictionary
2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * data in the ICU data file.</p>
2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgclass ICULanguageBreakFactory : public LanguageBreakFactory {
2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org private:
2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * The stack of break engines created by this factory
2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @internal
2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  UStack    *fEngines;
2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org public:
2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  /**
2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   * <p>Standard constructor.</p>
2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   *
2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   */
2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  ICULanguageBreakFactory(UErrorCode &status);
2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  /**
2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   * <p>Virtual destructor.</p>
2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   */
2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  virtual ~ICULanguageBreakFactory();
2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /**
2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * <p>Find and return a LanguageBreakEngine that can find the desired
2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * kind of break for the set of characters to which the supplied
2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * character belongs. It is up to the set of available engines to
2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * determine what the sets of characters are.</p>
2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  *
2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param c A character that begins a run for which a LanguageBreakEngine is
2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * sought.
2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param breakType The kind of text break for which a LanguageBreakEngine is
2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * sought.
2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @return A LanguageBreakEngine with the desired characteristics, or 0.
2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  */
2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType);
2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgprotected:
2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /**
2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * <p>Create a LanguageBreakEngine for the set of characters to which
2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * the supplied character belongs, for the specified break type.</p>
2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  *
2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param c A character that begins a run for which a LanguageBreakEngine is
2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * sought.
2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param breakType The kind of text break for which a LanguageBreakEngine is
2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * sought.
2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @return A LanguageBreakEngine with the desired characteristics, or 0.
2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  */
2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, int32_t breakType);
2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  /**
2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   * <p>Create a DictionaryMatcher for the specified script and break type.</p>
2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   * @param script An ISO 15924 script code that identifies the dictionary to be
2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   * created.
2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   * @param breakType The kind of text break for which a dictionary is
2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   * sought.
2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   * @return A DictionaryMatcher with the desired characteristics, or NULL.
2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   */
2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script, int32_t breakType);
2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org};
2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_END
2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* BRKENG_H */
2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
290