15db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* Libhnj is dual licensed under LGPL and MPL. Boilerplate for both
25db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * licenses follows.
35db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang */
45db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
55db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* LibHnj - a library for high quality hyphenation and justification
65db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * Copyright (C) 1998 Raph Levien,
75db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * 	     (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org),
85db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang *           (C) 2001 Peter Novodvorsky (nidd@cs.msu.su)
95db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang *           (C) 2006, 2007, 2008 László Németh (nemeth at OOo)
105db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang *
115db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * This library is free software; you can redistribute it and/or
125db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * modify it under the terms of the GNU Library General Public
135db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * License as published by the Free Software Foundation; either
145db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * version 2 of the License, or (at your option) any later version.
155db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang *
165db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * This library is distributed in the hope that it will be useful,
175db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * but WITHOUT ANY WARRANTY; without even the implied warranty of
185db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
195db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * Library General Public License for more details.
205db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang *
215db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * You should have received a copy of the GNU Library General Public
225db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * License along with this library; if not, write to the
235db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
245db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * Boston, MA  02111-1307  USA.
255db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang */
265db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
275db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/*
285db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * The contents of this file are subject to the Mozilla Public License
295db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * Version 1.0 (the "MPL"); you may not use this file except in
305db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * compliance with the MPL.  You may obtain a copy of the MPL at
315db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * http://www.mozilla.org/MPL/
325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang *
335db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * Software distributed under the MPL is distributed on an "AS IS" basis,
345db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL
355db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * for the specific language governing rights and limitations under the
365db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * MPL.
375db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang *
385db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang */
394760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang#include <fcntl.h>
404760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang#include <sys/mman.h>
414760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang#include <sys/stat.h>
425db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#include <stdlib.h> /* for NULL, malloc */
435db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#include <stdio.h>  /* for fprintf */
445db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#include <string.h> /* for strdup */
454760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang#include <unistd.h> /* for close */
465db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
475db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#define noVERBOSE
485db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
495db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#include "hnjalloc.h"
505db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#include "hyphen.h"
515db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
/* Return a freshly allocated copy of the NUL-terminated string s.
 * The storage comes from hnj_malloc(); release it with hnj_free(). */
static char *
hnj_strdup (const char *s)
{
    /* strlen() yields a size_t; keeping that type avoids narrowing to int. */
    size_t len = strlen (s);
    char *copy = hnj_malloc (len + 1);

    memcpy (copy, s, len + 1);   /* the +1 carries the terminator across */
    return copy;
}
645db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
/* Strip one trailing line terminator from s, in place.
 *
 * Recognised endings are "\n", "\r" and "\r\n" (a doubled "\r\r" is removed
 * as well).  A '\r' that is followed by an ordinary character is part of the
 * text and is left alone: the second-to-last byte is only examined after the
 * last byte has been identified as a line-end character.
 */
void hnj_strchomp(char * s)
{
    size_t len = strlen(s);

    if (len == 0)
        return;
    if (s[len - 1] != '\r' && s[len - 1] != '\n')
        return;    /* no line end present: leave the text untouched */

    s[len - 1] = '\0';
    if (len > 1 && s[len - 2] == '\r')
        s[len - 2] = '\0';
}
725db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
/* A small chained hash table.  It simply maps strings to state numbers. */

/* Number of bucket heads.  The bucket array has this fixed length. */
#define HASH_SIZE 31627

typedef struct _HashEntry HashEntry;
typedef struct _HashTab HashTab;

/* One key/value pair; entries that share a bucket are linked through next. */
struct _HashEntry {
    HashEntry *next;
    char *key;
    int val;
};

/* The table itself: one list head per bucket. */
struct _HashTab {
    HashEntry *entries[HASH_SIZE];
};
915db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
/* A char* hash function from ASU - adapted from Gtk+.
 *
 * Each character is shifted into the hash four bits at a time.  Whenever
 * the top four bits of the running value become non-zero they are folded
 * back into the low bits and then cleared.  The value is returned without
 * being reduced to a bucket index.
 */
static unsigned int
hnj_string_hash (const char *s)
{
    unsigned int hash = 0;
    const char *cursor = s;

    while (*cursor != '\0') {
        unsigned int top;

        hash = (hash << 4) + *cursor++;
        top = hash & 0xf0000000;
        if (top != 0)
            hash ^= (top >> 24) ^ top;
    }
    return hash;
}
1075db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1085db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangstatic HashTab *
1095db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wanghnj_hash_new (void)
1105db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{
1115db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    HashTab *hashtab;
1125db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int i;
1135db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1145db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    hashtab = hnj_malloc (sizeof(HashTab));
1155db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    for (i = 0; i < HASH_SIZE; i++)
1165db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        hashtab->entries[i] = NULL;
1175db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1185db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    return hashtab;
1195db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang}
1205db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1215db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangstatic void
1225db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wanghnj_hash_free (HashTab *hashtab)
1235db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{
1245db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int i;
1255db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    HashEntry *e, *next;
1265db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1275db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    for (i = 0; i < HASH_SIZE; i++)
1285db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        for (e = hashtab->entries[i]; e; e = next)
1295db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        {
1305db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            next = e->next;
1315db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            hnj_free (e->key);
1325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            hnj_free (e);
1335db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        }
1345db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1355db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    hnj_free (hashtab);
1365db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang}
1375db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1385db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* assumes that key is not already present! */
1395db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangstatic void
1405db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wanghnj_hash_insert (HashTab *hashtab, const char *key, int val)
1415db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{
1425db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int i;
1435db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    HashEntry *e;
1445db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1455db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    i = hnj_string_hash (key) % HASH_SIZE;
1465db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    e = hnj_malloc (sizeof(HashEntry));
1475db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    e->next = hashtab->entries[i];
1485db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    e->key = hnj_strdup (key);
1495db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    e->val = val;
1505db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    hashtab->entries[i] = e;
1515db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang}
1525db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1535db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* return val if found, otherwise -1 */
1545db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangstatic int
1555db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wanghnj_hash_lookup (HashTab *hashtab, const char *key)
1565db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{
1575db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int i;
1585db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    HashEntry *e;
1595db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    i = hnj_string_hash (key) % HASH_SIZE;
1605db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    for (e = hashtab->entries[i]; e; e = e->next)
1615db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        if (!strcmp (key, e->key))
1625db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            return e->val;
1635db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    return -1;
1645db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang}
1655db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1665db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* Get the state number, allocating a new state if necessary. */
1675db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangstatic int
1685db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wanghnj_get_state (HyphenDict *dict, HashTab *hashtab, const char *string)
1695db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{
1705db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int state_num;
1715db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1725db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    state_num = hnj_hash_lookup (hashtab, string);
1735db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1745db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    if (state_num >= 0)
1755db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        return state_num;
1765db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1775db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    hnj_hash_insert (hashtab, string, dict->num_states);
1785db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    /* predicate is true if dict->num_states is a power of two */
1795db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    if (!(dict->num_states & (dict->num_states - 1)))
1805db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    {
1815db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        dict->states = hnj_realloc (dict->states,
1825db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            (dict->num_states << 1) *
1835db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            sizeof(HyphenState));
1845db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    }
1855db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    dict->states[dict->num_states].match = NULL;
1865db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    dict->states[dict->num_states].repl = NULL;
1875db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    dict->states[dict->num_states].fallback_state = -1;
1885db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    dict->states[dict->num_states].num_trans = 0;
1895db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    dict->states[dict->num_states].trans = NULL;
1905db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    return dict->num_states++;
1915db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang}
1925db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1935db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* add a transition from state1 to state2 through ch - assumes that the
1945db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang   transition does not already exist */
1955db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangstatic void
1965db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wanghnj_add_trans (HyphenDict *dict, int state1, int state2, char ch)
1975db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{
1985db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int num_trans;
1995db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
2005db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    num_trans = dict->states[state1].num_trans;
2015db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    if (num_trans == 0)
2025db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    {
2035db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        dict->states[state1].trans = hnj_malloc (sizeof(HyphenTrans));
2045db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    }
2055db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    else if (!(num_trans & (num_trans - 1)))
2065db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    {
2075db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        dict->states[state1].trans = hnj_realloc (dict->states[state1].trans,
2085db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            (num_trans << 1) *
2095db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            sizeof(HyphenTrans));
2105db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    }
2115db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    dict->states[state1].trans[num_trans].ch = ch;
2125db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    dict->states[state1].trans[num_trans].new_state = state2;
2135db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    dict->states[state1].num_trans++;
2145db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang}
2155db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
#ifdef VERBOSE
/* Only compiled when VERBOSE is defined: the table that get_state_str()
   searches. */
HashTab *global;

/* Reverse lookup in `global`: walk every bucket and return the key of the
   first entry whose value equals state, or NULL when there is none. */
static char *
get_state_str (int state)
{
    int bucket;

    for (bucket = 0; bucket < HASH_SIZE; bucket++)
    {
        HashEntry *node = global->entries[bucket];

        while (node != NULL)
        {
            if (node->val == state)
                return node->key;
            node = node->next;
        }
    }
    return NULL;
}
#endif
2325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
/* Copy the next line of an in-memory dictionary into s.
 *
 *   s              destination buffer
 *   size           capacity of s in bytes, including the terminating NUL
 *   dict_contents  start of the dictionary text
 *   dict_length    number of bytes available at dict_contents
 *   dict_ptr       in/out read offset; advanced past every byte consumed
 *
 * Bytes are copied up to and including the next '\n', stopping early when
 * s is full (size - 1 bytes) or the input is exhausted; the rest of an
 * over-long line is delivered by the following call(s).  Whenever s is
 * written it is NUL-terminated.
 *
 * Returns s, or NULL when nothing was copied (end of input).  A buffer with
 * no room for the terminator (size <= 0) or a negative offset also yields
 * NULL, and in that case neither s nor *dict_ptr is touched.
 */
static char *
get_line (char *s, int size, const char *dict_contents, int dict_length,
    int *dict_ptr)
{
    int len = 0;

    /* Without this guard the terminator below would land outside s, and a
       negative offset would read in front of dict_contents. */
    if (size <= 0 || *dict_ptr < 0)
        return NULL;

    while (len < size - 1 && *dict_ptr < dict_length) {
        char c = dict_contents[(*dict_ptr)++];

        s[len++] = c;
        if (c == '\n')
            break;
    }
    s[len] = '\0';

    return len > 0 ? s : NULL;
}
2524760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang
2535db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) WangHyphenDict *
2545db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wanghnj_hyphen_load (const char *fn)
2555db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{
2564760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang    if (fn == NULL)
2574760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang        return NULL;
2584760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang    const int fd = open(fn, O_RDONLY);
2594760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang    if (fd == -1)
2604760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang        return NULL;
2614760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang    struct stat sb;
2624760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang    if (fstat(fd, &sb) == -1)  {  /* To obtain file size */
2634760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang        close(fd);
2644760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang        return NULL;
2654760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang    }
2664760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang
2674760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang    const char *addr = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
2684760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang    if (addr == MAP_FAILED) {
2694760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang        close(fd);
2704760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang        return NULL;
2714760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang    }
2724760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang    HyphenDict *dict = hnj_hyphen_load_from_buffer(addr, sb.st_size);
2734760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang    munmap((void *)addr, sb.st_size);
2744760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang    close(fd);
2754760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang
2764760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang    return dict;
2774760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang}
2784760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang
2794760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) WangHyphenDict *
2804760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wanghnj_hyphen_load_from_buffer (const char *dict_contents, int dict_length)
2814760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang{
2825db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    HyphenDict *dict[2];
2835db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    HashTab *hashtab;
2845db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char buf[MAX_CHARS];
2855db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char word[MAX_CHARS];
2865db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char pattern[MAX_CHARS];
2875db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char * repl;
2885db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    signed char replindex;
2895db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    signed char replcut;
2905db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int state_num = 0, last_state;
2915db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int i, j, k;
2925db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char ch;
2935db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int found;
2945db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    HashEntry *e;
2955db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int nextlevel = 0;
2965db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
2974760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang    if (dict_contents == NULL)
2985db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        return NULL;
2995db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
3004760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang    int dict_ptr = 0;
3015db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang// loading one or two dictionaries (separated by NEXTLEVEL keyword)
3025db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
3035db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        hashtab = hnj_hash_new ();
3045db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE
3055db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        global = hashtab;
3065db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif
3075db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        hnj_hash_insert (hashtab, "", 0);
3085db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        dict[k] = hnj_malloc (sizeof(HyphenDict));
3095db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        dict[k]->num_states = 1;
3105db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        dict[k]->states = hnj_malloc (sizeof(HyphenState));
3115db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        dict[k]->states[0].match = NULL;
3125db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        dict[k]->states[0].repl = NULL;
3135db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        dict[k]->states[0].fallback_state = -1;
3145db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        dict[k]->states[0].num_trans = 0;
3155db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        dict[k]->states[0].trans = NULL;
3165db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        dict[k]->nextlevel = NULL;
3175db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        dict[k]->lhmin = 0;
3185db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        dict[k]->rhmin = 0;
3195db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        dict[k]->clhmin = 0;
3205db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        dict[k]->crhmin = 0;
3215db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
3225db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        /* read in character set info */
3235db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        if (k == 0) {
3245db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
3254760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang            get_line(dict[k]->cset, sizeof(dict[k]->cset), dict_contents,
3264760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang                dict_length, &dict_ptr);
3275db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            for (i=0;i<MAX_NAME;i++)
3285db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
3295db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    dict[k]->cset[i] = 0;
3305db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
3315db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        } else {
3325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            strcpy(dict[k]->cset, dict[0]->cset);
3335db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            dict[k]->utf8 = dict[0]->utf8;
3345db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        }
3355db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
3364760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang        while (get_line(buf, sizeof(buf), dict_contents, dict_length,
3374760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang                &dict_ptr) != NULL)
3385db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        {
3395db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            if (buf[0] != '%')
3405db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            {
3415db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
3425db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    nextlevel = 1;
3435db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    break;
3445db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                } else if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
3455db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    dict[k]->lhmin = atoi(buf + 13);
3465db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    continue;
3475db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) {
3485db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    dict[k]->rhmin = atoi(buf + 14);
3495db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    continue;
3505db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                } else if (strncmp(buf, "COMPOUNDLEFTHYPHENMIN", 21) == 0) {
3515db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    dict[k]->clhmin = atoi(buf + 21);
3525db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    continue;
3535db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                } else if (strncmp(buf, "COMPOUNDRIGHTHYPHENMIN", 22) == 0) {
3545db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    dict[k]->crhmin = atoi(buf + 22);
3555db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    continue;
3565db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                }
3575db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                j = 0;
3585db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                pattern[j] = '0';
3595db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                repl = strchr(buf, '/');
3605db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                replindex = 0;
3615db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                replcut = 0;
3625db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                if (repl) {
3635db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    char * index = strchr(repl + 1, ',');
3645db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    *repl = '\0';
3655db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    if (index) {
3665db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        char * index2 = strchr(index + 1, ',');
3675db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        *index = '\0';
3685db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        if (index2) {
3695db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                            *index2 = '\0';
3705db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                            replindex = (signed char) atoi(index + 1) - 1;
3715db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                            replcut = (signed char) atoi(index2 + 1);
3725db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        }
3735db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    } else {
3745db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        hnj_strchomp(repl + 1);
3755db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        replindex = 0;
3765db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        replcut = strlen(buf);
3775db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    }
3785db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    repl = hnj_strdup(repl + 1);
3795db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                }
3805db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                for (i = 0; ((buf[i] > ' ') || (buf[i] < 0)); i++)
3815db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                {
3825db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    if (buf[i] >= '0' && buf[i] <= '9')
3835db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        pattern[j] = buf[i];
3845db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    else
3855db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    {
3865db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        word[j] = buf[i];
3875db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        pattern[++j] = '0';
3885db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    }
3895db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                }
3905db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                word[j] = '\0';
3915db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                pattern[j + 1] = '\0';
3925db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
3935db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                i = 0;
3945db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                if (!repl) {
3955db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    /* Optimize away leading zeroes */
3965db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    for (; pattern[i] == '0'; i++);
3975db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                } else {
3985db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    if (*word == '.') i++;
3995db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    /* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */
4005db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    if (dict[k]->utf8) {
4015db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        int pu = -1;        /* unicode character position */
4025db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        int ps = -1;        /* unicode start position (original replindex) */
4035db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        int pc = (*word == '.') ? 1: 0; /* 8-bit character position */
4045db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        for (; pc < (strlen(word) + 1); pc++) {
4055db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                            /* beginning of an UTF-8 character (not '10' start bits) */
4065db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                            if ((((unsigned char) word[pc]) >> 6) != 2) pu++;
4075db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                            if ((ps < 0) && (replindex == pu)) {
4085db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                                ps = replindex;
4095db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                                replindex = pc;
4105db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                            }
4115db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                            if ((ps >= 0) && ((pu - ps) == replcut)) {
4125db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                                replcut = (pc - replindex);
4135db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                                break;
4145db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                            }
4155db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        }
4165db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        if (*word == '.') replindex--;
4175db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    }
4185db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                }
4195db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
4205db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE
4215db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                printf ("word %s pattern %s, j = %d  repl: %s\n", word, pattern + i, j, repl);
4225db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif
4235db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                found = hnj_hash_lookup (hashtab, word);
4245db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                state_num = hnj_get_state (dict[k], hashtab, word);
4255db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                dict[k]->states[state_num].match = hnj_strdup (pattern + i);
4265db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                dict[k]->states[state_num].repl = repl;
4275db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                dict[k]->states[state_num].replindex = replindex;
4285db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                if (!replcut) {
4295db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    dict[k]->states[state_num].replcut = strlen(word);
4305db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                } else {
4315db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    dict[k]->states[state_num].replcut = replcut;
4325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                }
4335db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
4345db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                /* now, put in the prefix transitions */
4355db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                for (; found < 0 ;j--)
4365db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                {
4375db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    last_state = state_num;
4385db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    ch = word[j - 1];
4395db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    word[j - 1] = '\0';
4405db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    found = hnj_hash_lookup (hashtab, word);
4415db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    state_num = hnj_get_state (dict[k], hashtab, word);
4425db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    hnj_add_trans (dict[k], state_num, last_state, ch);
4435db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                }
4445db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            }
4455db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        }
4465db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
4475db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        /* Could do unioning of matches here (instead of the preprocessor script).
4485db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang           If we did, the pseudocode would look something like this:
4495db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
4505db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang           foreach state in the hash table
4515db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang           foreach i = [1..length(state) - 1]
4525db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang           state to check is substr (state, i)
4535db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang           look it up
4545db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang           if found, and if there is a match, union the match in.
4555db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
4565db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang           It's also possible to avoid the quadratic blowup by doing the
4575db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang           search in order of increasing state string sizes - then you
4585db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang           can break the loop after finding the first match.
4595db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
4605db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang           This step should be optional in any case - if there is a
4615db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang           preprocessed rule table, it's always faster to use that.
4625db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
4635db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        */
4645db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
4655db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        /* put in the fallback states */
4665db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        for (i = 0; i < HASH_SIZE; i++)
4675db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            for (e = hashtab->entries[i]; e; e = e->next)
4685db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            {
4695db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                if (*(e->key)) for (j = 1; 1; j++)
4705db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                               {
4715db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                                   state_num = hnj_hash_lookup (hashtab, e->key + j);
4725db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                                   if (state_num >= 0)
4735db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                                       break;
4745db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                               }
4755db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                /* KBH: FIXME state 0 fallback_state should always be -1? */
4765db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                if (e->val)
4775db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    dict[k]->states[e->val].fallback_state = state_num;
4785db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            }
4795db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE
4805db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        for (i = 0; i < HASH_SIZE; i++)
4815db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            for (e = hashtab->entries[i]; e; e = e->next)
4825db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            {
4835db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                printf ("%d string %s state %d, fallback=%d\n", i, e->key, e->val,
4845db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    dict[k]->states[e->val].fallback_state);
4855db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                for (j = 0; j < dict[k]->states[e->val].num_trans; j++)
4865db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    printf (" %c->%d\n", dict[k]->states[e->val].trans[j].ch,
4875db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        dict[k]->states[e->val].trans[j].new_state);
4885db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            }
4895db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif
4905db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
4915db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifndef VERBOSE
4925db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        hnj_hash_free (hashtab);
4935db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif
4945db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        state_num = 0;
4955db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    }
4965db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    if (k == 2) dict[0]->nextlevel = dict[1];
4975db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    return dict[0];
4985db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang}
4995db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
5005db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangvoid hnj_hyphen_free (HyphenDict *dict)
5015db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{
5025db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int state_num;
5035db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    HyphenState *hstate;
5045db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
5055db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    for (state_num = 0; state_num < dict->num_states; state_num++)
5065db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    {
5075db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        hstate = &dict->states[state_num];
5085db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        if (hstate->match)
5095db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            hnj_free (hstate->match);
5105db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        if (hstate->repl)
5115db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            hnj_free (hstate->repl);
5125db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        if (hstate->trans)
5135db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            hnj_free (hstate->trans);
5145db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    }
5155db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    if (dict->nextlevel) hnj_hyphen_free(dict->nextlevel);
5165db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
5175db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    hnj_free (dict->states);
5185db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
5195db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    hnj_free (dict);
5205db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang}
5215db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
5225db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#define MAX_WORD 256
5235db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
5245db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangint hnj_hyphen_hyphenate (HyphenDict *dict,
5255db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    const char *word, int word_size,
5265db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char *hyphens)
5275db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{
5285db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char prep_word_buf[MAX_WORD];
5295db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char *prep_word;
5305db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int i, j, k;
5315db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int state;
5325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char ch;
5335db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    HyphenState *hstate;
5345db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char *match;
5355db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int offset;
5365db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
5375db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    if (word_size + 3 < MAX_WORD)
5385db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        prep_word = prep_word_buf;
5395db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    else
5405db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        prep_word = hnj_malloc (word_size + 3);
5415db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
5425db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    j = 0;
5435db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    prep_word[j++] = '.';
5445db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
5455db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    for (i = 0; i < word_size; i++)
5465db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        prep_word[j++] = word[i];
5475db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
5485db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    prep_word[j++] = '.';
5495db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    prep_word[j] = '\0';
5505db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
5515db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    for (i = 0; i < j; i++)
5525db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        hyphens[i] = '0';
5535db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
5545db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE
5555db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    printf ("prep_word = %s\n", prep_word);
5565db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif
5575db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
5585db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    /* now, run the finite state machine */
5595db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    state = 0;
5605db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    for (i = 0; i < j; i++)
5615db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    {
5625db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        ch = prep_word[i];
5635db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        for (;;)
5645db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        {
5655db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
5665db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            if (state == -1) {
5675db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                /* return 1; */
5685db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                /*  KBH: FIXME shouldn't this be as follows? */
5695db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                state = 0;
5705db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                goto try_next_letter;
5715db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            }
5725db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
5735db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE
5745db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            char *state_str;
5755db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            state_str = get_state_str (state);
5765db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
5775db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            for (k = 0; k < i - strlen (state_str); k++)
5785db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                putchar (' ');
5795db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            printf ("%s", state_str);
5805db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif
5815db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
5825db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            hstate = &dict->states[state];
5835db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            for (k = 0; k < hstate->num_trans; k++)
5845db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                if (hstate->trans[k].ch == ch)
5855db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                {
5865db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    state = hstate->trans[k].new_state;
5875db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    goto found_state;
5885db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                }
5895db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            state = hstate->fallback_state;
5905db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE
5915db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            printf (" falling back, fallback_state %d\n", state);
5925db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif
5935db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        }
5945db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang      found_state:
5955db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE
5965db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        printf ("found state %d\n",state);
5975db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif
5985db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        /* Additional optimization is possible here - especially,
5995db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang           elimination of trailing zeroes from the match. Leading zeroes
6005db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang           have already been optimized. */
6015db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        match = dict->states[state].match;
6025db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        /* replacing rules not handled by hyphen_hyphenate() */
6035db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        if (match && !dict->states[state].repl)
6045db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        {
6055db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            offset = i + 1 - strlen (match);
6065db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE
6075db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            for (k = 0; k < offset; k++)
6085db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                putchar (' ');
6095db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            printf ("%s\n", match);
6105db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif
6115db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            /* This is a linear search because I tried a binary search and
6125db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang               found it to be just a teeny bit slower. */
6135db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            for (k = 0; match[k]; k++)
6145db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                if (hyphens[offset + k] < match[k])
6155db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    hyphens[offset + k] = match[k];
6165db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        }
6175db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
6185db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        /* KBH: we need this to make sure we keep looking in a word */
6195db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        /* for patterns even if the current character is not known in state 0 */
6205db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        /* since patterns for hyphenation may occur anywhere in the word */
6215db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang      try_next_letter: ;
6225db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
6235db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    }
6245db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE
6255db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    for (i = 0; i < j; i++)
6265db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        putchar (hyphens[i]);
6275db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    putchar ('\n');
6285db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif
6295db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
6305db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    for (i = 0; i < j - 4; i++)
6315db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#if 0
6325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        if (hyphens[i + 1] & 1)
6335db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            hyphens[i] = '-';
6345db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#else
6355db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    hyphens[i] = hyphens[i + 1];
6365db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif
6375db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    hyphens[0] = '0';
6385db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    for (; i < word_size; i++)
6395db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        hyphens[i] = '0';
6405db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    hyphens[word_size] = '\0';
6415db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
6425db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    if (prep_word != prep_word_buf)
6435db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        hnj_free (prep_word);
6445db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
6455db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    return 0;
6465db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang}
6475db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
/* Character length of the first n bytes of the input word.
 *
 * word - NUL-terminated string.
 * n    - counting stops once the byte offset reaches n (or at the NUL).
 * utf8 - when non-zero, UTF-8 continuation bytes (10xxxxxx) are folded
 *        into the character they belong to; otherwise one byte counts
 *        as one character.
 *
 * A character that starts before offset n is counted in full, so its
 * continuation bytes are stepped over even when they lie at or past n.
 */
int hnj_hyphen_strnlen(const char * word, int n, int utf8)
{
    int i = 0;
    int j = 0;
    while (j < n && word[j] != '\0') {
        i++;
        /* skip the first byte, then any continuation bytes */
        for (j++; utf8 && (word[j] & 0xc0) == 0x80; j++);
    }
    return i;
}

/* Clear hyphenation marks that would leave fewer than lhmin characters
 * before the break.
 *
 * utf8      - non-zero when word is UTF-8 encoded.
 * word      - NUL-terminated word; indices below are byte offsets.
 * word_size - not used by this function.
 * hyphens   - per-byte hyphenation levels; affected entries are set to '0'.
 * rep, pos, cut - pointers to the per-byte arrays of the non-standard
 *             hyphenation data; *rep, *pos or *cut may be NULL.
 * lhmin     - minimum number of characters before a hyphen.
 *
 * For each of the first lhmin - 1 characters every byte of the character
 * is visited.  A byte without replacement data gets hyphens[j] = '0'.  A
 * byte with replacement data containing '=' keeps its mark only if the
 * characters preceding the change plus the replacement text before '='
 * reach lhmin; otherwise the replacement string is released with free()
 * and the mark is cleared.
 *
 * Always returns 0.
 */
int hnj_hyphen_lhmin(int utf8, const char *word, int word_size, char * hyphens,
	char *** rep, int ** pos, int ** cut, int lhmin)
{
    int i, j;
    for (i = 1, j = 0; i < lhmin && word[j] != '\0'; i++) {
        do {
            /* check length of the non-standard part */
            if (*rep && *pos && *cut && (*rep)[j]) {
                char * rh = strchr((*rep)[j], '=');
                if (rh && (hnj_hyphen_strnlen(word, j - (*pos)[j] + 1, utf8) +
                        hnj_hyphen_strnlen((*rep)[j], rh - (*rep)[j], utf8)) < lhmin) {
                    free((*rep)[j]);
                    (*rep)[j] = NULL;
                    hyphens[j] = '0';
                }
            } else {
                hyphens[j] = '0';
            }
            j++;
            /* Stay on the same character while the byte now at j is a
               UTF-8 continuation byte.  This is the same test
               hnj_hyphen_strnlen() uses; it never looks past word[j], so
               the terminating NUL ends the loop without reading beyond it. */
        } while (utf8 && (word[j] & 0xc0) == 0x80);
    }
    return 0;
}

/* Clear hyphenation marks that would leave fewer than rhmin characters
 * after the break.
 *
 * utf8      - non-zero when word is UTF-8 encoded.
 * word      - the word; bytes up to index word_size - 1 are read.
 * word_size - length of word in bytes.
 * hyphens   - per-byte hyphenation levels; affected entries are set to '0'.
 * rep, pos, cut - pointers to the per-byte arrays of the non-standard
 *             hyphenation data; *rep, *pos or *cut may be NULL.
 * rhmin     - minimum number of characters after a hyphen.
 *
 * Bytes are visited backwards from word_size - 2 down to index 1.  A byte
 * without replacement data gets hyphens[j] = '0'.  A byte with replacement
 * data containing '=' keeps its mark only if the characters following the
 * change plus the replacement text after '=' reach rhmin; otherwise the
 * replacement string is released with free() and the mark is cleared.
 *
 * Always returns 0.
 */
int hnj_hyphen_rhmin(int utf8, const char *word, int word_size, char * hyphens,
	char *** rep, int ** pos, int ** cut, int rhmin)
{
    int i;
    int j = word_size - 2;
    for (i = 1; i < rhmin && j > 0; j--) {
        /* check length of the non-standard part */
        if (*rep && *pos && *cut && (*rep)[j]) {
            char * rh = strchr((*rep)[j], '=');
            if (rh && (hnj_hyphen_strnlen(word + j - (*pos)[j] + (*cut)[j] + 1, 100, utf8) +
                    hnj_hyphen_strnlen(rh + 1, strlen(rh + 1), utf8)) < rhmin) {
                free((*rep)[j]);
                (*rep)[j] = NULL;
                hyphens[j] = '0';
            }
        } else {
            hyphens[j] = '0';
        }
        /* Count one character only when byte j is the last byte of a
           character, i.e. the following byte is not a continuation byte.
           Counting every non-lead byte (the previous test) counted 3- and
           4-byte sequences more than once.  j + 1 <= word_size - 1, so the
           look-ahead stays inside the word. */
        if (!utf8 || (word[j + 1] & 0xc0) != 0x80) i++;
    }
    return 0;
}
7045db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
7055db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang// recursive function for compound level hyphenation
7065db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangint hnj_hyphen_hyph_(HyphenDict *dict, const char *word, int word_size,
7075db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char * hyphens, char *** rep, int ** pos, int ** cut,
7085db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int clhmin, int crhmin, int lend, int rend)
7095db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{
7105db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char prep_word_buf[MAX_WORD];
7115db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char *prep_word;
7125db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int i, j, k;
7135db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int state;
7145db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char ch;
7155db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    HyphenState *hstate;
7165db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char *match;
7175db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char *repl;
7185db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    signed char replindex;
7195db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    signed char replcut;
7205db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int offset;
7215db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int matchlen_buf[MAX_CHARS];
7225db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int matchindex_buf[MAX_CHARS];
7235db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char * matchrepl_buf[MAX_CHARS];
7245db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int * matchlen;
7255db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int * matchindex;
7265db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char ** matchrepl;
7275db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int isrepl = 0;
7285db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int nHyphCount;
7295db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
7305db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    if (word_size + 3 < MAX_CHARS) {
7315db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        prep_word = prep_word_buf;
7325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        matchlen = matchlen_buf;
7335db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        matchindex = matchindex_buf;
7345db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        matchrepl = matchrepl_buf;
7355db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    } else {
7365db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        prep_word = hnj_malloc (word_size + 3);
7375db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        matchlen = hnj_malloc ((word_size + 3) * sizeof(int));
7385db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        matchindex = hnj_malloc ((word_size + 3) * sizeof(int));
7395db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        matchrepl = hnj_malloc ((word_size + 3) * sizeof(char *));
7405db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    }
7415db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
7425db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    j = 0;
7435db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    prep_word[j++] = '.';
7445db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
7455db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    for (i = 0; i < word_size; i++)
7465db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        prep_word[j++] = word[i];
7475db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
7485db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    prep_word[j++] = '.';
7495db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    prep_word[j] = '\0';
7505db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
7515db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    for (i = 0; i < j; i++)
7525db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        hyphens[i] = '0';
7535db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
7545db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE
7555db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    printf ("prep_word = %s\n", prep_word);
7565db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif
7575db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
7585db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    /* now, run the finite state machine */
7595db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    state = 0;
7605db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    for (i = 0; i < j; i++)
7615db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    {
7625db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        ch = prep_word[i];
7635db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        for (;;)
7645db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        {
7655db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
7665db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            if (state == -1) {
7675db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                /* return 1; */
7685db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                /*  KBH: FIXME shouldn't this be as follows? */
7695db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                state = 0;
7705db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                goto try_next_letter;
7715db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            }
7725db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
7735db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE
7745db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            char *state_str;
7755db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            state_str = get_state_str (state);
7765db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
7775db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            for (k = 0; k < i - strlen (state_str); k++)
7785db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                putchar (' ');
7795db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            printf ("%s", state_str);
7805db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif
7815db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
7825db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            hstate = &dict->states[state];
7835db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            for (k = 0; k < hstate->num_trans; k++)
7845db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                if (hstate->trans[k].ch == ch)
7855db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                {
7865db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    state = hstate->trans[k].new_state;
7875db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    goto found_state;
7885db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                }
7895db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            state = hstate->fallback_state;
7905db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE
7915db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            printf (" falling back, fallback_state %d\n", state);
7925db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif
7935db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        }
7945db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang      found_state:
7955db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE
7965db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        printf ("found state %d\n",state);
7975db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif
7985db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        /* Additional optimization is possible here - especially,
7995db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang           elimination of trailing zeroes from the match. Leading zeroes
8005db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang           have already been optimized. */
8015db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        match = dict->states[state].match;
8025db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        repl = dict->states[state].repl;
8035db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        replindex = dict->states[state].replindex;
8045db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        replcut = dict->states[state].replcut;
8055db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        /* replacing rules not handled by hyphen_hyphenate() */
8065db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        if (match)
8075db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        {
8085db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            offset = i + 1 - strlen (match);
8095db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE
8105db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            for (k = 0; k < offset; k++)
8115db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                putchar (' ');
8125db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            printf ("%s (%s)\n", match, repl);
8135db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif
8145db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            if (repl) {
8155db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                if (!isrepl) for(; isrepl < word_size; isrepl++) {
8165db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        matchrepl[isrepl] = NULL;
8175db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        matchindex[isrepl] = -1;
8185db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    }
8195db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                matchlen[offset + replindex] = replcut;
8205db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            }
8215db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            /* This is a linear search because I tried a binary search and
8225db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang               found it to be just a teeny bit slower. */
8235db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            for (k = 0; match[k]; k++) {
8245db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                if ((hyphens[offset + k] < match[k])) {
8255db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    hyphens[offset + k] = match[k];
8265db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    if (match[k]&1) {
8275db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        matchrepl[offset + k] = repl;
8285db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        if (repl && (k >= replindex) && (k <= replindex + replcut)) {
8295db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                            matchindex[offset + replindex] = offset + k;
8305db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        }
8315db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    }
8325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                }
8335db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            }
8345db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
8355db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        }
8365db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
8375db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        /* KBH: we need this to make sure we keep looking in a word */
8385db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        /* for patterns even if the current character is not known in state 0 */
8395db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        /* since patterns for hyphenation may occur anywhere in the word */
8405db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang      try_next_letter: ;
8415db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
8425db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    }
8435db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE
8445db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    for (i = 0; i < j; i++)
8455db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        putchar (hyphens[i]);
8465db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    putchar ('\n');
8475db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif
8485db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
8495db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    for (i = 0; i < j - 3; i++)
8505db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#if 0
8515db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        if (hyphens[i + 1] & 1)
8525db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            hyphens[i] = '-';
8535db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#else
8545db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    hyphens[i] = hyphens[i + 1];
8555db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif
8565db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    for (; i < word_size; i++)
8575db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        hyphens[i] = '0';
8585db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    hyphens[word_size] = '\0';
8595db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
8605db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    /* now create a new char string showing hyphenation positions */
8615db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    /* count the hyphens and allocate space for the new hyphenated string */
8625db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    nHyphCount = 0;
8635db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    for (i = 0; i < word_size; i++)
8645db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        if (hyphens[i]&1)
8655db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            nHyphCount++;
8665db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    j = 0;
8675db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    for (i = 0; i < word_size; i++) {
8685db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        if (isrepl && (matchindex[i] >= 0) && matchrepl[matchindex[i]]) {
8695db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            if (rep && pos && cut) {
8705db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                if (!*rep && !*pos && !*cut) {
8715db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    int k;
8725db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    *rep = (char **) malloc(sizeof(char *) * word_size);
8735db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    *pos = (int *) malloc(sizeof(int) * word_size);
8745db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    *cut = (int *) malloc(sizeof(int) * word_size);
8755db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    for (k = 0; k < word_size; k++) {
8765db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        (*rep)[k] = NULL;
8775db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        (*pos)[k] = 0;
8785db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        (*cut)[k] = 0;
8795db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    }
8805db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                }
8815db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                (*rep)[matchindex[i] - 1] = hnj_strdup(matchrepl[matchindex[i]]);
8825db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                (*pos)[matchindex[i] - 1] = matchindex[i] - i;
8835db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                (*cut)[matchindex[i] - 1] = matchlen[i];
8845db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            }
8855db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            j += strlen(matchrepl[matchindex[i]]);
8865db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            i += matchlen[i] - 1;
8875db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        }
8885db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    }
8895db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
8905db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    if (matchrepl != matchrepl_buf) {
8915db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        hnj_free (matchrepl);
8925db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        hnj_free (matchlen);
8935db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        hnj_free (matchindex);
8945db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    }
8955db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
8965db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    // recursive hyphenation of the first (compound) level segments
8975db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    if (dict->nextlevel) {
8985db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        char * rep2_buf[MAX_WORD];
8995db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        int pos2_buf[MAX_WORD];
9005db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        int cut2_buf[MAX_WORD];
9015db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        char hyphens2_buf[MAX_WORD];
9025db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        char ** rep2;
9035db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        int * pos2;
9045db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        int * cut2;
9055db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        char * hyphens2;
9065db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        int begin = 0;
9075db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        if (word_size < MAX_CHARS) {
9085db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            rep2 = rep2_buf;
9095db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            pos2 = pos2_buf;
9105db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            cut2 = cut2_buf;
9115db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            hyphens2 = hyphens2_buf;
9125db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        } else {
9135db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            rep2 = hnj_malloc (word_size * sizeof(char *));
9145db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            pos2 = hnj_malloc (word_size * sizeof(int));
9155db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            cut2 = hnj_malloc (word_size * sizeof(int));
9165db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            hyphens2 = hnj_malloc (word_size);
9175db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        }
9185db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        for (i = 0; i < word_size; i++) rep2[i] = NULL;
9194760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang        for (i = 0; i < word_size; i++)
9204760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang            if (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) {
9215db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                if (i - begin > 1) {
9225db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    int hyph = 0;
9235db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    prep_word[i + 2] = '\0';
9245db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    /* non-standard hyphenation at compound boundary (Schiffahrt) */
9255db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    if (*rep && *pos && *cut && (*rep)[i]) {
9265db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        char * l = strchr((*rep)[i], '=');
9275db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        strcpy(prep_word + 2 + i - (*pos)[i], (*rep)[i]);
9285db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        if (l) {
9295db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                            hyph = (l - (*rep)[i]) - (*pos)[i];
9305db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                            prep_word[2 + i + hyph] = '\0';
9315db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        }
9325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    }
9335db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    hnj_hyphen_hyph_(dict, prep_word + begin + 1, i - begin + 1 + hyph,
9345db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        hyphens2, &rep2, &pos2, &cut2, clhmin,
9355db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        crhmin, (begin > 0 ? 0 : lend), (hyphens[i]&1 ? 0 : rend));
9365db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    for (j = 0; j < i - begin - 1; j++) {
9375db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        hyphens[begin + j] = hyphens2[j];
9385db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        if (rep2[j] && rep && pos && cut) {
9395db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                            if (!*rep && !*pos && !*cut) {
9405db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                                int k;
9415db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                                *rep = (char **) malloc(sizeof(char *) * word_size);
9425db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                                *pos = (int *) malloc(sizeof(int) * word_size);
9435db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                                *cut = (int *) malloc(sizeof(int) * word_size);
9445db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                                for (k = 0; k < word_size; k++) {
9455db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                                    (*rep)[k] = NULL;
9465db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                                    (*pos)[k] = 0;
9475db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                                    (*cut)[k] = 0;
9485db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                                }
9495db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                            }
9505db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                            (*rep)[begin + j] = rep2[j];
9515db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                            (*pos)[begin + j] = pos2[j];
9525db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                            (*cut)[begin + j] = cut2[j];
9535db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        }
9545db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    }
9555db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    prep_word[i + 2] = word[i + 1];
9565db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    if (*rep && *pos && *cut && (*rep)[i]) {
9575db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                        strcpy(prep_word + 1, word);
9585db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                    }
9595db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                }
9605db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                begin = i + 1;
9615db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                for (j = 0; j < word_size; j++) rep2[j] = NULL;
9625db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            }
9635db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
9645db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        // non-compound
9655db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        if (begin == 0) {
9665db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            hnj_hyphen_hyph_(dict->nextlevel, word, word_size,
9675db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                hyphens, rep, pos, cut, clhmin, crhmin, lend, rend);
9685db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            if (!lend) hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens,
9695db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                rep, pos, cut, clhmin);
9705db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            if (!rend) hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens,
9715db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                rep, pos, cut, crhmin);
9725db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        }
9735db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
9745db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        if (rep2 != rep2_buf) {
9755db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            free(rep2);
9765db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            free(cut2);
9775db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            free(pos2);
9785db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            free(hyphens2);
9795db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        }
9805db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    }
9815db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
9825db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    if (prep_word != prep_word_buf) hnj_free (prep_word);
9835db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    return 0;
9845db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang}
9855db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
/*
 * UTF-8 normalization of hyphen and non-standard positions.
 *
 * Converts the byte-indexed results in hyphens (and, when present, in
 * *rep / *pos / *cut) to character-indexed ones, in place.  For a
 * multi-byte character the values stored at its last byte are kept.
 * (*pos) and (*cut) are converted from byte counts to character counts;
 * entries that moved to a lower index are cleared at their old index.
 *
 * Returns 1 (after printing a message to stderr) if word starts with a
 * UTF-8 continuation byte, 0 otherwise.
 */
int hnj_hyphen_norm(const char *word, int word_size, char * hyphens,
	char *** rep, int ** pos, int ** cut)
{
    const unsigned char * bytes = (const unsigned char *) word;
    int nonstd;
    int src;        /* byte index */
    int dst = -1;   /* index of the character that byte src belongs to */

    /* a '10' start bit pair can not begin a character */
    if ((bytes[0] & 0xc0) == 0x80) {
        fprintf(stderr, "error - bad, non UTF-8 input: %s\n", word);
        return 1;
    }

    /* only array elements are modified below, never the arrays themselves */
    nonstd = rep && pos && cut && *rep && *pos && *cut;

    for (src = 0; src < word_size; src++) {
        int span, from, to, n, count;

        /* beginning of an UTF-8 character (not '10' start bits) */
        if ((bytes[src] & 0xc0) != 0x80) dst++;
        hyphens[dst] = hyphens[src];
        if (!nonstd) continue;

        /* characters among the span bytes that end at src */
        span = (*pos)[src];
        count = 0;
        for (n = 0; n < span; n++) {
            if ((bytes[src - n] & 0xc0) != 0x80) count++;
        }
        (*pos)[dst] = count;

        /* characters among the (*cut)[src] bytes that start where the
           span above starts */
        from = src - span + 1;
        to = from + (*cut)[src];
        count = 0;
        for (n = from; n < to; n++) {
            if ((bytes[n] & 0xc0) != 0x80) count++;
        }
        (*cut)[dst] = count;

        (*rep)[dst] = (*rep)[src];
        if (dst < src) {
            (*rep)[src] = NULL;
            (*pos)[src] = 0;
            (*cut)[src] = 0;
        }
    }
    hyphens[dst + 1] = '\0';
    return 0;
}
10245db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
/* get the word with all possible hyphenations (output: hyphword)
 *
 * word:      input bytes; exactly l of them are read
 * hyphens:   one mark per input byte; an odd value at index i means a
 *            break follows word[i]
 * hyphword:  output, always NUL-terminated on return. No bound is checked
 *            here, so the caller must supply a buffer large enough for the
 *            word plus every inserted '=' or replacement string.
 * rep, pos, cut:
 *            optional replacement data. If any of the three pointers, or
 *            any of the arrays they point to, is NULL, every break is
 *            written as a plain '='. Otherwise, at a break where (*rep)[i]
 *            is non-NULL, that string is copied into the output starting
 *            (*pos)[i] - 1 bytes before the byte just written, and the next
 *            (*cut)[i] - (*pos)[i] input bytes are skipped.
 */
void hnj_hyphen_hyphword(const char * word, int l, const char * hyphens,
    char * hyphword, char *** rep, int ** pos, int ** cut)
{
    /* Loop-invariant: replacement data is usable only when the three
       pointers and the arrays behind them are all present. Checking the
       outer pointers first avoids dereferencing a NULL rep/pos/cut. */
    const int have_rep = rep && pos && cut && *rep && *pos && *cut;
    int i, j;

    for (i = 0, j = 0; i < l; i++, j++) {
        hyphword[j] = word[i];
        if (!(hyphens[i] & 1)) continue;

        if (have_rep && (*rep)[i]) {
            const char *r = (*rep)[i];
            int p = (*pos)[i];

            /* overwrite the tail already emitted with the replacement */
            strcpy(hyphword + j - p + 1, r);
            /* leave j on the last replacement byte; the loop's j++ moves on */
            j += (int) strlen(r) - p;
            /* skip the input bytes consumed by the replacement */
            i += (*cut)[i] - p;
        } else {
            hyphword[++j] = '=';
        }
    }
    hyphword[j] = '\0';
}
10425db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
10435db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
10445db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* main api function with default hyphenmin parameters */
10455db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangint hnj_hyphen_hyphenate2 (HyphenDict *dict,
10465db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    const char *word, int word_size, char * hyphens,
10475db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char *hyphword, char *** rep, int ** pos, int ** cut)
10485db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{
10495db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut,
10505db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        dict->clhmin, dict->crhmin, 1, 1);
10515db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    hnj_hyphen_lhmin(dict->utf8, word, word_size,
10525db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        hyphens, rep, pos, cut, (dict->lhmin > 0 ? dict->lhmin : 2));
10535db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    hnj_hyphen_rhmin(dict->utf8, word, word_size,
10545db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        hyphens, rep, pos, cut, (dict->rhmin > 0 ? dict->rhmin : 2));
10555db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut);
10565db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut);
10575db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    return 0;
10585db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang}
10595db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
10605db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* previous main api function with hyphenmin parameters */
10615db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangint hnj_hyphen_hyphenate3 (HyphenDict *dict,
10625db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang	const char *word, int word_size, char * hyphens,
10635db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang	char *hyphword, char *** rep, int ** pos, int ** cut,
10645db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang	int lhmin, int rhmin, int clhmin, int crhmin)
10655db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{
10665db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    lhmin = (lhmin > 0 ? lhmin : dict->lhmin);
10675db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    rhmin = (rhmin > 0 ? rhmin : dict->rhmin);
10685db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut,
10695db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        clhmin, crhmin, 1, 1);
10705db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens,
10715db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        rep, pos, cut, (lhmin > 0 ? lhmin : 2));
10725db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens,
10735db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        rep, pos, cut, (rhmin > 0 ? rhmin : 2));
10745db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut);
10755db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut);
10765db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    return 0;
10775db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang}
1078