15db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* Libhnj is dual licensed under LGPL and MPL. Boilerplate for both 25db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * licenses follows. 35db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang */ 45db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 55db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* LibHnj - a library for high quality hyphenation and justification 65db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * Copyright (C) 1998 Raph Levien, 75db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org), 85db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * (C) 2001 Peter Novodvorsky (nidd@cs.msu.su) 95db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * (C) 2006, 2007, 2008 László Németh (nemeth at OOo) 105db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * 115db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * This library is free software; you can redistribute it and/or 125db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * modify it under the terms of the GNU Library General Public 135db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * License as published by the Free Software Foundation; either 145db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * version 2 of the License, or (at your option) any later version. 155db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * 165db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * This library is distributed in the hope that it will be useful, 175db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * but WITHOUT ANY WARRANTY; without even the implied warranty of 185db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU 195db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * Library General Public License for more details. 205db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * 215db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * You should have received a copy of the GNU Library General Public 225db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * License along with this library; if not, write to the 235db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 245db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * Boston, MA 02111-1307 USA. 255db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang */ 265db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 275db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* 285db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * The contents of this file are subject to the Mozilla Public License 295db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * Version 1.0 (the "MPL"); you may not use this file except in 305db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * compliance with the MPL. You may obtain a copy of the MPL at 315db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * http://www.mozilla.org/MPL/ 325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * 335db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * Software distributed under the MPL is distributed on an "AS IS" basis, 345db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL 355db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * for the specific language governing rights and limitations under the 365db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * MPL. 
375db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang * 385db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang */ 394760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang#include <fcntl.h> 404760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang#include <sys/mman.h> 414760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang#include <sys/stat.h> 425db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#include <stdlib.h> /* for NULL, malloc */ 435db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#include <stdio.h> /* for fprintf */ 445db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#include <string.h> /* for strdup */ 454760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang#include <unistd.h> /* for close */ 465db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 475db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#define noVERBOSE 485db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 495db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#include "hnjalloc.h" 505db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#include "hyphen.h" 515db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 525db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangstatic char * 535db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wanghnj_strdup (const char *s) 545db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{ 555db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char *new; 565db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int l; 575db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 585db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang l = strlen (s); 595db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang new = hnj_malloc (l + 1); 605db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang memcpy (new, s, l); 615db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang new[l] = 0; 
625db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang return new; 635db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang} 645db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 655db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* remove cross-platform text line end characters */ 665db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangvoid hnj_strchomp(char * s) 675db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{ 685db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int k = strlen(s); 695db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0'; 705db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0'; 715db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang} 725db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 735db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* a little bit of a hash table implementation. This simply maps strings 745db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang to state numbers */ 755db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 765db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangtypedef struct _HashTab HashTab; 775db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangtypedef struct _HashEntry HashEntry; 785db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 795db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* A cheap, but effective, hack. 
*/ 805db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#define HASH_SIZE 31627 815db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 825db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangstruct _HashTab { 835db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang HashEntry *entries[HASH_SIZE]; 845db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang}; 855db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 865db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangstruct _HashEntry { 875db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang HashEntry *next; 885db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char *key; 895db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int val; 905db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang}; 915db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 925db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* a char* hash function from ASU - adapted from Gtk+ */ 935db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangstatic unsigned int 945db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wanghnj_string_hash (const char *s) 955db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{ 965db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang const char *p; 975db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang unsigned int h=0, g; 985db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for(p = s; *p != '\0'; p += 1) { 995db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang h = ( h << 4 ) + *p; 1005db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if ( ( g = h & 0xf0000000 ) ) { 1015db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang h = h ^ (g >> 24); 1025db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang h = h ^ g; 1035db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 1045db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 
1055db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang return h /* % M */; 1065db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang} 1075db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1085db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangstatic HashTab * 1095db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wanghnj_hash_new (void) 1105db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{ 1115db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang HashTab *hashtab; 1125db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int i; 1135db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1145db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hashtab = hnj_malloc (sizeof(HashTab)); 1155db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0; i < HASH_SIZE; i++) 1165db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hashtab->entries[i] = NULL; 1175db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1185db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang return hashtab; 1195db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang} 1205db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1215db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangstatic void 1225db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wanghnj_hash_free (HashTab *hashtab) 1235db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{ 1245db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int i; 1255db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang HashEntry *e, *next; 1265db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1275db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0; i < HASH_SIZE; i++) 1285db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (e = hashtab->entries[i]; e; e = next) 1295db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang { 1305db78df27806d2eb07c14f86623a906df914b952Shimeng 
(Simon) Wang next = e->next; 1315db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_free (e->key); 1325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_free (e); 1335db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 1345db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1355db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_free (hashtab); 1365db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang} 1375db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1385db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* assumes that key is not already present! */ 1395db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangstatic void 1405db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wanghnj_hash_insert (HashTab *hashtab, const char *key, int val) 1415db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{ 1425db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int i; 1435db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang HashEntry *e; 1445db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1455db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang i = hnj_string_hash (key) % HASH_SIZE; 1465db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang e = hnj_malloc (sizeof(HashEntry)); 1475db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang e->next = hashtab->entries[i]; 1485db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang e->key = hnj_strdup (key); 1495db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang e->val = val; 1505db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hashtab->entries[i] = e; 1515db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang} 1525db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1535db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* return val if found, otherwise -1 */ 1545db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangstatic int 
1555db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wanghnj_hash_lookup (HashTab *hashtab, const char *key) 1565db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{ 1575db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int i; 1585db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang HashEntry *e; 1595db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang i = hnj_string_hash (key) % HASH_SIZE; 1605db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (e = hashtab->entries[i]; e; e = e->next) 1615db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (!strcmp (key, e->key)) 1625db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang return e->val; 1635db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang return -1; 1645db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang} 1655db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1665db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* Get the state number, allocating a new state if necessary. 
*/ 1675db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangstatic int 1685db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wanghnj_get_state (HyphenDict *dict, HashTab *hashtab, const char *string) 1695db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{ 1705db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int state_num; 1715db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1725db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang state_num = hnj_hash_lookup (hashtab, string); 1735db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1745db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (state_num >= 0) 1755db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang return state_num; 1765db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1775db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_hash_insert (hashtab, string, dict->num_states); 1785db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* predicate is true if dict->num_states is a power of two */ 1795db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (!(dict->num_states & (dict->num_states - 1))) 1805db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang { 1815db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict->states = hnj_realloc (dict->states, 1825db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang (dict->num_states << 1) * 1835db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang sizeof(HyphenState)); 1845db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 1855db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict->states[dict->num_states].match = NULL; 1865db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict->states[dict->num_states].repl = NULL; 1875db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict->states[dict->num_states].fallback_state = -1; 1885db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 
dict->states[dict->num_states].num_trans = 0; 1895db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict->states[dict->num_states].trans = NULL; 1905db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang return dict->num_states++; 1915db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang} 1925db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1935db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* add a transition from state1 to state2 through ch - assumes that the 1945db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang transition does not already exist */ 1955db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangstatic void 1965db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wanghnj_add_trans (HyphenDict *dict, int state1, int state2, char ch) 1975db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{ 1985db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int num_trans; 1995db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 2005db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang num_trans = dict->states[state1].num_trans; 2015db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (num_trans == 0) 2025db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang { 2035db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict->states[state1].trans = hnj_malloc (sizeof(HyphenTrans)); 2045db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 2055db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang else if (!(num_trans & (num_trans - 1))) 2065db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang { 2075db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict->states[state1].trans = hnj_realloc (dict->states[state1].trans, 2085db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang (num_trans << 1) * 2095db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang sizeof(HyphenTrans)); 2105db78df27806d2eb07c14f86623a906df914b952Shimeng 
(Simon) Wang } 2115db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict->states[state1].trans[num_trans].ch = ch; 2125db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict->states[state1].trans[num_trans].new_state = state2; 2135db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict->states[state1].num_trans++; 2145db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang} 2155db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 2165db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE 2175db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) WangHashTab *global; 2185db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 2195db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangstatic char * 2205db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangget_state_str (int state) 2215db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{ 2225db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int i; 2235db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang HashEntry *e; 2245db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 2255db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0; i < HASH_SIZE; i++) 2265db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (e = global->entries[i]; e; e = e->next) 2275db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (e->val == state) 2285db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang return e->key; 2295db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang return NULL; 2305db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang} 2315db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif 2325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 2334760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang// Get a line from the dictionary contents. 
2344760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wangstatic char * 2354760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wangget_line (char *s, int size, const char *dict_contents, int dict_length, 2364760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang int *dict_ptr) 2374760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang{ 2384760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang int len = 0; 2394760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang while (len < (size - 1) && *dict_ptr < dict_length) { 2404760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang s[len++] = *(dict_contents + *dict_ptr); 2414760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang (*dict_ptr)++; 2424760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang if (s[len - 1] == '\n') 2434760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang break; 2444760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang } 2454760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang s[len] = '\0'; 2464760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang if (len > 0) { 2474760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang return s; 2484760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang } else { 2494760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang return NULL; 2504760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang } 2514760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang} 2524760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang 2535db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) WangHyphenDict * 2545db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wanghnj_hyphen_load (const char *fn) 2555db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{ 2564760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang if (fn == NULL) 2574760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang return NULL; 2584760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang 
const int fd = open(fn, O_RDONLY); 2594760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang if (fd == -1) 2604760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang return NULL; 2614760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang struct stat sb; 2624760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang if (fstat(fd, &sb) == -1) { /* To obtain file size */ 2634760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang close(fd); 2644760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang return NULL; 2654760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang } 2664760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang 2674760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang const char *addr = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); 2684760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang if (addr == MAP_FAILED) { 2694760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang close(fd); 2704760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang return NULL; 2714760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang } 2724760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang HyphenDict *dict = hnj_hyphen_load_from_buffer(addr, sb.st_size); 2734760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang munmap((void *)addr, sb.st_size); 2744760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang close(fd); 2754760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang 2764760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang return dict; 2774760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang} 2784760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang 2794760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) WangHyphenDict * 2804760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wanghnj_hyphen_load_from_buffer (const char *dict_contents, int dict_length) 2814760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang{ 
2825db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang HyphenDict *dict[2]; 2835db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang HashTab *hashtab; 2845db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char buf[MAX_CHARS]; 2855db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char word[MAX_CHARS]; 2865db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char pattern[MAX_CHARS]; 2875db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char * repl; 2885db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang signed char replindex; 2895db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang signed char replcut; 2905db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int state_num = 0, last_state; 2915db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int i, j, k; 2925db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char ch; 2935db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int found; 2945db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang HashEntry *e; 2955db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int nextlevel = 0; 2965db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 2974760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang if (dict_contents == NULL) 2985db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang return NULL; 2995db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 3004760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang int dict_ptr = 0; 3015db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang// loading one or two dictionaries (separated by NEXTLEVEL keyword) 3025db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { 3035db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hashtab = hnj_hash_new (); 3045db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE 
3055db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang global = hashtab; 3065db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif 3075db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_hash_insert (hashtab, "", 0); 3085db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k] = hnj_malloc (sizeof(HyphenDict)); 3095db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->num_states = 1; 3105db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->states = hnj_malloc (sizeof(HyphenState)); 3115db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->states[0].match = NULL; 3125db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->states[0].repl = NULL; 3135db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->states[0].fallback_state = -1; 3145db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->states[0].num_trans = 0; 3155db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->states[0].trans = NULL; 3165db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->nextlevel = NULL; 3175db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->lhmin = 0; 3185db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->rhmin = 0; 3195db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->clhmin = 0; 3205db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->crhmin = 0; 3215db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 3225db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* read in character set info */ 3235db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (k == 0) { 3245db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0; 3254760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang get_line(dict[k]->cset, sizeof(dict[k]->cset), dict_contents, 
3264760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang dict_length, &dict_ptr); 3275db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i=0;i<MAX_NAME;i++) 3285db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n')) 3295db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->cset[i] = 0; 3305db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0); 3315db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } else { 3325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang strcpy(dict[k]->cset, dict[0]->cset); 3335db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->utf8 = dict[0]->utf8; 3345db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 3355db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 3364760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang while (get_line(buf, sizeof(buf), dict_contents, dict_length, 3374760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang &dict_ptr) != NULL) 3385db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang { 3395db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (buf[0] != '%') 3405db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang { 3415db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (strncmp(buf, "NEXTLEVEL", 9) == 0) { 3425db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang nextlevel = 1; 3435db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang break; 3445db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } else if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) { 3455db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->lhmin = atoi(buf + 13); 3465db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang continue; 3475db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) { 
3485db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->rhmin = atoi(buf + 14); 3495db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang continue; 3505db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } else if (strncmp(buf, "COMPOUNDLEFTHYPHENMIN", 21) == 0) { 3515db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->clhmin = atoi(buf + 21); 3525db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang continue; 3535db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } else if (strncmp(buf, "COMPOUNDRIGHTHYPHENMIN", 22) == 0) { 3545db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->crhmin = atoi(buf + 22); 3555db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang continue; 3565db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 3575db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang j = 0; 3585db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang pattern[j] = '0'; 3595db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang repl = strchr(buf, '/'); 3605db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang replindex = 0; 3615db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang replcut = 0; 3625db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (repl) { 3635db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char * index = strchr(repl + 1, ','); 3645db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang *repl = '\0'; 3655db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (index) { 3665db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char * index2 = strchr(index + 1, ','); 3675db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang *index = '\0'; 3685db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (index2) { 3695db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang *index2 = '\0'; 3705db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang replindex = (signed char) 
atoi(index + 1) - 1; 3715db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang replcut = (signed char) atoi(index2 + 1); 3725db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 3735db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } else { 3745db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_strchomp(repl + 1); 3755db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang replindex = 0; 3765db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang replcut = strlen(buf); 3775db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 3785db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang repl = hnj_strdup(repl + 1); 3795db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 3805db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0; ((buf[i] > ' ') || (buf[i] < 0)); i++) 3815db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang { 3825db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (buf[i] >= '0' && buf[i] <= '9') 3835db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang pattern[j] = buf[i]; 3845db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang else 3855db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang { 3865db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang word[j] = buf[i]; 3875db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang pattern[++j] = '0'; 3885db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 3895db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 3905db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang word[j] = '\0'; 3915db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang pattern[j + 1] = '\0'; 3925db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 3935db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang i = 0; 3945db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (!repl) { 3955db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* Optimize 
away leading zeroes */ 3965db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (; pattern[i] == '0'; i++); 3975db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } else { 3985db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (*word == '.') i++; 3995db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */ 4005db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (dict[k]->utf8) { 4015db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int pu = -1; /* unicode character position */ 4025db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int ps = -1; /* unicode start position (original replindex) */ 4035db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int pc = (*word == '.') ? 1: 0; /* 8-bit character position */ 4045db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (; pc < (strlen(word) + 1); pc++) { 4055db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* beginning of an UTF-8 character (not '10' start bits) */ 4065db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if ((((unsigned char) word[pc]) >> 6) != 2) pu++; 4075db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if ((ps < 0) && (replindex == pu)) { 4085db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang ps = replindex; 4095db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang replindex = pc; 4105db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 4115db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if ((ps >= 0) && ((pu - ps) == replcut)) { 4125db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang replcut = (pc - replindex); 4135db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang break; 4145db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 4155db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 4165db78df27806d2eb07c14f86623a906df914b952Shimeng 
(Simon) Wang if (*word == '.') replindex--; 4175db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 4185db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 4195db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 4205db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE 4215db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang printf ("word %s pattern %s, j = %d repl: %s\n", word, pattern + i, j, repl); 4225db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif 4235db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang found = hnj_hash_lookup (hashtab, word); 4245db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang state_num = hnj_get_state (dict[k], hashtab, word); 4255db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->states[state_num].match = hnj_strdup (pattern + i); 4265db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->states[state_num].repl = repl; 4275db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->states[state_num].replindex = replindex; 4285db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (!replcut) { 4295db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->states[state_num].replcut = strlen(word); 4305db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } else { 4315db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->states[state_num].replcut = replcut; 4325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 4335db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 4345db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* now, put in the prefix transitions */ 4355db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (; found < 0 ;j--) 4365db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang { 4375db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang last_state = state_num; 4385db78df27806d2eb07c14f86623a906df914b952Shimeng 
(Simon) Wang ch = word[j - 1]; 4395db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang word[j - 1] = '\0'; 4405db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang found = hnj_hash_lookup (hashtab, word); 4415db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang state_num = hnj_get_state (dict[k], hashtab, word); 4425db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_add_trans (dict[k], state_num, last_state, ch); 4435db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 4445db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 4455db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 4465db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 4475db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* Could do unioning of matches here (instead of the preprocessor script). 4485db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang If we did, the pseudocode would look something like this: 4495db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 4505db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang foreach state in the hash table 4515db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang foreach i = [1..length(state) - 1] 4525db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang state to check is substr (state, i) 4535db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang look it up 4545db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if found, and if there is a match, union the match in. 4555db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 4565db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang It's also possible to avoid the quadratic blowup by doing the 4575db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang search in order of increasing state string sizes - then you 4585db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang can break the loop after finding the first match. 
4595db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 4605db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang This step should be optional in any case - if there is a 4615db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang preprocessed rule table, it's always faster to use that. 4625db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 4635db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang */ 4645db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 4655db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* put in the fallback states */ 4665db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0; i < HASH_SIZE; i++) 4675db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (e = hashtab->entries[i]; e; e = e->next) 4685db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang { 4695db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (*(e->key)) for (j = 1; 1; j++) 4705db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang { 4715db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang state_num = hnj_hash_lookup (hashtab, e->key + j); 4725db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (state_num >= 0) 4735db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang break; 4745db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 4755db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* KBH: FIXME state 0 fallback_state should always be -1? 
*/ 4765db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (e->val) 4775db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->states[e->val].fallback_state = state_num; 4785db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 4795db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE 4805db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0; i < HASH_SIZE; i++) 4815db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (e = hashtab->entries[i]; e; e = e->next) 4825db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang { 4835db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang printf ("%d string %s state %d, fallback=%d\n", i, e->key, e->val, 4845db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->states[e->val].fallback_state); 4855db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (j = 0; j < dict[k]->states[e->val].num_trans; j++) 4865db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang printf (" %c->%d\n", dict[k]->states[e->val].trans[j].ch, 4875db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict[k]->states[e->val].trans[j].new_state); 4885db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 4895db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif 4905db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 4915db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifndef VERBOSE 4925db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_hash_free (hashtab); 4935db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif 4945db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang state_num = 0; 4955db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 4965db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (k == 2) dict[0]->nextlevel = dict[1]; 4975db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang return dict[0]; 
4985db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang} 4995db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 5005db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangvoid hnj_hyphen_free (HyphenDict *dict) 5015db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{ 5025db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int state_num; 5035db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang HyphenState *hstate; 5045db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 5055db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (state_num = 0; state_num < dict->num_states; state_num++) 5065db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang { 5075db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hstate = &dict->states[state_num]; 5085db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (hstate->match) 5095db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_free (hstate->match); 5105db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (hstate->repl) 5115db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_free (hstate->repl); 5125db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (hstate->trans) 5135db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_free (hstate->trans); 5145db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 5155db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (dict->nextlevel) hnj_hyphen_free(dict->nextlevel); 5165db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 5175db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_free (dict->states); 5185db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 5195db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_free (dict); 5205db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang} 5215db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 5225db78df27806d2eb07c14f86623a906df914b952Shimeng 
(Simon) Wang#define MAX_WORD 256 5235db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 5245db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangint hnj_hyphen_hyphenate (HyphenDict *dict, 5255db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang const char *word, int word_size, 5265db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char *hyphens) 5275db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{ 5285db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char prep_word_buf[MAX_WORD]; 5295db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char *prep_word; 5305db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int i, j, k; 5315db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int state; 5325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char ch; 5335db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang HyphenState *hstate; 5345db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char *match; 5355db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int offset; 5365db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 5375db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (word_size + 3 < MAX_WORD) 5385db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang prep_word = prep_word_buf; 5395db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang else 5405db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang prep_word = hnj_malloc (word_size + 3); 5415db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 5425db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang j = 0; 5435db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang prep_word[j++] = '.'; 5445db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 5455db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0; i < word_size; i++) 5465db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang prep_word[j++] = word[i]; 
5475db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 5485db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang prep_word[j++] = '.'; 5495db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang prep_word[j] = '\0'; 5505db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 5515db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0; i < j; i++) 5525db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphens[i] = '0'; 5535db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 5545db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE 5555db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang printf ("prep_word = %s\n", prep_word); 5565db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif 5575db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 5585db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* now, run the finite state machine */ 5595db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang state = 0; 5605db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0; i < j; i++) 5615db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang { 5625db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang ch = prep_word[i]; 5635db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (;;) 5645db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang { 5655db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 5665db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (state == -1) { 5675db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* return 1; */ 5685db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* KBH: FIXME shouldn't this be as follows? 
*/ 5695db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang state = 0; 5705db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang goto try_next_letter; 5715db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 5725db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 5735db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE 5745db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char *state_str; 5755db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang state_str = get_state_str (state); 5765db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 5775db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (k = 0; k < i - strlen (state_str); k++) 5785db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang putchar (' '); 5795db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang printf ("%s", state_str); 5805db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif 5815db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 5825db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hstate = &dict->states[state]; 5835db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (k = 0; k < hstate->num_trans; k++) 5845db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (hstate->trans[k].ch == ch) 5855db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang { 5865db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang state = hstate->trans[k].new_state; 5875db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang goto found_state; 5885db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 5895db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang state = hstate->fallback_state; 5905db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE 5915db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang printf (" falling back, fallback_state %d\n", state); 5925db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif 
5935db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 5945db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang found_state: 5955db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE 5965db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang printf ("found state %d\n",state); 5975db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif 5985db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* Additional optimization is possible here - especially, 5995db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang elimination of trailing zeroes from the match. Leading zeroes 6005db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang have already been optimized. */ 6015db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang match = dict->states[state].match; 6025db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* replacing rules not handled by hyphen_hyphenate() */ 6035db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (match && !dict->states[state].repl) 6045db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang { 6055db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang offset = i + 1 - strlen (match); 6065db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE 6075db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (k = 0; k < offset; k++) 6085db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang putchar (' '); 6095db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang printf ("%s\n", match); 6105db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif 6115db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* This is a linear search because I tried a binary search and 6125db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang found it to be just a teeny bit slower. 
*/ 6135db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (k = 0; match[k]; k++) 6145db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (hyphens[offset + k] < match[k]) 6155db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphens[offset + k] = match[k]; 6165db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 6175db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 6185db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* KBH: we need this to make sure we keep looking in a word */ 6195db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* for patterns even if the current character is not known in state 0 */ 6205db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* since patterns for hyphenation may occur anywhere in the word */ 6215db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang try_next_letter: ; 6225db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 6235db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 6245db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE 6255db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0; i < j; i++) 6265db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang putchar (hyphens[i]); 6275db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang putchar ('\n'); 6285db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif 6295db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 6305db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0; i < j - 4; i++) 6315db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#if 0 6325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (hyphens[i + 1] & 1) 6335db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphens[i] = '-'; 6345db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#else 6355db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphens[i] = hyphens[i + 1]; 
6365db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif 6375db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphens[0] = '0'; 6385db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (; i < word_size; i++) 6395db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphens[i] = '0'; 6405db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphens[word_size] = '\0'; 6415db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 6425db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (prep_word != prep_word_buf) 6435db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_free (prep_word); 6445db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 6455db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang return 0; 6465db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang} 6475db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 6485db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* character length of the first n byte of the input word */ 6495db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangint hnj_hyphen_strnlen(const char * word, int n, int utf8) 6505db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{ 6515db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int i = 0; 6525db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int j = 0; 6535db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang while (j < n && word[j] != '\0') { 6545db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang i++; 6555db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (j++; utf8 && (word[j] & 0xc0) == 0x80; j++); 6565db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 6575db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang return i; 6585db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang} 6595db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 6605db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) 
/*
 * Enforce the left hyphenation minimum on an already hyphenated word.
 *
 * Walks the word from its first byte and, for the first lhmin - 1
 * characters, resets hyphens[j] to '0'.  Where a replacement string exists
 * at (*rep)[j], the slot is only reset (and the replacement freed and set to
 * NULL) when the replacement contains '=' and the character count of
 * word[0 .. j - (*pos)[j]] plus that of the replacement text before '=' is
 * below lhmin.
 *
 * utf8      - non-zero if word is UTF-8; a multi-byte sequence then counts
 *             as one character
 * word      - NUL-terminated word
 * word_size - unused here
 * hyphens   - hyphenation levels, modified in place
 * rep, pos, cut - optional replacement tables; consulted only when all
 *             three of *rep, *pos and *cut are non-NULL
 * lhmin     - minimum number of characters left of the first hyphen
 *
 * Always returns 0.
 */
int hnj_hyphen_lhmin(int utf8, const char *word, int word_size, char * hyphens,
	char *** rep, int ** pos, int ** cut, int lhmin)
{
    int i, j;

    (void) word_size;

    for (i = 1, j = 0; i < lhmin && word[j] != '\0'; i++) do {
      /* check length of the non-standard part */
      if (*rep && *pos && *cut && (*rep)[j]) {
        char * rh = strchr((*rep)[j], '=');
        if (rh && (hnj_hyphen_strnlen(word, j - (*pos)[j] + 1, utf8) +
          hnj_hyphen_strnlen((*rep)[j], rh - (*rep)[j], utf8)) < lhmin) {
            free((*rep)[j]);
            (*rep)[j] = NULL;
            hyphens[j] = '0';
          }
       } else {
         hyphens[j] = '0';
       }
       j++;
      /* Stay inside the same character while word[j] is a UTF-8
         continuation byte (10xxxxxx).  Testing word[j] rather than
         word[j + 1] also keeps the read within the string: the
         terminating NUL is never a continuation byte. */
    } while (utf8 && (word[j] & 0xc0) == 0x80);
    return 0;
}

/*
 * Enforce the right hyphenation minimum on an already hyphenated word.
 *
 * Walks backwards from byte word_size - 2 down to byte 1 and resets
 * hyphens[j] to '0' until rhmin characters have been counted.  Where a
 * replacement string exists at (*rep)[j], the slot is only reset (and the
 * replacement freed and set to NULL) when the replacement contains '=' and
 * the character count of the word after the cut region plus that of the
 * replacement text after '=' is below rhmin.
 *
 * utf8      - non-zero if word is UTF-8; a multi-byte sequence then counts
 *             as one character
 * word      - the word; bytes 1 .. word_size - 1 are read
 * word_size - length of word in bytes
 * hyphens   - hyphenation levels, modified in place
 * rep, pos, cut - optional replacement tables; consulted only when all
 *             three of *rep, *pos and *cut are non-NULL
 * rhmin     - minimum number of characters right of the last hyphen
 *
 * Always returns 0.
 */
int hnj_hyphen_rhmin(int utf8, const char *word, int word_size, char * hyphens,
	char *** rep, int ** pos, int ** cut, int rhmin)
{
    int i;
    int j = word_size - 2;
    for (i = 1; i < rhmin && j > 0; j--) {
      /* check length of the non-standard part */
      if (*rep && *pos && *cut && (*rep)[j]) {
        char * rh = strchr((*rep)[j], '=');
        if (rh && (hnj_hyphen_strnlen(word + j - (*pos)[j] + (*cut)[j] + 1, 100, utf8) +
          hnj_hyphen_strnlen(rh + 1, strlen(rh + 1), utf8)) < rhmin) {
            free((*rep)[j]);
            (*rep)[j] = NULL;
            hyphens[j] = '0';
          }
       } else {
         hyphens[j] = '0';
       }
       /* Count one character per UTF-8 sequence: byte j closes a character
          only when the byte after it is not a continuation byte
          (10xxxxxx).  Counting every non-lead byte, as before, treated each
          continuation byte of a 3- or 4-byte sequence as a character of its
          own.  j + 1 <= word_size - 1, so the read stays inside the word. */
       if (!utf8 || (word[j + 1] & 0xc0) != 0x80) i++;
    }
    return 0;
}

(Simon) Wang 7055db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang// recursive function for compound level hyphenation 7065db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangint hnj_hyphen_hyph_(HyphenDict *dict, const char *word, int word_size, 7075db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char * hyphens, char *** rep, int ** pos, int ** cut, 7085db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int clhmin, int crhmin, int lend, int rend) 7095db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{ 7105db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char prep_word_buf[MAX_WORD]; 7115db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char *prep_word; 7125db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int i, j, k; 7135db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int state; 7145db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char ch; 7155db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang HyphenState *hstate; 7165db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char *match; 7175db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char *repl; 7185db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang signed char replindex; 7195db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang signed char replcut; 7205db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int offset; 7215db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int matchlen_buf[MAX_CHARS]; 7225db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int matchindex_buf[MAX_CHARS]; 7235db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char * matchrepl_buf[MAX_CHARS]; 7245db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int * matchlen; 7255db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int * matchindex; 7265db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char ** matchrepl; 
7275db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int isrepl = 0; 7285db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int nHyphCount; 7295db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 7305db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (word_size + 3 < MAX_CHARS) { 7315db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang prep_word = prep_word_buf; 7325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang matchlen = matchlen_buf; 7335db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang matchindex = matchindex_buf; 7345db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang matchrepl = matchrepl_buf; 7355db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } else { 7365db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang prep_word = hnj_malloc (word_size + 3); 7375db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang matchlen = hnj_malloc ((word_size + 3) * sizeof(int)); 7385db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang matchindex = hnj_malloc ((word_size + 3) * sizeof(int)); 7395db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang matchrepl = hnj_malloc ((word_size + 3) * sizeof(char *)); 7405db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 7415db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 7425db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang j = 0; 7435db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang prep_word[j++] = '.'; 7445db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 7455db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0; i < word_size; i++) 7465db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang prep_word[j++] = word[i]; 7475db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 7485db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang prep_word[j++] = '.'; 7495db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang prep_word[j] = '\0'; 
7505db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 7515db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0; i < j; i++) 7525db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphens[i] = '0'; 7535db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 7545db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE 7555db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang printf ("prep_word = %s\n", prep_word); 7565db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif 7575db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 7585db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* now, run the finite state machine */ 7595db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang state = 0; 7605db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0; i < j; i++) 7615db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang { 7625db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang ch = prep_word[i]; 7635db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (;;) 7645db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang { 7655db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 7665db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (state == -1) { 7675db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* return 1; */ 7685db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* KBH: FIXME shouldn't this be as follows? 
*/ 7695db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang state = 0; 7705db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang goto try_next_letter; 7715db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 7725db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 7735db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE 7745db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char *state_str; 7755db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang state_str = get_state_str (state); 7765db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 7775db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (k = 0; k < i - strlen (state_str); k++) 7785db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang putchar (' '); 7795db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang printf ("%s", state_str); 7805db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif 7815db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 7825db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hstate = &dict->states[state]; 7835db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (k = 0; k < hstate->num_trans; k++) 7845db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (hstate->trans[k].ch == ch) 7855db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang { 7865db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang state = hstate->trans[k].new_state; 7875db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang goto found_state; 7885db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 7895db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang state = hstate->fallback_state; 7905db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE 7915db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang printf (" falling back, fallback_state %d\n", state); 7925db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif 
7935db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 7945db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang found_state: 7955db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE 7965db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang printf ("found state %d\n",state); 7975db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif 7985db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* Additional optimization is possible here - especially, 7995db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang elimination of trailing zeroes from the match. Leading zeroes 8005db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang have already been optimized. */ 8015db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang match = dict->states[state].match; 8025db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang repl = dict->states[state].repl; 8035db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang replindex = dict->states[state].replindex; 8045db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang replcut = dict->states[state].replcut; 8055db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* replacing rules not handled by hyphen_hyphenate() */ 8065db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (match) 8075db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang { 8085db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang offset = i + 1 - strlen (match); 8095db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE 8105db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (k = 0; k < offset; k++) 8115db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang putchar (' '); 8125db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang printf ("%s (%s)\n", match, repl); 8135db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif 8145db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (repl) { 
8155db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (!isrepl) for(; isrepl < word_size; isrepl++) { 8165db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang matchrepl[isrepl] = NULL; 8175db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang matchindex[isrepl] = -1; 8185db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 8195db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang matchlen[offset + replindex] = replcut; 8205db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 8215db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* This is a linear search because I tried a binary search and 8225db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang found it to be just a teeny bit slower. */ 8235db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (k = 0; match[k]; k++) { 8245db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if ((hyphens[offset + k] < match[k])) { 8255db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphens[offset + k] = match[k]; 8265db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (match[k]&1) { 8275db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang matchrepl[offset + k] = repl; 8285db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (repl && (k >= replindex) && (k <= replindex + replcut)) { 8295db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang matchindex[offset + replindex] = offset + k; 8305db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 8315db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 8325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 8335db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 8345db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 8355db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 8365db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 8375db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* KBH: 
we need this to make sure we keep looking in a word */ 8385db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* for patterns even if the current character is not known in state 0 */ 8395db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* since patterns for hyphenation may occur anywhere in the word */ 8405db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang try_next_letter: ; 8415db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 8425db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 8435db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#ifdef VERBOSE 8445db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0; i < j; i++) 8455db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang putchar (hyphens[i]); 8465db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang putchar ('\n'); 8475db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif 8485db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 8495db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0; i < j - 3; i++) 8505db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#if 0 8515db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (hyphens[i + 1] & 1) 8525db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphens[i] = '-'; 8535db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#else 8545db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphens[i] = hyphens[i + 1]; 8555db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#endif 8565db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (; i < word_size; i++) 8575db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphens[i] = '0'; 8585db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphens[word_size] = '\0'; 8595db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 8605db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* now create a new char string showing 
hyphenation positions */ 8615db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* count the hyphens and allocate space for the new hyphenated string */ 8625db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang nHyphCount = 0; 8635db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0; i < word_size; i++) 8645db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (hyphens[i]&1) 8655db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang nHyphCount++; 8665db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang j = 0; 8675db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0; i < word_size; i++) { 8685db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (isrepl && (matchindex[i] >= 0) && matchrepl[matchindex[i]]) { 8695db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (rep && pos && cut) { 8705db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (!*rep && !*pos && !*cut) { 8715db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int k; 8725db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang *rep = (char **) malloc(sizeof(char *) * word_size); 8735db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang *pos = (int *) malloc(sizeof(int) * word_size); 8745db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang *cut = (int *) malloc(sizeof(int) * word_size); 8755db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (k = 0; k < word_size; k++) { 8765db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang (*rep)[k] = NULL; 8775db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang (*pos)[k] = 0; 8785db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang (*cut)[k] = 0; 8795db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 8805db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 8815db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang (*rep)[matchindex[i] - 1] = hnj_strdup(matchrepl[matchindex[i]]); 
8825db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang (*pos)[matchindex[i] - 1] = matchindex[i] - i; 8835db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang (*cut)[matchindex[i] - 1] = matchlen[i]; 8845db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 8855db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang j += strlen(matchrepl[matchindex[i]]); 8865db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang i += matchlen[i] - 1; 8875db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 8885db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 8895db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 8905db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (matchrepl != matchrepl_buf) { 8915db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_free (matchrepl); 8925db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_free (matchlen); 8935db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_free (matchindex); 8945db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 8955db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 8965db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang // recursive hyphenation of the first (compound) level segments 8975db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (dict->nextlevel) { 8985db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char * rep2_buf[MAX_WORD]; 8995db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int pos2_buf[MAX_WORD]; 9005db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int cut2_buf[MAX_WORD]; 9015db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char hyphens2_buf[MAX_WORD]; 9025db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char ** rep2; 9035db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int * pos2; 9045db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int * cut2; 
9055db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char * hyphens2; 9065db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int begin = 0; 9075db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (word_size < MAX_CHARS) { 9085db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang rep2 = rep2_buf; 9095db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang pos2 = pos2_buf; 9105db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang cut2 = cut2_buf; 9115db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphens2 = hyphens2_buf; 9125db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } else { 9135db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang rep2 = hnj_malloc (word_size * sizeof(char *)); 9145db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang pos2 = hnj_malloc (word_size * sizeof(int)); 9155db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang cut2 = hnj_malloc (word_size * sizeof(int)); 9165db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphens2 = hnj_malloc (word_size); 9175db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 9185db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0; i < word_size; i++) rep2[i] = NULL; 9194760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang for (i = 0; i < word_size; i++) 9204760752af1c9b7507b51917ff4e4d8eb0491e353Shimeng (Simon) Wang if (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) { 9215db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (i - begin > 1) { 9225db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int hyph = 0; 9235db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang prep_word[i + 2] = '\0'; 9245db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* non-standard hyphenation at compound boundary (Schiffahrt) */ 9255db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (*rep && *pos && *cut && (*rep)[i]) { 
9265db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char * l = strchr((*rep)[i], '='); 9275db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang strcpy(prep_word + 2 + i - (*pos)[i], (*rep)[i]); 9285db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (l) { 9295db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyph = (l - (*rep)[i]) - (*pos)[i]; 9305db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang prep_word[2 + i + hyph] = '\0'; 9315db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 9325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 9335db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_hyphen_hyph_(dict, prep_word + begin + 1, i - begin + 1 + hyph, 9345db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphens2, &rep2, &pos2, &cut2, clhmin, 9355db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang crhmin, (begin > 0 ? 0 : lend), (hyphens[i]&1 ? 0 : rend)); 9365db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (j = 0; j < i - begin - 1; j++) { 9375db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphens[begin + j] = hyphens2[j]; 9385db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (rep2[j] && rep && pos && cut) { 9395db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (!*rep && !*pos && !*cut) { 9405db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int k; 9415db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang *rep = (char **) malloc(sizeof(char *) * word_size); 9425db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang *pos = (int *) malloc(sizeof(int) * word_size); 9435db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang *cut = (int *) malloc(sizeof(int) * word_size); 9445db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (k = 0; k < word_size; k++) { 9455db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang (*rep)[k] = NULL; 
9465db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang (*pos)[k] = 0; 9475db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang (*cut)[k] = 0; 9485db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 9495db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 9505db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang (*rep)[begin + j] = rep2[j]; 9515db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang (*pos)[begin + j] = pos2[j]; 9525db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang (*cut)[begin + j] = cut2[j]; 9535db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 9545db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 9555db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang prep_word[i + 2] = word[i + 1]; 9565db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (*rep && *pos && *cut && (*rep)[i]) { 9575db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang strcpy(prep_word + 1, word); 9585db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 9595db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 9605db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang begin = i + 1; 9615db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (j = 0; j < word_size; j++) rep2[j] = NULL; 9625db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 9635db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 9645db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang // non-compound 9655db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (begin == 0) { 9665db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_hyphen_hyph_(dict->nextlevel, word, word_size, 9675db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphens, rep, pos, cut, clhmin, crhmin, lend, rend); 9685db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (!lend) hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens, 
9695db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang rep, pos, cut, clhmin); 9705db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (!rend) hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens, 9715db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang rep, pos, cut, crhmin); 9725db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 9735db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 9745db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (rep2 != rep2_buf) { 9755db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang free(rep2); 9765db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang free(cut2); 9775db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang free(pos2); 9785db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang free(hyphens2); 9795db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 9805db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 9815db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 9825db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (prep_word != prep_word_buf) hnj_free (prep_word); 9835db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang return 0; 9845db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang} 9855db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 9865db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* UTF-8 normalization of hyphen and non-standard positions */ 9875db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangint hnj_hyphen_norm(const char *word, int word_size, char * hyphens, 9885db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char *** rep, int ** pos, int ** cut) 9895db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{ 9905db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if ((((unsigned char) word[0]) >> 6) == 2) { 9915db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang fprintf(stderr, "error - bad, non UTF-8 input: 
%s\n", word); 9925db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang return 1; 9935db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 9945db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 9955db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* calculate UTF-8 character positions */ 9965db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int i, j, k; 9975db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0, j = -1; i < word_size; i++) { 9985db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* beginning of an UTF-8 character (not '10' start bits) */ 9995db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if ((((unsigned char) word[i]) >> 6) != 2) j++; 10005db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphens[j] = hyphens[i]; 10015db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (rep && pos && cut && *rep && *pos && *cut) { 10025db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int l = (*pos)[i]; 10035db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang (*pos)[j] = 0; 10045db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (k = 0; k < l; k++) { 10055db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if ((((unsigned char) word[i - k]) >> 6) != 2) (*pos)[j]++; 10065db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 10075db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang k = i - l + 1; 10085db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang l = k + (*cut)[i]; 10095db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang (*cut)[j] = 0; 10105db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (; k < l; k++) { 10115db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if ((((unsigned char) word[k]) >> 6) != 2) (*cut)[j]++; 10125db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 10135db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang (*rep)[j] = (*rep)[i]; 
10145db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (j < i) { 10155db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang (*rep)[i] = NULL; 10165db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang (*pos)[i] = 0; 10175db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang (*cut)[i] = 0; 10185db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 10195db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 10205db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 10215db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphens[j + 1] = '\0'; 10225db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang return 0; 10235db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang} 10245db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 10255db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* get the word with all possible hyphenations (output: hyphword) */ 10265db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangvoid hnj_hyphen_hyphword(const char * word, int l, const char * hyphens, 10275db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char * hyphword, char *** rep, int ** pos, int ** cut) 10285db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{ 10295db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int i, j; 10305db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0, j = 0; i < l; i++, j++) { 10315db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (hyphens[i]&1) { 10325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphword[j] = word[i]; 10335db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (*rep && *pos && *cut && (*rep)[i]) { 10345db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang strcpy(hyphword + j - (*pos)[i] + 1, (*rep)[i]); 10355db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang j += strlen((*rep)[i]) - (*pos)[i]; 10365db78df27806d2eb07c14f86623a906df914b952Shimeng 
(Simon) Wang i += (*cut)[i] - (*pos)[i]; 10375db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } else hyphword[++j] = '='; 10385db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } else hyphword[j] = word[i]; 10395db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 10405db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphword[j] = '\0'; 10415db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang} 10425db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 10435db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 10445db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* main api function with default hyphenmin parameters */ 10455db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangint hnj_hyphen_hyphenate2 (HyphenDict *dict, 10465db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang const char *word, int word_size, char * hyphens, 10475db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char *hyphword, char *** rep, int ** pos, int ** cut) 10485db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{ 10495db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut, 10505db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang dict->clhmin, dict->crhmin, 1, 1); 10515db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_hyphen_lhmin(dict->utf8, word, word_size, 10525db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphens, rep, pos, cut, (dict->lhmin > 0 ? dict->lhmin : 2)); 10535db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_hyphen_rhmin(dict->utf8, word, word_size, 10545db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphens, rep, pos, cut, (dict->rhmin > 0 ? 
dict->rhmin : 2)); 10555db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut); 10565db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut); 10575db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang return 0; 10585db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang} 10595db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 10605db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* previous main api function with hyphenmin parameters */ 10615db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangint hnj_hyphen_hyphenate3 (HyphenDict *dict, 10625db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang const char *word, int word_size, char * hyphens, 10635db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char *hyphword, char *** rep, int ** pos, int ** cut, 10645db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int lhmin, int rhmin, int clhmin, int crhmin) 10655db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{ 10665db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang lhmin = (lhmin > 0 ? lhmin : dict->lhmin); 10675db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang rhmin = (rhmin > 0 ? rhmin : dict->rhmin); 10685db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut, 10695db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang clhmin, crhmin, 1, 1); 10705db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens, 10715db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang rep, pos, cut, (lhmin > 0 ? 
lhmin : 2)); 10725db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens, 10735db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang rep, pos, cut, (rhmin > 0 ? rhmin : 2)); 10745db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut); 10755db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut); 10765db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang return 0; 10775db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang} 1078