15db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#include <string.h> 25db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#include <stdlib.h> 35db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#include <stdio.h> 45db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 55db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#include "hyphen.h" 65db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#include "csutil.h" 75db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 85db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#define BUFSIZE 1000 95db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 105db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangvoid help() { 115db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang fprintf(stderr,"correct syntax is:\n"); 125db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang fprintf(stderr,"example [-d | -dd] hyphen_dictionary_file file_of_words_to_check\n"); 135db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang fprintf(stderr,"-o = use old algorithm (without non-standard hyphenation)\n"); 145db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang fprintf(stderr,"-d = hyphenation with listing of the possible hyphenations\n"); 155db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang} 165db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 175db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* get a pointer to the nth 8-bit or UTF-8 character of the word */ 185db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangchar * hindex(char * word, int n, int utf8) { 195db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int j = 0; 205db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang while (j < n) { 215db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang j++; 225db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang word++; 235db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang while (utf8 && ((((unsigned char) *word) >> 6) == 2)) word++; 245db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 255db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang return word; 265db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang} 275db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 285db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* list possible hyphenations with -dd option (example for the usage of the hyphenate2() function) */ 295db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangvoid single_hyphenations(char * word, char * hyphen, char ** rep, int * pos, int * cut, int utf8) { 305db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int i, k, j = 0; 315db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char r; 325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0; (i + 1) < strlen(word); i++) { 335db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (utf8 && ((((unsigned char) word[i]) >> 6) == 2)) continue; 345db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if ((hyphen[j] & 1)) { 355db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (rep && rep[j]) { 365db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang k = hindex(word, j - pos[j] + 1, utf8) - word; 375db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang r = word[k]; 385db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang word[k] = 0; 395db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang printf(" - %s%s", word, rep[j]); 405db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang word[k] = r; 415db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang printf("%s\n", hindex(word + k, cut[j], utf8)); 425db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } else { 435db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang k = hindex(word, j + 1, utf8) - word; 445db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang r = word[k]; 455db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang word[k] = 0; 465db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang printf(" - %s=", word); 475db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang word[k] = r; 485db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang printf("%s\n", word + k); 495db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 505db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 515db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang j++; 525db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 535db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang} 545db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 555db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangint 565db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangmain(int argc, char** argv) 575db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{ 585db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 595db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang HyphenDict *dict; 605db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int df; 615db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int wtc; 625db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang FILE* wtclst; 635db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int k, n, i, j, c; 645db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char buf[BUFSIZE + 1]; 655db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int nHyphCount; 665db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char *hyphens; 675db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char *lcword; 685db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char *hyphword; 695db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char hword[BUFSIZE * 2]; 705db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int arg = 1; 715db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int optd = 1; 725db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int optdd = 0; 735db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang char ** rep; 745db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int * pos; 755db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang int * cut; 765db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 775db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* first parse the command line options */ 785db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* arg1 - hyphen dictionary file, arg2 - file of words to check */ 795db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 805db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (argv[arg]) { 815db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (strcmp(argv[arg], "-o") == 0) { 825db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang optd = 0; 835db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang arg++; 845db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 855db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (argv[arg] && strcmp(argv[arg], "-d") == 0) { 865db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang optd = 1; 875db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang optdd = 1; 885db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang arg++; 895db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 905db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 915db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 925db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (argv[arg]) { 935db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang df = arg++; 945db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } else { 955db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang help(); 965db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang exit(1); 975db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 985db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 995db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (argv[arg]) { 1005db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang wtc = arg++; 1015db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } else { 1025db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang help(); 1035db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang exit(1); 1045db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 1055db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1065db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* load the hyphenation dictionary */ 1075db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if ((dict = hnj_hyphen_load(argv[df])) == NULL) { 1085db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang fprintf(stderr, "Couldn't find file %s\n", argv[df]); 1095db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang fflush(stderr); 1105db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang exit(1); 1115db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 1125db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1135db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* open the words to check list */ 1145db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang wtclst = fopen(argv[wtc],"r"); 1155db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (!wtclst) { 1165db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang fprintf(stderr,"Error - could not open file of words to check\n"); 1175db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang exit(1); 1185db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 1195db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1205db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1215db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* now read each word from the wtc file */ 1225db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang while(fgets(buf,BUFSIZE,wtclst)) { 1235db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang k = strlen(buf); 1245db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (buf[k - 1] == '\n') buf[k - 1] = '\0'; 1255db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (*buf && buf[k - 2] == '\r') buf[k-- - 2] = '\0'; 1265db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1275db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* set aside some buffers to hold lower cased */ 1285db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* and hyphen information */ 1295db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang lcword = (char *) malloc(k+1); 1305db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphens = (char *)malloc(k+5); 1315db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (dict->utf8) { 1325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang strcpy(lcword, buf); 1335db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } else { 1345db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang enmkallsmall(lcword,buf,dict->cset); 1355db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 1365db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1375db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* first remove any trailing periods */ 1385db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang n = k-1; 1395db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang while((n >=0) && (lcword[n] == '.')) n--; 1405db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang n++; 1415db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1425db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* now actually try to hyphenate the word */ 1435db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1445db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang rep = NULL; 1455db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang pos = NULL; 1465db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang cut = NULL; 1475db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hword[0] = '\0'; 1485db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1495db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if ((!optd && hnj_hyphen_hyphenate(dict, lcword, n-1, hyphens)) || 1505db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang (optd && hnj_hyphen_hyphenate2(dict, lcword, n-1, hyphens, hword, &rep, &pos, &cut))) { 1515db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang free(hyphens); 1525db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang free(lcword); 1535db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang fprintf(stderr, "hyphenation error\n"); 1545db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang exit(1); 1555db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 1565db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1575db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (!optd) { 1585db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* now backfill hyphens[] for any removed periods */ 1595db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (c = n; c < k; c++) hyphens[c] = '0'; 1605db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphens[k] = '\0'; 1615db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1625db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* now create a new char string showing hyphenation positions */ 1635db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang /* count the hyphens and allocate space for the new hypehanted string */ 1645db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang nHyphCount = 0; 1655db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0; i < n; i++) 1665db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (hyphens[i]&1) 1675db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang nHyphCount++; 1685db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphword = (char *) malloc(k+1+nHyphCount); 1695db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang j = 0; 1705db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0; i < n; i++) { 1715db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphword[j++] = buf[i]; 1725db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (hyphens[i]&1) { 1735db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphword[j++] = '-'; 1745db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 1755db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 1765db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hyphword[j] = '\0'; 1775db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang fprintf(stdout,"%s\n",hyphword); 1785db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang fflush(stdout); 1795db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang free(hyphword); 1805db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } else { 1815db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang fprintf(stdout,"%s\n", hword); 1825db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (optdd) single_hyphenations(lcword, hyphens, rep, pos, cut, dict->utf8); 1835db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (rep) { 1845db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang for (i = 0; i < n - 1; i++) { 1855db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang if (rep[i]) free(rep[i]); 1865db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 1875db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang free(rep); 1885db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang free(pos); 1895db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang free(cut); 1905db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 1915db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 1925db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang free(hyphens); 1935db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang free(lcword); 1945db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang } 1955db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang 1965db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang fclose(wtclst); 1975db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang hnj_hyphen_free(dict); 1985db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang return 0; 1995db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang} 200