15db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#include <string.h>
25db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#include <stdlib.h>
35db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#include <stdio.h>
45db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
55db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#include "hyphen.h"
65db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#include "csutil.h"
75db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
85db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang#define BUFSIZE 1000
95db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
/*
 * Print the command-line usage summary for this example program to stderr.
 *
 * The synopsis lists the two switches that main() actually recognises
 * ("-o" and "-d"); each is described on its own line.
 */
void help(void) {
    fprintf(stderr,"correct syntax is:\n");
    fprintf(stderr,"example [-o | -d] hyphen_dictionary_file file_of_words_to_check\n");
    fprintf(stderr,"-o = use old algorithm (without non-standard hyphenation)\n");
    fprintf(stderr,"-d = hyphenation with listing of the possible hyphenations\n");
}
165db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
/*
 * Get a pointer to the nth 8-bit or UTF-8 character of the word.
 *
 * word - NUL-terminated string to walk
 * n    - number of characters to skip; n <= 0 returns word itself
 * utf8 - when non-zero, UTF-8 continuation bytes (bit pattern 10xxxxxx)
 *        are skipped together with the character they follow
 *
 * Returns a pointer into word.  The walk stops at the terminating NUL, so
 * an n larger than the number of characters yields a pointer to the
 * terminator instead of reading past the end of the string.
 */
char * hindex(char * word, int n, int utf8) {
    int j;
    for (j = 0; j < n && *word != '\0'; j++) {
        word++;
        /* swallow the continuation bytes that belong to the character just passed */
        while (utf8 && ((((unsigned char) *word) >> 6) == 2)) word++;
    }
    return word;
}
275db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
285db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang/* list possible hyphenations with -dd option (example for the usage of the hyphenate2() function) */
295db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangvoid single_hyphenations(char * word, char * hyphen, char ** rep, int * pos, int * cut, int utf8) {
305db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int i, k, j = 0;
315db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char r;
325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    for (i = 0; (i + 1) < strlen(word); i++) {
335db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        if (utf8 && ((((unsigned char) word[i]) >> 6) == 2)) continue;
345db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        if ((hyphen[j] & 1)) {
355db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            if (rep && rep[j]) {
365db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang              k = hindex(word, j - pos[j] + 1, utf8) - word;
375db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang              r = word[k];
385db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang              word[k] = 0;
395db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang              printf(" - %s%s", word, rep[j]);
405db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang              word[k] = r;
415db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang              printf("%s\n", hindex(word + k, cut[j], utf8));
425db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            } else {
435db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang              k = hindex(word, j + 1, utf8) - word;
445db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang              r = word[k];
455db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang              word[k] = 0;
465db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang              printf(" - %s=", word);
475db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang              word[k] = r;
485db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang              printf("%s\n", word + k);
495db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            }
505db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        }
515db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang        j++;
525db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    }
535db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang}
545db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
555db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangint
565db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wangmain(int argc, char** argv)
575db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang{
585db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
595db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    HyphenDict *dict;
605db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int df;
615db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int wtc;
625db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    FILE* wtclst;
635db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int k, n, i, j, c;
645db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char buf[BUFSIZE + 1];
655db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int  nHyphCount;
665db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char *hyphens;
675db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char *lcword;
685db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char *hyphword;
695db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char hword[BUFSIZE * 2];
705db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int arg = 1;
715db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int optd = 1;
725db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int optdd = 0;
735db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    char ** rep;
745db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int * pos;
755db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    int * cut;
765db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
775db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang  /* first parse the command line options */
785db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang  /* arg1 - hyphen dictionary file, arg2 - file of words to check */
795db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
805db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang  if (argv[arg]) {
815db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       if (strcmp(argv[arg], "-o") == 0) {
825db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            optd = 0;
835db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            arg++;
845db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       }
855db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       if (argv[arg] && strcmp(argv[arg], "-d") == 0) {
865db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            optd = 1;
875db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            optdd = 1;
885db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            arg++;
895db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       }
905db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang  }
915db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
925db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang  if (argv[arg]) {
935db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       df = arg++;
945db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang  } else {
955db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    help();
965db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    exit(1);
975db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang  }
985db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
995db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang  if (argv[arg]) {
1005db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       wtc = arg++;
1015db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang  } else {
1025db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    help();
1035db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    exit(1);
1045db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang  }
1055db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1065db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang  /* load the hyphenation dictionary */
1075db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang  if ((dict = hnj_hyphen_load(argv[df])) == NULL) {
1085db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       fprintf(stderr, "Couldn't find file %s\n", argv[df]);
1095db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       fflush(stderr);
1105db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       exit(1);
1115db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang  }
1125db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1135db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang  /* open the words to check list */
1145db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang  wtclst = fopen(argv[wtc],"r");
1155db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang  if (!wtclst) {
1165db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    fprintf(stderr,"Error - could not open file of words to check\n");
1175db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    exit(1);
1185db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang  }
1195db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1205db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1215db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang  /* now read each word from the wtc file */
1225db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    while(fgets(buf,BUFSIZE,wtclst)) {
1235db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       k = strlen(buf);
1245db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       if (buf[k - 1] == '\n') buf[k - 1] = '\0';
1255db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       if (*buf && buf[k - 2] == '\r') buf[k-- - 2] = '\0';
1265db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1275db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       /* set aside some buffers to hold lower cased */
1285db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       /* and hyphen information */
1295db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       lcword = (char *) malloc(k+1);
1305db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       hyphens = (char *)malloc(k+5);
1315db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       if (dict->utf8) {
1325db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang         strcpy(lcword, buf);
1335db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       } else {
1345db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang         enmkallsmall(lcword,buf,dict->cset);
1355db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       }
1365db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1375db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       /* first remove any trailing periods */
1385db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       n = k-1;
1395db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       while((n >=0) && (lcword[n] == '.')) n--;
1405db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       n++;
1415db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1425db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       /* now actually try to hyphenate the word */
1435db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1445db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       rep = NULL;
1455db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       pos = NULL;
1465db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       cut = NULL;
1475db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       hword[0] = '\0';
1485db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1495db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       if ((!optd && hnj_hyphen_hyphenate(dict, lcword, n-1, hyphens)) ||
1505db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang	    (optd && hnj_hyphen_hyphenate2(dict, lcword, n-1, hyphens, hword, &rep, &pos, &cut))) {
1515db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang             free(hyphens);
1525db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang             free(lcword);
1535db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang             fprintf(stderr, "hyphenation error\n");
1545db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang             exit(1);
1555db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       }
1565db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1575db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang       if (!optd) {
1585db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang         /* now backfill hyphens[] for any removed periods */
1595db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang         for (c = n; c < k; c++) hyphens[c] = '0';
1605db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang         hyphens[k] = '\0';
1615db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1625db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang         /* now create a new char string showing hyphenation positions */
1635db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang         /* count the hyphens and allocate space for the new hypehanted string */
1645db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang         nHyphCount = 0;
1655db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang         for (i = 0; i < n; i++)
1665db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang           if (hyphens[i]&1)
1675db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang             nHyphCount++;
1685db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang         hyphword = (char *) malloc(k+1+nHyphCount);
1695db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang         j = 0;
1705db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang         for (i = 0; i < n; i++) {
1715db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang	   hyphword[j++] = buf[i];
1725db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang           if (hyphens[i]&1) {
1735db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang	      hyphword[j++] = '-';
1745db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang	   }
1755db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang         }
1765db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang         hyphword[j] = '\0';
1775db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang         fprintf(stdout,"%s\n",hyphword);
1785db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang         fflush(stdout);
1795db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang         free(hyphword);
1805db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang      } else {
1815db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang         fprintf(stdout,"%s\n", hword);
1825db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang         if (optdd) single_hyphenations(lcword, hyphens, rep, pos, cut, dict->utf8);
1835db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang         if (rep) {
1845db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            for (i = 0; i < n - 1; i++) {
1855db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang                if (rep[i]) free(rep[i]);
1865db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            }
1875db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            free(rep);
1885db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            free(pos);
1895db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang            free(cut);
1905db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang         }
1915db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang      }
1925db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang      free(hyphens);
1935db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang      free(lcword);
1945db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    }
1955db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang
1965db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    fclose(wtclst);
1975db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    hnj_hyphen_free(dict);
1985db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang    return 0;
1995db78df27806d2eb07c14f86623a906df914b952Shimeng (Simon) Wang}
200