1f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien#include <stdio.h>
2f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien#include <sys/stat.h>
3f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien#include <string.h>
4f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien
5f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien#include "utils/Log.h"
6f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien
7f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien#include <vector>
8f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien#include <minikin/Hyphenator.h>
9f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien
10f0be43de02a1e07308d3d95408349c3c7f973430Raph Levienusing android::Hyphenator;
11f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien
12f0be43de02a1e07308d3d95408349c3c7f973430Raph LevienHyphenator* loadHybFile(const char* fn) {
13f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    struct stat statbuf;
14f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    int status = stat(fn, &statbuf);
15f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    if (status < 0) {
16f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien        fprintf(stderr, "error opening %s\n", fn);
17f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien        return nullptr;
18f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    }
19f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    size_t size = statbuf.st_size;
20f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    FILE* f = fopen(fn, "rb");
21f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    if (f == NULL) {
22f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien        fprintf(stderr, "error opening %s\n", fn);
23f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien        return nullptr;
24f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    }
25f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    uint8_t* buf = new uint8_t[size];
26f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    size_t read_size = fread(buf, 1, size, f);
27f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    if (read_size < size) {
28f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien        fprintf(stderr, "error reading %s\n", fn);
29f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien        delete[] buf;
30f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien        return nullptr;
31f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    }
32f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    return Hyphenator::loadBinary(buf);
33f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien}
34f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien
35f0be43de02a1e07308d3d95408349c3c7f973430Raph Levienint main(int argc, char** argv) {
36f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    Hyphenator* hyph = loadHybFile("/tmp/en.hyb");  // should also be configurable
37f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    std::vector<uint8_t> result;
38f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    std::vector<uint16_t> word;
39f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    if (argc < 2) {
40f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien        fprintf(stderr, "usage: hyphtool word\n");
41f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien        return 1;
42f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    }
43f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    char* asciiword = argv[1];
44f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    size_t len = strlen(asciiword);
45f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    for (size_t i = 0; i < len; i++) {
46f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien        uint32_t c = asciiword[i];
47f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien        if (c == '-') {
48f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien            c = 0x00AD;
49f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien        }
50f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien        // ASCII (or possibly ISO Latin 1), but kinda painful to do utf conversion :(
51f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien        word.push_back(c);
52f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    }
53f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    hyph->hyphenate(&result, word.data(), word.size());
54f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    for (size_t i = 0; i < len; i++) {
55f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien        if (result[i] != 0) {
56f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien            printf("-");
57f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien        }
58f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien        printf("%c", word[i]);
59f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    }
60f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    printf("\n");
61f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien    return 0;
62f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien}
63