1f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien#include <stdio.h> 2f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien#include <sys/stat.h> 3f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien#include <string.h> 4f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien 5f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien#include "utils/Log.h" 6f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien 7f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien#include <vector> 8f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien#include <minikin/Hyphenator.h> 9f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien 10f0be43de02a1e07308d3d95408349c3c7f973430Raph Levienusing android::Hyphenator; 11f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien 12f0be43de02a1e07308d3d95408349c3c7f973430Raph LevienHyphenator* loadHybFile(const char* fn) { 13f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien struct stat statbuf; 14f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien int status = stat(fn, &statbuf); 15f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien if (status < 0) { 16f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien fprintf(stderr, "error opening %s\n", fn); 17f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien return nullptr; 18f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien } 19f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien size_t size = statbuf.st_size; 20f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien FILE* f = fopen(fn, "rb"); 21f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien if (f == NULL) { 22f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien fprintf(stderr, "error opening %s\n", fn); 23f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien return nullptr; 24f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien } 25f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien uint8_t* buf = new uint8_t[size]; 26f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien size_t read_size = fread(buf, 1, size, f); 27f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien if (read_size < size) { 28f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien fprintf(stderr, "error reading %s\n", fn); 29f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien delete[] buf; 30f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien return nullptr; 31f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien } 32f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien return Hyphenator::loadBinary(buf); 33f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien} 34f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien 35f0be43de02a1e07308d3d95408349c3c7f973430Raph Levienint main(int argc, char** argv) { 36f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien Hyphenator* hyph = loadHybFile("/tmp/en.hyb"); // should also be configurable 37f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien std::vector<uint8_t> result; 38f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien std::vector<uint16_t> word; 39f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien if (argc < 2) { 40f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien fprintf(stderr, "usage: hyphtool word\n"); 41f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien return 1; 42f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien } 43f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien char* asciiword = argv[1]; 44f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien size_t len = strlen(asciiword); 45f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien for (size_t i = 0; i < len; i++) { 46f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien uint32_t c = asciiword[i]; 47f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien if (c == '-') { 48f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien c = 0x00AD; 49f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien } 50f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien // ASCII (or possibly ISO Latin 1), but kinda painful to do utf conversion :( 51f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien word.push_back(c); 52f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien } 53f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien hyph->hyphenate(&result, word.data(), word.size()); 54f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien for (size_t i = 0; i < len; i++) { 55f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien if (result[i] != 0) { 56f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien printf("-"); 57f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien } 58f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien printf("%c", word[i]); 59f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien } 60f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien printf("\n"); 61f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien return 0; 62f0be43de02a1e07308d3d95408349c3c7f973430Raph Levien} 63