1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ******************************************************************************** 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copyright (C) 2005-2006, International Business Machines 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Corporation and others. All Rights Reserved. 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ******************************************************************************** 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucsdet.h" 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <string.h> 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdio.h> 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BUFFER_SIZE 8192 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint main(int argc, char *argv[]) 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static char buffer[BUFFER_SIZE]; 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t arg; 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( argc <= 1 ) { 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("Usage: %s [filename]...\n", argv[0]); 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -1; 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(arg = 1; arg < argc; arg += 1) { 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FILE *file; 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char *filename = argv[arg]; 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t inputLength, match, matchCount = 0; 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCharsetDetector* csd; 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UCharsetMatch **csm; 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (arg > 1) { 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("\n"); 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru file = fopen(filename, "rb"); 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (file == NULL) { 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("Cannot open file \"%s\"\n\n", filename); 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("%s:\n", filename); 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru inputLength = (int32_t) fread(buffer, 1, BUFFER_SIZE, file); 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fclose(file); 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csd = ucsdet_open(&status); 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucsdet_setText(csd, buffer, inputLength, &status); 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csm = ucsdet_detectAll(csd, &matchCount, &status); 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(match = 0; match < matchCount; match += 1) { 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *name = ucsdet_getName(csm[match], &status); 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *lang = ucsdet_getLanguage(csm[match], &status); 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t confidence = ucsdet_getConfidence(csm[match], &status); 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (lang == NULL || strlen(lang) == 0) { 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru lang = "**"; 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("%s (%s) %d\n", name, lang, confidence); 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucsdet_close(csd); 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 74