/*
 ******************************************************************************
 * Copyright (C) 2016 and later: Unicode, Inc. and others.                    *
 * License & terms of use: http://www.unicode.org/copyright.html#License      *
 ******************************************************************************
 ******************************************************************************
 * Copyright (C) 1998-2005, International Business Machines Corporation and   *
 * others. All Rights Reserved.                                               *
 ******************************************************************************
 */

#include <errno.h>
#include <stdio.h>
#include <string.h>

#include <new>

#include "unicode/utypes.h"
#include "unicode/unistr.h"

#include "layout/LETypes.h"

#include "GUISupport.h"
#include "UnicodeReader.h"
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "UnicodeReader.h" 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define BYTE(b) (((int) b) & 0xFF) 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Read the text from a file. The text must start with a Unicode Byte 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Order Mark (BOM) so that we know what order to read the bytes in. 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst UChar *UnicodeReader::readFile(const char *fileName, GUISupport *guiSupport, int32_t &charCount) 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FILE *f; 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t fileSize; 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *charBuffer; 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char *byteBuffer; 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char startBytes[4] = {'\xA5', '\xA5', '\xA5', '\xA5'}; 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char errorMessage[128]; 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *cp = ""; 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t signatureLength = 0; 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru f = fopen(fileName, "rb"); 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( f == NULL ) { 
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sprintf(errorMessage,"Couldn't open %s: %s \n", fileName, strerror(errno)); 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru guiSupport->postErrorMessage(errorMessage, "Text File Error"); 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fseek(f, 0, SEEK_END); 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fileSize = ftell(f); 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fseek(f, 0, SEEK_SET); 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fread(startBytes, sizeof(char), 4, f); 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (startBytes[0] == '\xFE' && startBytes[1] == '\xFF') { 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cp = "UTF-16BE"; 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru signatureLength = 2; 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if (startBytes[0] == '\xFF' && startBytes[1] == '\xFE') { 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (startBytes[2] == '\x00' && startBytes[3] == '\x00') { 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cp = "UTF-32LE"; 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru signatureLength = 4; 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cp = "UTF-16LE"; 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru signatureLength = 2; 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if (startBytes[0] == 
'\xEF' && startBytes[1] == '\xBB' && startBytes[2] == '\xBF') { 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cp = "UTF-8"; 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru signatureLength = 3; 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if (startBytes[0] == '\x0E' && startBytes[1] == '\xFE' && startBytes[2] == '\xFF') { 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cp = "SCSU"; 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru signatureLength = 3; 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if (startBytes[0] == '\x00' && startBytes[1] == '\x00' && 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru startBytes[2] == '\xFE' && startBytes[3] == '\xFF') { 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cp = "UTF-32BE"; 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru signatureLength = 4; 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sprintf(errorMessage, "Couldn't detect the encoding of %s: (%2.2X, %2.2X, %2.2X, %2.2X)\n", fileName, 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru BYTE(startBytes[0]), BYTE(startBytes[1]), BYTE(startBytes[2]), BYTE(startBytes[3])); 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru guiSupport->postErrorMessage(errorMessage, "Text File Error"); 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fclose(f); 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fileSize -= signatureLength; 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fseek(f, signatureLength, SEEK_SET); 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru byteBuffer = new char[fileSize]; 
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(byteBuffer == 0) { 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sprintf(errorMessage,"Couldn't get memory for reading %s: %s \n", fileName, strerror(errno)); 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru guiSupport->postErrorMessage(errorMessage, "Text File Error"); 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fclose(f); 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fread(byteBuffer, sizeof(char), fileSize, f); 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( ferror(f) ) { 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sprintf(errorMessage,"Couldn't read %s: %s \n", fileName, strerror(errno)); 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru guiSupport->postErrorMessage(errorMessage, "Text File Error"); 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fclose(f); 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete[] byteBuffer; 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fclose(f); 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString myText(byteBuffer, fileSize, cp); 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete[] byteBuffer; 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru charCount = myText.length(); 
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru charBuffer = LE_NEW_ARRAY(UChar, charCount + 1); 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(charBuffer == 0) { 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sprintf(errorMessage,"Couldn't get memory for reading %s: %s \n", fileName, strerror(errno)); 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru guiSupport->postErrorMessage(errorMessage, "Text File Error"); 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myText.extract(0, myText.length(), charBuffer); 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru charBuffer[charCount] = 0; // NULL terminate for easier reading in the debugger 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return charBuffer; 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 124