195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley/* 295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley ****************************************************************************** 395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley * Copyright (C) 1998-2005, International Business Machines Corporation and * 495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley * others. All Rights Reserved. * 595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley ****************************************************************************** 695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley */ 795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley 895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include <errno.h> 995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include <stdio.h> 1095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include <string.h> 1195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley 1295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include "unicode/utypes.h" 1395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include "unicode/unistr.h" 1495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley 1595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include "layout/LETypes.h" 1695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley 1795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include "GUISupport.h" 1895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include "UnicodeReader.h" 1995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley 2095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#define BYTE(b) (((int) b) & 0xFF) 2195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley 2295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley/* 2395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley * Read the text from a file. The text must start with a Unicode Byte 2495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley * Order Mark (BOM) so that we know what order to read the bytes in. 2595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley */ 2695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleyconst UChar *UnicodeReader::readFile(const char *fileName, GUISupport *guiSupport, int32_t &charCount) 2795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley{ 2895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley FILE *f; 2995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley int32_t fileSize; 3095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley 3195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley UChar *charBuffer; 3295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley char *byteBuffer; 3395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley char startBytes[4] = {'\xA5', '\xA5', '\xA5', '\xA5'}; 3495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley char errorMessage[128]; 3595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley const char *cp = ""; 3695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley int32_t signatureLength = 0; 3795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley 3895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley f = fopen(fileName, "rb"); 3995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley 4095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley if( f == NULL ) { 4195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley sprintf(errorMessage,"Couldn't open %s: %s \n", fileName, strerror(errno)); 4295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley guiSupport->postErrorMessage(errorMessage, "Text File Error"); 4395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley return 0; 4495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley } 4595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley 4695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley fseek(f, 0, SEEK_END); 4795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley fileSize = ftell(f); 4895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley 4995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley fseek(f, 0, SEEK_SET); 5095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley fread(startBytes, sizeof(char), 4, f); 5195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley 5295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley if (startBytes[0] == '\xFE' && startBytes[1] == '\xFF') { 5395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley cp = "UTF-16BE"; 5495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley signatureLength = 2; 5595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley } else if (startBytes[0] == '\xFF' && startBytes[1] == '\xFE') { 5695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley if (startBytes[2] == '\x00' && startBytes[3] == '\x00') { 5795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley cp = "UTF-32LE"; 5895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley signatureLength = 4; 5995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley } else { 6095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley cp = "UTF-16LE"; 6195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley signatureLength = 2; 6295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley } 6395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley } else if (startBytes[0] == '\xEF' && startBytes[1] == '\xBB' && startBytes[2] == '\xBF') { 6495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley cp = "UTF-8"; 6595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley signatureLength = 3; 6695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley } else if (startBytes[0] == '\x0E' && startBytes[1] == '\xFE' && startBytes[2] == '\xFF') { 6795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley cp = "SCSU"; 6895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley signatureLength = 3; 6995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley } else if (startBytes[0] == '\x00' && startBytes[1] == '\x00' && 7095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley startBytes[2] == '\xFE' && startBytes[3] == '\xFF') { 7195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley cp = "UTF-32BE"; 7295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley signatureLength = 4; 7395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley } else { 7495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley sprintf(errorMessage, "Couldn't detect the encoding of %s: (%2.2X, %2.2X, %2.2X, %2.2X)\n", fileName, 7595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley BYTE(startBytes[0]), BYTE(startBytes[1]), BYTE(startBytes[2]), BYTE(startBytes[3])); 7695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley guiSupport->postErrorMessage(errorMessage, "Text File Error"); 7795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley fclose(f); 7895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley return 0; 7995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley } 8095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley 8195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley fileSize -= signatureLength; 8295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley fseek(f, signatureLength, SEEK_SET); 8395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley byteBuffer = new char[fileSize]; 8495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley 8595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley if(byteBuffer == 0) { 8695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley sprintf(errorMessage,"Couldn't get memory for reading %s: %s \n", fileName, strerror(errno)); 8795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley guiSupport->postErrorMessage(errorMessage, "Text File Error"); 8895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley fclose(f); 8995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley return 0; 9095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley } 9195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley 9295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley fread(byteBuffer, sizeof(char), fileSize, f); 9395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley if( ferror(f) ) { 9495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley sprintf(errorMessage,"Couldn't read %s: %s \n", fileName, strerror(errno)); 9595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley guiSupport->postErrorMessage(errorMessage, "Text File Error"); 9695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley fclose(f); 9795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley delete[] byteBuffer; 9895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley return 0; 9995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley } 10095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley fclose(f); 10195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley 10295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley UnicodeString myText(byteBuffer, fileSize, cp); 10395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley 10495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley delete[] byteBuffer; 10595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley 10695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley charCount = myText.length(); 10795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley charBuffer = LE_NEW_ARRAY(UChar, charCount + 1); 10895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley if(charBuffer == 0) { 10995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley sprintf(errorMessage,"Couldn't get memory for reading %s: %s \n", fileName, strerror(errno)); 11095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley guiSupport->postErrorMessage(errorMessage, "Text File Error"); 11195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley return 0; 11295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley } 11395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley 11495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley myText.extract(0, myText.length(), charBuffer); 11595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley charBuffer[charCount] = 0; // NULL terminate for easier reading in the debugger 11695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley 11795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley return charBuffer; 11895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley} 11995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley 12095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley