1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ******************************************************************************
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copyright (C) 1998-2005, International Business Machines Corporation and   *
4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * others. All Rights Reserved.                                               *
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ******************************************************************************
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <errno.h>
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdio.h>
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <string.h>
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unistr.h"
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "layout/LETypes.h"
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "GUISupport.h"
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "UnicodeReader.h"
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
// Widen a (possibly signed) char to int and keep only its low 8 bits, so that
// bytes >= 0x80 print as 80..FF rather than as sign-extended values.
// The argument is parenthesized so the cast applies to the whole expression.
#define BYTE(b) (((int) (b)) & 0xFF)
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Read the text from a file. The text must start with a Unicode Byte
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Order Mark (BOM) so that we know what order to read the bytes in.
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst UChar *UnicodeReader::readFile(const char *fileName, GUISupport *guiSupport, int32_t &charCount)
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    FILE *f;
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t fileSize;
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar *charBuffer;
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char *byteBuffer;
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char startBytes[4] = {'\xA5', '\xA5', '\xA5', '\xA5'};
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char errorMessage[128];
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const char *cp = "";
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t signatureLength = 0;
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    f = fopen(fileName, "rb");
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if( f == NULL ) {
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        sprintf(errorMessage,"Couldn't open %s: %s \n", fileName, strerror(errno));
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        guiSupport->postErrorMessage(errorMessage, "Text File Error");
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fseek(f, 0, SEEK_END);
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fileSize = ftell(f);
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fseek(f, 0, SEEK_SET);
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fread(startBytes, sizeof(char), 4, f);
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (startBytes[0] == '\xFE' && startBytes[1] == '\xFF') {
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cp = "UTF-16BE";
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        signatureLength = 2;
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if (startBytes[0] == '\xFF' && startBytes[1] == '\xFE') {
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (startBytes[2] == '\x00' && startBytes[3] == '\x00') {
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cp = "UTF-32LE";
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            signatureLength = 4;
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cp = "UTF-16LE";
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            signatureLength = 2;
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if (startBytes[0] == '\xEF' && startBytes[1] == '\xBB' && startBytes[2] == '\xBF') {
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cp = "UTF-8";
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        signatureLength = 3;
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if (startBytes[0] == '\x0E' && startBytes[1] == '\xFE' && startBytes[2] == '\xFF') {
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cp = "SCSU";
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        signatureLength = 3;
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if (startBytes[0] == '\x00' && startBytes[1] == '\x00' &&
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        startBytes[2] == '\xFE' && startBytes[3] == '\xFF') {
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cp = "UTF-32BE";
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        signatureLength = 4;
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        sprintf(errorMessage, "Couldn't detect the encoding of %s: (%2.2X, %2.2X, %2.2X, %2.2X)\n", fileName,
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    BYTE(startBytes[0]), BYTE(startBytes[1]), BYTE(startBytes[2]), BYTE(startBytes[3]));
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        guiSupport->postErrorMessage(errorMessage, "Text File Error");
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fclose(f);
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fileSize -= signatureLength;
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fseek(f, signatureLength, SEEK_SET);
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    byteBuffer = new char[fileSize];
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(byteBuffer == 0) {
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        sprintf(errorMessage,"Couldn't get memory for reading %s: %s \n", fileName, strerror(errno));
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        guiSupport->postErrorMessage(errorMessage, "Text File Error");
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fclose(f);
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fread(byteBuffer, sizeof(char), fileSize, f);
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if( ferror(f) ) {
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        sprintf(errorMessage,"Couldn't read %s: %s \n", fileName, strerror(errno));
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        guiSupport->postErrorMessage(errorMessage, "Text File Error");
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fclose(f);
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        delete[] byteBuffer;
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fclose(f);
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString myText(byteBuffer, fileSize, cp);
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    delete[] byteBuffer;
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    charCount = myText.length();
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    charBuffer = LE_NEW_ARRAY(UChar, charCount + 1);
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(charBuffer == 0) {
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        sprintf(errorMessage,"Couldn't get memory for reading %s: %s \n", fileName, strerror(errno));
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        guiSupport->postErrorMessage(errorMessage, "Text File Error");
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    myText.extract(0, myText.length(), charBuffer);
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    charBuffer[charCount] = 0;    // NULL terminate for easier reading in the debugger
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return charBuffer;
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
120