1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ******************************************************************************
364339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert * Copyright (C) 2016 and later: Unicode, Inc. and others.                    *
464339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert * License & terms of use: http://www.unicode.org/copyright.html#License      *
564339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert ******************************************************************************
664339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert ******************************************************************************
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copyright (C) 1998-2005, International Business Machines Corporation and   *
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * others. All Rights Reserved.                                               *
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ******************************************************************************
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <errno.h>
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdio.h>
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <string.h>
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unistr.h"
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "layout/LETypes.h"
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "GUISupport.h"
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "UnicodeReader.h"
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/* Extract the low 8 bits of b as a non-negative int (avoids sign extension of char). */
#define BYTE(b) (((int) (b)) & 0xFF)
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Read the text from a file. The text must start with a Unicode Byte
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Order Mark (BOM) so that we know what order to read the bytes in.
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst UChar *UnicodeReader::readFile(const char *fileName, GUISupport *guiSupport, int32_t &charCount)
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    FILE *f;
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t fileSize;
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar *charBuffer;
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char *byteBuffer;
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char startBytes[4] = {'\xA5', '\xA5', '\xA5', '\xA5'};
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char errorMessage[128];
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const char *cp = "";
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t signatureLength = 0;
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    f = fopen(fileName, "rb");
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if( f == NULL ) {
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        sprintf(errorMessage,"Couldn't open %s: %s \n", fileName, strerror(errno));
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        guiSupport->postErrorMessage(errorMessage, "Text File Error");
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fseek(f, 0, SEEK_END);
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fileSize = ftell(f);
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fseek(f, 0, SEEK_SET);
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fread(startBytes, sizeof(char), 4, f);
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (startBytes[0] == '\xFE' && startBytes[1] == '\xFF') {
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cp = "UTF-16BE";
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        signatureLength = 2;
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if (startBytes[0] == '\xFF' && startBytes[1] == '\xFE') {
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (startBytes[2] == '\x00' && startBytes[3] == '\x00') {
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cp = "UTF-32LE";
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            signatureLength = 4;
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cp = "UTF-16LE";
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            signatureLength = 2;
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if (startBytes[0] == '\xEF' && startBytes[1] == '\xBB' && startBytes[2] == '\xBF') {
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cp = "UTF-8";
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        signatureLength = 3;
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if (startBytes[0] == '\x0E' && startBytes[1] == '\xFE' && startBytes[2] == '\xFF') {
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cp = "SCSU";
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        signatureLength = 3;
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if (startBytes[0] == '\x00' && startBytes[1] == '\x00' &&
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        startBytes[2] == '\xFE' && startBytes[3] == '\xFF') {
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cp = "UTF-32BE";
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        signatureLength = 4;
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        sprintf(errorMessage, "Couldn't detect the encoding of %s: (%2.2X, %2.2X, %2.2X, %2.2X)\n", fileName,
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    BYTE(startBytes[0]), BYTE(startBytes[1]), BYTE(startBytes[2]), BYTE(startBytes[3]));
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        guiSupport->postErrorMessage(errorMessage, "Text File Error");
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fclose(f);
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fileSize -= signatureLength;
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fseek(f, signatureLength, SEEK_SET);
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    byteBuffer = new char[fileSize];
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(byteBuffer == 0) {
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        sprintf(errorMessage,"Couldn't get memory for reading %s: %s \n", fileName, strerror(errno));
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        guiSupport->postErrorMessage(errorMessage, "Text File Error");
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fclose(f);
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fread(byteBuffer, sizeof(char), fileSize, f);
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if( ferror(f) ) {
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        sprintf(errorMessage,"Couldn't read %s: %s \n", fileName, strerror(errno));
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        guiSupport->postErrorMessage(errorMessage, "Text File Error");
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fclose(f);
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        delete[] byteBuffer;
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fclose(f);
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString myText(byteBuffer, fileSize, cp);
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    delete[] byteBuffer;
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    charCount = myText.length();
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    charBuffer = LE_NEW_ARRAY(UChar, charCount + 1);
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(charBuffer == 0) {
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        sprintf(errorMessage,"Couldn't get memory for reading %s: %s \n", fileName, strerror(errno));
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        guiSupport->postErrorMessage(errorMessage, "Text File Error");
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    myText.extract(0, myText.length(), charBuffer);
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    charBuffer[charCount] = 0;    // NULL terminate for easier reading in the debugger
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return charBuffer;
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
124