/*
 ******************************************************************************
 * Copyright (C) 1998-2005, International Business Machines Corporation and   *
 * others. All Rights Reserved.                                               *
 ******************************************************************************
 */
795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include <errno.h>
995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include <stdio.h>
1095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include <string.h>
1195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
1295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include "unicode/utypes.h"
1395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include "unicode/unistr.h"
1495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
1595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include "layout/LETypes.h"
1695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
1795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include "GUISupport.h"
1895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley#include "UnicodeReader.h"
1995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
/* Convert a (possibly signed) char value to its unsigned byte value, 0..255.
 * The argument is parenthesized so the cast applies to the whole expression. */
#define BYTE(b) (((int) (b)) & 0xFF)
2195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
2295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley/*
2395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley * Read the text from a file. The text must start with a Unicode Byte
2495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley * Order Mark (BOM) so that we know what order to read the bytes in.
2595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley */
2695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langleyconst UChar *UnicodeReader::readFile(const char *fileName, GUISupport *guiSupport, int32_t &charCount)
2795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley{
2895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    FILE *f;
2995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    int32_t fileSize;
3095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
3195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    UChar *charBuffer;
3295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    char *byteBuffer;
3395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    char startBytes[4] = {'\xA5', '\xA5', '\xA5', '\xA5'};
3495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    char errorMessage[128];
3595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    const char *cp = "";
3695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    int32_t signatureLength = 0;
3795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
3895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    f = fopen(fileName, "rb");
3995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
4095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if( f == NULL ) {
4195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        sprintf(errorMessage,"Couldn't open %s: %s \n", fileName, strerror(errno));
4295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        guiSupport->postErrorMessage(errorMessage, "Text File Error");
4395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        return 0;
4495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
4595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
4695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    fseek(f, 0, SEEK_END);
4795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    fileSize = ftell(f);
4895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
4995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    fseek(f, 0, SEEK_SET);
5095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    fread(startBytes, sizeof(char), 4, f);
5195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
5295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if (startBytes[0] == '\xFE' && startBytes[1] == '\xFF') {
5395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        cp = "UTF-16BE";
5495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        signatureLength = 2;
5595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } else if (startBytes[0] == '\xFF' && startBytes[1] == '\xFE') {
5695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        if (startBytes[2] == '\x00' && startBytes[3] == '\x00') {
5795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley            cp = "UTF-32LE";
5895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley            signatureLength = 4;
5995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        } else {
6095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley            cp = "UTF-16LE";
6195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley            signatureLength = 2;
6295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        }
6395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } else if (startBytes[0] == '\xEF' && startBytes[1] == '\xBB' && startBytes[2] == '\xBF') {
6495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        cp = "UTF-8";
6595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        signatureLength = 3;
6695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } else if (startBytes[0] == '\x0E' && startBytes[1] == '\xFE' && startBytes[2] == '\xFF') {
6795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        cp = "SCSU";
6895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        signatureLength = 3;
6995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } else if (startBytes[0] == '\x00' && startBytes[1] == '\x00' &&
7095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        startBytes[2] == '\xFE' && startBytes[3] == '\xFF') {
7195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        cp = "UTF-32BE";
7295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        signatureLength = 4;
7395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    } else {
7495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        sprintf(errorMessage, "Couldn't detect the encoding of %s: (%2.2X, %2.2X, %2.2X, %2.2X)\n", fileName,
7595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley                    BYTE(startBytes[0]), BYTE(startBytes[1]), BYTE(startBytes[2]), BYTE(startBytes[3]));
7695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        guiSupport->postErrorMessage(errorMessage, "Text File Error");
7795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        fclose(f);
7895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        return 0;
7995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
8095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
8195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    fileSize -= signatureLength;
8295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    fseek(f, signatureLength, SEEK_SET);
8395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    byteBuffer = new char[fileSize];
8495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
8595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if(byteBuffer == 0) {
8695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        sprintf(errorMessage,"Couldn't get memory for reading %s: %s \n", fileName, strerror(errno));
8795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        guiSupport->postErrorMessage(errorMessage, "Text File Error");
8895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        fclose(f);
8995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        return 0;
9095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
9195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
9295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    fread(byteBuffer, sizeof(char), fileSize, f);
9395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if( ferror(f) ) {
9495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        sprintf(errorMessage,"Couldn't read %s: %s \n", fileName, strerror(errno));
9595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        guiSupport->postErrorMessage(errorMessage, "Text File Error");
9695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        fclose(f);
9795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        delete[] byteBuffer;
9895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        return 0;
9995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
10095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    fclose(f);
10195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
10295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    UnicodeString myText(byteBuffer, fileSize, cp);
10395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
10495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    delete[] byteBuffer;
10595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
10695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    charCount = myText.length();
10795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    charBuffer = LE_NEW_ARRAY(UChar, charCount + 1);
10895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    if(charBuffer == 0) {
10995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        sprintf(errorMessage,"Couldn't get memory for reading %s: %s \n", fileName, strerror(errno));
11095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        guiSupport->postErrorMessage(errorMessage, "Text File Error");
11195c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley        return 0;
11295c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    }
11395c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
11495c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    myText.extract(0, myText.length(), charBuffer);
11595c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    charBuffer[charCount] = 0;    // NULL terminate for easier reading in the debugger
11695c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
11795c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley    return charBuffer;
11895c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley}
11995c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley
12095c29f3cd1f6c08c6c0927868683392eea727ccAdam Langley