1/*
2 **********************************************************************
3 *   Copyright (C) 2005-2008, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 */
7
8#ifndef __INPUTEXT_H
9#define __INPUTEXT_H
10
11/**
12 * \file
13 * \internal
14 *
15 * This is an internal header for the Character Set Detection code. The
16 * name is probably too generic...
17 */
18
19
20#include "unicode/uobject.h"
21
22#if !UCONFIG_NO_CONVERSION
23
24U_NAMESPACE_BEGIN
25
26class InputText : public UMemory
27{
28    // Prevent copying
29    InputText(const InputText &);
30public:
31    InputText(UErrorCode &status);
32    ~InputText();
33
34    void setText(const char *in, int32_t len);
35    void setDeclaredEncoding(const char *encoding, int32_t len);
36    UBool isSet() const;
37    void MungeInput(UBool fStripTags);
38
39    // The text to be checked.  Markup will have been
40    //   removed if appropriate.
41    uint8_t    *fInputBytes;
42    int32_t     fInputLen;          // Length of the byte data in fInputBytes.
43    // byte frequency statistics for the input text.
44    //   Value is percent, not absolute.
45    //   Value is rounded up, so zero really means zero occurences.
46    int16_t  *fByteStats;
47    UBool     fC1Bytes;          // True if any bytes in the range 0x80 - 0x9F are in the input;false by default
48    char     *fDeclaredEncoding;
49
50    const uint8_t           *fRawInput;     // Original, untouched input bytes.
51    //  If user gave us a byte array, this is it.
52    //  If user gave us a stream, it's read to a
53    //   buffer here.
54    int32_t                  fRawLength;    // Length of data in fRawInput array.
55
56};
57
58U_NAMESPACE_END
59
#endif /* !UCONFIG_NO_CONVERSION */
#endif /* __INPUTEXT_H */
62