/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef _CHARACTER_ENCODING_DETECTOR_H
#define _CHARACTER_ENCODING_DETECTOR_H

#include <media/mediascanner.h>

#include "StringArray.h"

#include "unicode/ucnv.h"
#include "unicode/ucsdet.h"
#include "unicode/ustring.h"

28e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatramannamespace android {
29e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman
30e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatramanclass CharacterEncodingDetector {
31e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman
32e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman    public:
33e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman    CharacterEncodingDetector();
34e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman        ~CharacterEncodingDetector();
35e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman
36e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman        void addTag(const char *name, const char *value);
37e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman        size_t size();
38e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman
39e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman        void detectAndConvert();
40e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman        status_t getTag(int index, const char **name, const char**value);
41e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman
42e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman    private:
43e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman        const UCharsetMatch *getPreferred(
44e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman                const char *input, size_t len,
45e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman                const UCharsetMatch** ucma, size_t matches,
46e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman                bool *goodmatch, int *highestmatch);
47e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman
48e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman        bool isFrequent(const uint16_t *values, uint32_t c);
49e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman
50e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman        // cached name and value strings, for native encoding support.
51e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman        // TODO: replace these with byte blob arrays that don't require the data to be
52e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman        // singlenullbyte-terminated
53e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman        StringArray     mNames;
54e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman        StringArray     mValues;
55e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman
56e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman        UConverter*     mUtf8Conv;
57e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman};
58e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman
59e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman
60e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman
61e2b43843fd12783188edd2c54188ea8d26864788Vijay Venkatraman};  // namespace android

#endif