VCardSourceDetector.java revision 58610106ce61adad9b1caa1fe9f7925c3e938bab
14199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa/*
24199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa * Copyright (C) 2009 The Android Open Source Project
34199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa *
44199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa * Licensed under the Apache License, Version 2.0 (the "License");
54199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa * you may not use this file except in compliance with the License.
64199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa * You may obtain a copy of the License at
74199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa *
84199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa *      http://www.apache.org/licenses/LICENSE-2.0
94199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa *
104199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa * Unless required by applicable law or agreed to in writing, software
114199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa * distributed under the License is distributed on an "AS IS" BASIS,
124199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
134199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa * See the License for the specific language governing permissions and
144199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa * limitations under the License.
154199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa */
164199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawapackage com.android.vcard;
174199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa
184199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawaimport android.text.TextUtils;
194199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa
204199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawaimport java.util.Arrays;
214199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawaimport java.util.HashSet;
224199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawaimport java.util.List;
234199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawaimport java.util.Set;
244199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa
254199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa/**
264199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa * <p>
274199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa * The class which tries to detects the source of a vCard file from its contents.
284199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa * </p>
294199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa * <p>
304199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa * The specification of vCard (including both 2.1 and 3.0) is not so strict as to
314199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa * guess its format just by reading beginning few lines (usually we can, but in
324199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa * some most pessimistic case, we cannot until at almost the end of the file).
334199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa * Also we cannot store all vCard entries in memory, while there's no specification
344199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa * how big the vCard entry would become after the parse.
354199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa * </p>
364199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa * <p>
374199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa * This class is usually used for the "first scan", in which we can understand which vCard
384199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa * version is used (and how many entries exist in a file).
394199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa * </p>
404199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa */
414199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawapublic class VCardSourceDetector implements VCardInterpreter {
424199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    private static Set<String> APPLE_SIGNS = new HashSet<String>(Arrays.asList(
434199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            "X-PHONETIC-FIRST-NAME", "X-PHONETIC-MIDDLE-NAME", "X-PHONETIC-LAST-NAME",
444199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            "X-ABADR", "X-ABUID"));
454199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa
464199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    private static Set<String> JAPANESE_MOBILE_PHONE_SIGNS = new HashSet<String>(Arrays.asList(
474199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            "X-GNO", "X-GN", "X-REDUCTION"));
484199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa
494199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    private static Set<String> WINDOWS_MOBILE_PHONE_SIGNS = new HashSet<String>(Arrays.asList(
504199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            "X-MICROSOFT-ASST_TEL", "X-MICROSOFT-ASSISTANT", "X-MICROSOFT-OFFICELOC"));
514199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa
524199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    // Note: these signes appears before the signs of the other type (e.g. "X-GN").
534199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    // In other words, Japanese FOMA mobile phones are detected as FOMA, not JAPANESE_MOBILE_PHONES.
544199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    private static Set<String> FOMA_SIGNS = new HashSet<String>(Arrays.asList(
554199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            "X-SD-VERN", "X-SD-FORMAT_VER", "X-SD-CATEGORIES", "X-SD-CLASS", "X-SD-DCREATED",
564199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            "X-SD-DESCRIPTION"));
574199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    private static String TYPE_FOMA_CHARSET_SIGN = "X-SD-CHAR_CODE";
584199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa
5958610106ce61adad9b1caa1fe9f7925c3e938babDaisuke Miyakawa    /**
6058610106ce61adad9b1caa1fe9f7925c3e938babDaisuke Miyakawa     * Represents that no estimation is available. Users of this class is able to this
6158610106ce61adad9b1caa1fe9f7925c3e938babDaisuke Miyakawa     * constant when you don't want to let a vCard parser rely on estimation for parse type.
6258610106ce61adad9b1caa1fe9f7925c3e938babDaisuke Miyakawa     */
6358610106ce61adad9b1caa1fe9f7925c3e938babDaisuke Miyakawa    public static final int PARSE_TYPE_UNKNOWN = 0;
644199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa
654199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    // For Apple's software, which does not mean this type is effective for all its products.
664199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    // We confirmed they usually use UTF-8, but not sure about vCard type.
674199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    private static final int PARSE_TYPE_APPLE = 1;
684199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    // For Japanese mobile phones, which are usually using Shift_JIS as a charset.
694199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    private static final int PARSE_TYPE_MOBILE_PHONE_JP = 2;
704199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    // For some of mobile phones released from DoCoMo, which use nested vCard.
714199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    private static final int PARSE_TYPE_DOCOMO_TORELATE_NEST = 3;
724199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    // For Japanese Windows Mobel phones. It's version is supposed to be 6.5.
734199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    private static final int PARSE_TYPE_WINDOWS_MOBILE_V65_JP = 4;
744199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa
754199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    private int mParseType = 0;  // Not sure.
764199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa
774199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    // Some mobile phones (like FOMA) tells us the charset of the data.
784199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    private boolean mNeedParseSpecifiedCharset;
794199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    private String mSpecifiedCharset;
804199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa
814199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    public void start() {
824199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    }
834199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa
844199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    public void end() {
854199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    }
864199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa
874199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    public void startEntry() {
884199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    }
894199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa
904199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    public void startProperty() {
914199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa        mNeedParseSpecifiedCharset = false;
924199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    }
934199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa
944199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    public void endProperty() {
954199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    }
964199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa
974199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    public void endEntry() {
984199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    }
994199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa
1004199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    public void propertyGroup(String group) {
1014199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    }
1024199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa
1034199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    public void propertyName(String name) {
1044199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa        if (name.equalsIgnoreCase(TYPE_FOMA_CHARSET_SIGN)) {
1054199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            mParseType = PARSE_TYPE_DOCOMO_TORELATE_NEST;
1064199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            // Probably Shift_JIS is used, but we should double confirm.
1074199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            mNeedParseSpecifiedCharset = true;
1084199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            return;
1094199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa        }
1104199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa        if (mParseType != PARSE_TYPE_UNKNOWN) {
1114199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            return;
1124199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa        }
1134199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa        if (WINDOWS_MOBILE_PHONE_SIGNS.contains(name)) {
1144199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            mParseType = PARSE_TYPE_WINDOWS_MOBILE_V65_JP;
1154199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa        } else if (FOMA_SIGNS.contains(name)) {
1164199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            mParseType = PARSE_TYPE_DOCOMO_TORELATE_NEST;
1174199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa        } else if (JAPANESE_MOBILE_PHONE_SIGNS.contains(name)) {
1184199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            mParseType = PARSE_TYPE_MOBILE_PHONE_JP;
1194199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa        } else if (APPLE_SIGNS.contains(name)) {
1204199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            mParseType = PARSE_TYPE_APPLE;
1214199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa        }
1224199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    }
1234199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa
1244199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    public void propertyParamType(String type) {
1254199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    }
1264199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa
1274199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    public void propertyParamValue(String value) {
1284199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    }
1294199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa
1304199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    public void propertyValues(List<String> values) {
1314199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa        if (mNeedParseSpecifiedCharset && values.size() > 0) {
1324199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            mSpecifiedCharset = values.get(0);
1334199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa        }
1344199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    }
1354199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa
1364199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    /**
1374199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa     * @return The available type can be used with vCard parser. You probably need to
1384199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa     * use {{@link #getEstimatedCharset()} to understand the charset to be used.
1394199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa     */
1404199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    public int getEstimatedType() {
1414199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa        switch (mParseType) {
1424199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            case PARSE_TYPE_DOCOMO_TORELATE_NEST:
1434199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa                return VCardConfig.VCARD_TYPE_DOCOMO | VCardConfig.FLAG_TORELATE_NEST;
1444199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            case PARSE_TYPE_MOBILE_PHONE_JP:
1454199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa                return VCardConfig.VCARD_TYPE_V21_JAPANESE_MOBILE;
1464199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            case PARSE_TYPE_APPLE:
1474199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            case PARSE_TYPE_WINDOWS_MOBILE_V65_JP:
1484199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            default:
1494199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa                return VCardConfig.VCARD_TYPE_UNKNOWN;
1504199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa        }
1514199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    }
1524199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa
1534199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    /**
1544199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa     * <p>
1554199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa     * Returns charset String guessed from the source's properties.
1564199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa     * This method must be called after parsing target file(s).
1574199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa     * </p>
1584199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa     * @return Charset String. Null is returned if guessing the source fails.
1594199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa     */
1604199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    public String getEstimatedCharset() {
1614199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa        if (TextUtils.isEmpty(mSpecifiedCharset)) {
1624199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            return mSpecifiedCharset;
1634199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa        }
1644199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa        switch (mParseType) {
1654199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            case PARSE_TYPE_WINDOWS_MOBILE_V65_JP:
1664199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            case PARSE_TYPE_DOCOMO_TORELATE_NEST:
1674199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            case PARSE_TYPE_MOBILE_PHONE_JP:
1684199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa                return "SHIFT_JIS";
1694199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            case PARSE_TYPE_APPLE:
1704199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa                return "UTF-8";
1714199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa            default:
1724199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa                return null;
1734199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa        }
1744199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa    }
1754199c54c527330ac01699b176e7bca186a3aa3a4Daisuke Miyakawa}
176