1/*
2 * Copyright (C) 2008 The Guava Authors
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.google.thirdparty.publicsuffix;
18
19import com.google.common.annotations.GwtCompatible;
20import com.google.common.base.Joiner;
21import com.google.common.collect.ImmutableMap;
22import com.google.common.collect.Lists;
23
24import java.util.List;
25
26/**
27 * Parser for a map of reversed domain names stored as a serialized radix tree.
28 */
29@GwtCompatible
30class TrieParser {
31
32  private static final Joiner PREFIX_JOINER = Joiner.on("");
33
34  /**
35   * Parses a serialized trie representation of a map of reversed public
36   * suffixes into an immutable map of public suffixes.
37   */
38  static ImmutableMap<String, PublicSuffixType> parseTrie(CharSequence encoded) {
39    ImmutableMap.Builder<String, PublicSuffixType> builder = ImmutableMap.builder();
40    int encodedLen = encoded.length();
41    int idx = 0;
42    while (idx < encodedLen) {
43      idx += doParseTrieToBuilder(
44          Lists.<CharSequence>newLinkedList(),
45          encoded.subSequence(idx, encodedLen),
46          builder);
47    }
48    return builder.build();
49  }
50
51  /**
52   * Parses a trie node and returns the number of characters consumed.
53   *
54   * @param stack The prefixes that preceed the characters represented by this
55   *     node. Each entry of the stack is in reverse order.
56   * @param encoded The serialized trie.
57   * @param builder A map builder to which all entries will be added.
58   * @return The number of characters consumed from {@code encoded}.
59   */
60  private static int doParseTrieToBuilder(
61      List<CharSequence> stack,
62      CharSequence encoded,
63      ImmutableMap.Builder<String, PublicSuffixType> builder) {
64
65    int encodedLen = encoded.length();
66    int idx = 0;
67    char c = '\0';
68
69    // Read all of the characters for this node.
70    for ( ; idx < encodedLen; idx++) {
71      c = encoded.charAt(idx);
72      if (c == '&' || c == '?' || c == '!' || c == ':' || c == ',') {
73        break;
74      }
75    }
76
77    stack.add(0, reverse(encoded.subSequence(0, idx)));
78
79    if (c == '!' || c == '?' || c == ':' || c == ',') {
80      // '!' represents an interior node that represents an ICANN entry in the map.
81      // '?' represents a leaf node, which represents an ICANN entry in map.
82      // ':' represents an interior node that represents a private entry in the map
83      // ',' represents a leaf node, which represents a private entry in the map.
84      String domain = PREFIX_JOINER.join(stack);
85      if (domain.length() > 0) {
86        builder.put(domain, PublicSuffixType.fromCode(c));
87      }
88    }
89    idx++;
90
91    if (c != '?' && c != ',') {
92      while (idx < encodedLen) {
93        // Read all the children
94        idx += doParseTrieToBuilder(stack, encoded.subSequence(idx, encodedLen), builder);
95        if (encoded.charAt(idx) == '?' || encoded.charAt(idx) == ',') {
96          // An extra '?' or ',' after a child node indicates the end of all children of this node.
97          idx++;
98          break;
99        }
100      }
101    }
102    stack.remove(0);
103    return idx;
104  }
105
106  /**
107   * Reverses a character sequence. This is borrowed from
108   * https://code.google.com/p/google-web-toolkit/source/detail?r=11591#
109   * and can be replaced with a simple {@code StringBuffer#reverse} once GWT 2.6 is available.
110   */
111  private static CharSequence reverse(CharSequence s) {
112    int length = s.length();
113    if (length <= 1) {
114      return s;
115    }
116
117    char[] buffer = new char[length];
118    buffer[0] = s.charAt(length - 1);
119
120    for (int i = 1; i < length; i++) {
121      buffer[i] = s.charAt(length - 1 - i);
122      if (Character.isSurrogatePair(buffer[i], buffer[i - 1])) {
123        swap(buffer, i - 1, i);
124      }
125    }
126
127    return new String(buffer);
128  }
129
130  private static void swap(char[] buffer, int f, int s) {
131    char tmp = buffer[f];
132    buffer[f] = buffer[s];
133    buffer[s] = tmp;
134  }
135}
136