1// Copyright 2013 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_URI_H_
6#define V8_URI_H_
7
8#include "src/v8.h"
9
10#include "src/conversions.h"
11#include "src/string-search.h"
12#include "src/utils.h"
13
14namespace v8 {
15namespace internal {
16
17
18template <typename Char>
19static INLINE(Vector<const Char> GetCharVector(Handle<String> string));
20
21
22template <>
23Vector<const uint8_t> GetCharVector(Handle<String> string) {
24  String::FlatContent flat = string->GetFlatContent();
25  ASSERT(flat.IsAscii());
26  return flat.ToOneByteVector();
27}
28
29
30template <>
31Vector<const uc16> GetCharVector(Handle<String> string) {
32  String::FlatContent flat = string->GetFlatContent();
33  ASSERT(flat.IsTwoByte());
34  return flat.ToUC16Vector();
35}
36
37
38class URIUnescape : public AllStatic {
39 public:
40  template<typename Char>
41  MUST_USE_RESULT static MaybeHandle<String> Unescape(Isolate* isolate,
42                                                      Handle<String> source);
43
44 private:
45  static const signed char kHexValue['g'];
46
47  template<typename Char>
48  MUST_USE_RESULT static MaybeHandle<String> UnescapeSlow(
49      Isolate* isolate, Handle<String> string, int start_index);
50
51  static INLINE(int TwoDigitHex(uint16_t character1, uint16_t character2));
52
53  template <typename Char>
54  static INLINE(int UnescapeChar(Vector<const Char> vector,
55                                 int i,
56                                 int length,
57                                 int* step));
58};
59
60
61const signed char URIUnescape::kHexValue[] = {
62    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
63    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
64    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
65    -0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
66    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
67    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
68    -1, 10, 11, 12, 13, 14, 15 };
69
70
71template<typename Char>
72MaybeHandle<String> URIUnescape::Unescape(Isolate* isolate,
73                                          Handle<String> source) {
74  int index;
75  { DisallowHeapAllocation no_allocation;
76    StringSearch<uint8_t, Char> search(isolate, STATIC_ASCII_VECTOR("%"));
77    index = search.Search(GetCharVector<Char>(source), 0);
78    if (index < 0) return source;
79  }
80  return UnescapeSlow<Char>(isolate, source, index);
81}
82
83
84template <typename Char>
85MaybeHandle<String> URIUnescape::UnescapeSlow(
86    Isolate* isolate, Handle<String> string, int start_index) {
87  bool one_byte = true;
88  int length = string->length();
89
90  int unescaped_length = 0;
91  { DisallowHeapAllocation no_allocation;
92    Vector<const Char> vector = GetCharVector<Char>(string);
93    for (int i = start_index; i < length; unescaped_length++) {
94      int step;
95      if (UnescapeChar(vector, i, length, &step) >
96              String::kMaxOneByteCharCode) {
97        one_byte = false;
98      }
99      i += step;
100    }
101  }
102
103  ASSERT(start_index < length);
104  Handle<String> first_part =
105      isolate->factory()->NewProperSubString(string, 0, start_index);
106
107  int dest_position = 0;
108  Handle<String> second_part;
109  ASSERT(unescaped_length <= String::kMaxLength);
110  if (one_byte) {
111    Handle<SeqOneByteString> dest = isolate->factory()->NewRawOneByteString(
112        unescaped_length).ToHandleChecked();
113    DisallowHeapAllocation no_allocation;
114    Vector<const Char> vector = GetCharVector<Char>(string);
115    for (int i = start_index; i < length; dest_position++) {
116      int step;
117      dest->SeqOneByteStringSet(dest_position,
118                                UnescapeChar(vector, i, length, &step));
119      i += step;
120    }
121    second_part = dest;
122  } else {
123    Handle<SeqTwoByteString> dest = isolate->factory()->NewRawTwoByteString(
124        unescaped_length).ToHandleChecked();
125    DisallowHeapAllocation no_allocation;
126    Vector<const Char> vector = GetCharVector<Char>(string);
127    for (int i = start_index; i < length; dest_position++) {
128      int step;
129      dest->SeqTwoByteStringSet(dest_position,
130                                UnescapeChar(vector, i, length, &step));
131      i += step;
132    }
133    second_part = dest;
134  }
135  return isolate->factory()->NewConsString(first_part, second_part);
136}
137
138
139int URIUnescape::TwoDigitHex(uint16_t character1, uint16_t character2) {
140  if (character1 > 'f') return -1;
141  int hi = kHexValue[character1];
142  if (hi == -1) return -1;
143  if (character2 > 'f') return -1;
144  int lo = kHexValue[character2];
145  if (lo == -1) return -1;
146  return (hi << 4) + lo;
147}
148
149
150template <typename Char>
151int URIUnescape::UnescapeChar(Vector<const Char> vector,
152                              int i,
153                              int length,
154                              int* step) {
155  uint16_t character = vector[i];
156  int32_t hi = 0;
157  int32_t lo = 0;
158  if (character == '%' &&
159      i <= length - 6 &&
160      vector[i + 1] == 'u' &&
161      (hi = TwoDigitHex(vector[i + 2],
162                        vector[i + 3])) != -1 &&
163      (lo = TwoDigitHex(vector[i + 4],
164                        vector[i + 5])) != -1) {
165    *step = 6;
166    return (hi << 8) + lo;
167  } else if (character == '%' &&
168      i <= length - 3 &&
169      (lo = TwoDigitHex(vector[i + 1],
170                        vector[i + 2])) != -1) {
171    *step = 3;
172    return lo;
173  } else {
174    *step = 1;
175    return character;
176  }
177}
178
179
180class URIEscape : public AllStatic {
181 public:
182  template<typename Char>
183  MUST_USE_RESULT static MaybeHandle<String> Escape(Isolate* isolate,
184                                                    Handle<String> string);
185
186 private:
187  static const char kHexChars[17];
188  static const char kNotEscaped[256];
189
190  static bool IsNotEscaped(uint16_t c) { return kNotEscaped[c] != 0; }
191};
192
193
194const char URIEscape::kHexChars[] = "0123456789ABCDEF";
195
196
197// kNotEscaped is generated by the following:
198//
199// #!/bin/perl
200// for (my $i = 0; $i < 256; $i++) {
201//   print "\n" if $i % 16 == 0;
202//   my $c = chr($i);
203//   my $escaped = 1;
204//   $escaped = 0 if $c =~ m#[A-Za-z0-9@*_+./-]#;
205//   print $escaped ? "0, " : "1, ";
206// }
207
208const char URIEscape::kNotEscaped[] = {
209    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
210    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
211    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
212    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
213    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
214    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
215    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
216    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
217    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
218    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
219    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
220    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
221    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
222    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
223    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
224    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
225
226
227template<typename Char>
228MaybeHandle<String> URIEscape::Escape(Isolate* isolate, Handle<String> string) {
229  ASSERT(string->IsFlat());
230  int escaped_length = 0;
231  int length = string->length();
232
233  { DisallowHeapAllocation no_allocation;
234    Vector<const Char> vector = GetCharVector<Char>(string);
235    for (int i = 0; i < length; i++) {
236      uint16_t c = vector[i];
237      if (c >= 256) {
238        escaped_length += 6;
239      } else if (IsNotEscaped(c)) {
240        escaped_length++;
241      } else {
242        escaped_length += 3;
243      }
244
245      // We don't allow strings that are longer than a maximal length.
246      ASSERT(String::kMaxLength < 0x7fffffff - 6);  // Cannot overflow.
247      if (escaped_length > String::kMaxLength) break;  // Provoke exception.
248    }
249  }
250
251  // No length change implies no change.  Return original string if no change.
252  if (escaped_length == length) return string;
253
254  Handle<SeqOneByteString> dest;
255  ASSIGN_RETURN_ON_EXCEPTION(
256      isolate, dest,
257      isolate->factory()->NewRawOneByteString(escaped_length),
258      String);
259  int dest_position = 0;
260
261  { DisallowHeapAllocation no_allocation;
262    Vector<const Char> vector = GetCharVector<Char>(string);
263    for (int i = 0; i < length; i++) {
264      uint16_t c = vector[i];
265      if (c >= 256) {
266        dest->SeqOneByteStringSet(dest_position, '%');
267        dest->SeqOneByteStringSet(dest_position+1, 'u');
268        dest->SeqOneByteStringSet(dest_position+2, kHexChars[c >> 12]);
269        dest->SeqOneByteStringSet(dest_position+3, kHexChars[(c >> 8) & 0xf]);
270        dest->SeqOneByteStringSet(dest_position+4, kHexChars[(c >> 4) & 0xf]);
271        dest->SeqOneByteStringSet(dest_position+5, kHexChars[c & 0xf]);
272        dest_position += 6;
273      } else if (IsNotEscaped(c)) {
274        dest->SeqOneByteStringSet(dest_position, c);
275        dest_position++;
276      } else {
277        dest->SeqOneByteStringSet(dest_position, '%');
278        dest->SeqOneByteStringSet(dest_position+1, kHexChars[c >> 4]);
279        dest->SeqOneByteStringSet(dest_position+2, kHexChars[c & 0xf]);
280        dest_position += 3;
281      }
282    }
283  }
284
285  return dest;
286}
287
288} }  // namespace v8::internal
289
290#endif  // V8_URI_H_
291