1// Copyright 2006-2008 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6//     * Redistributions of source code must retain the above copyright
7//       notice, this list of conditions and the following disclaimer.
8//     * Redistributions in binary form must reproduce the above
9//       copyright notice, this list of conditions and the following
10//       disclaimer in the documentation and/or other materials provided
11//       with the distribution.
12//     * Neither the name of Google Inc. nor the names of its
13//       contributors may be used to endorse or promote products derived
14//       from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28// This file contains support for URI manipulations written in
29// JavaScript.
30
31// Expect $String = global.String;
32
// Lazily initialized hex digit lookup tables. The value 0 means "not built
// yet": CharCodeToHex4Str fills hexCharArray with the digit strings
// "0".."F", and URIEncodeOctets fills hexCharCodeArray with the char codes
// of those same digits.
var hexCharArray = 0;
var hexCharCodeArray = 0;
36
37
// Writes the three char codes of the escape sequence "%XY" for |octet| into
// |result| starting at position |index|. X and Y are looked up in
// hexCharCodeArray using the high and the low nibble of the octet.
// hexCharCodeArray is not initialized here; URIEncodeOctets does that
// before calling this function.
// Returns the position just past the last char code written.
function URIAddEncodedOctetToBuffer(octet, result, index) {
  var highNibble = octet >> 4;
  var lowNibble = octet & 0x0F;
  result[index] = 37;  // Char code of '%'.
  result[index + 1] = hexCharCodeArray[highNibble];
  result[index + 2] = hexCharCodeArray[lowNibble];
  return index + 3;
}
44
45
// Appends the "%XY" escape of every octet in |octets| to |result|, starting
// at |index|. The first octet is always written; the entries at positions
// 1 to 3 are written only when they are truthy (unset entries are skipped).
// Builds the hexCharCodeArray lookup table on first use.
// Returns the position just past the last char code written.
function URIEncodeOctets(octets, result, index) {
  if (hexCharCodeArray === 0) {
    // Char codes of '0'..'9' followed by 'A'..'F'.
    hexCharCodeArray = [48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
                        65, 66, 67, 68, 69, 70];
  }
  index = URIAddEncodedOctetToBuffer(octets[0], result, index);
  for (var i = 1; i < 4; i++) {
    var octet = octets[i];
    if (octet) {
      index = URIAddEncodedOctetToBuffer(octet, result, index);
    }
  }
  return index;
}
57
58
// Encodes the single char code |cc| as one, two or three octets, chosen by
// the thresholds 0x007F and 0x07FF, and appends their escaped form to
// |result| via URIEncodeOctets.
// Returns the index value returned by URIEncodeOctets.
function URIEncodeSingle(cc, result, index) {
  var octets = new $Array(3);
  if (cc <= 0x007F) {
    octets[0] = cc;
  } else {
    var low6 = cc & 63;
    var mid6 = (cc >> 6) & 63;
    if (cc <= 0x07FF) {
      octets[0] = mid6 + 192;
      octets[1] = low6 + 128;
    } else {
      var top4 = (cc >> 12) & 0xF;
      octets[0] = top4 + 224;
      octets[1] = mid6 + 128;
      octets[2] = low6 + 128;
    }
  }
  return URIEncodeOctets(octets, result, index);
}
76
77
// Encodes the pair of char codes |cc1|, |cc2| as four octets and appends
// their escaped form to |result| via URIEncodeOctets. Encode calls this
// with |cc1| in 0xD800..0xDBFF and |cc2| in 0xDC00..0xDFFF.
// Returns the index value returned by URIEncodeOctets.
function URIEncodePair(cc1, cc2, result, index) {
  // Bit fields taken from the first char code.
  var upper = ((cc1 >> 6) & 0xF) + 1;
  var middle = (cc1 >> 2) & 0xF;
  var lowPair = cc1 & 3;
  // Bit fields taken from the second char code.
  var tailHigh = (cc2 >> 6) & 0xF;
  var tailLow = cc2 & 63;

  var first = (upper >> 2) + 240;
  var second = (((upper & 3) << 4) | middle) + 128;
  var third = ((lowPair << 4) | tailHigh) + 128;
  var fourth = tailLow + 128;

  var octets = new $Array(4);
  octets[0] = first;
  octets[1] = second;
  octets[2] = third;
  octets[3] = fourth;
  return URIEncodeOctets(octets, result, index);
}
91
92
// Combines the char codes of two hex digits into the value they denote:
// |highChar| supplies the upper four bits and |lowChar| the lower four.
// Throws a URIError if HexValueOf reports either one as -1 (not a digit).
function URIHexCharsToCharCode(highChar, lowChar) {
  var upper = HexValueOf(highChar);
  var lower = HexValueOf(lowChar);
  if (upper != -1 && lower != -1) {
    return (upper << 4) | lower;
  }
  throw new $URIError("URI malformed");
}
101
102
// Decodes the octet sequence in |octets| into a single value and appends it
// to |result| at |index|. The first octet selects the sequence length (one
// to four octets); every further octet must lie in 0x80..0xbf, and the
// decoded value must fall inside the range belonging to that length.
// Values below 0x10000 are stored as one entry, larger ones as two entries.
// Throws a URIError for any sequence that fails these checks and for values
// in 0xD800..0xDFFF.
// Returns the position just past the last entry written.
function URIDecodeOctets(octets, result, index) {
  var lead = octets[0];
  var second, third, fourth;
  var value;
  if (lead < 0x80) {
    // One octet: the value is the octet itself.
    value = lead;
  } else if (lead < 0xc2) {
    // First octets in 0x80..0xc1 are never accepted.
    throw new $URIError("URI malformed");
  } else if (lead < 0xe0) {
    // Two octets.
    second = octets[1];
    if ((second < 0x80) || (second > 0xbf)) {
      throw new $URIError("URI malformed");
    }
    value = ((lead & 0x1f) << 6) + (second & 0x3f);
    if (value < 0x80 || value > 0x7ff) {
      throw new $URIError("URI malformed");
    }
  } else if (lead < 0xf0) {
    // Three octets.
    second = octets[1];
    third = octets[2];
    if ((second < 0x80) || (second > 0xbf) ||
        (third < 0x80) || (third > 0xbf)) {
      throw new $URIError("URI malformed");
    }
    value = ((lead & 0x0f) << 12) + ((second & 0x3f) << 6) + (third & 0x3f);
    if ((value < 0x800) || (value > 0xffff)) {
      throw new $URIError("URI malformed");
    }
  } else if (lead < 0xf8) {
    // Four octets.
    second = octets[1];
    third = octets[2];
    fourth = octets[3];
    if ((second < 0x80) || (second > 0xbf) ||
        (third < 0x80) || (third > 0xbf) ||
        (fourth < 0x80) || (fourth > 0xbf)) {
      throw new $URIError("URI malformed");
    }
    value = ((lead & 0x07) << 18) + ((second & 0x3f) << 12) +
            ((third & 0x3f) << 6) + (fourth & 0x3f);
    if ((value < 0x10000) || (value > 0x10ffff)) {
      throw new $URIError("URI malformed");
    }
  } else {
    throw new $URIError("URI malformed");
  }

  if (0xD800 <= value && value <= 0xDFFF) {
    throw new $URIError("URI malformed");
  }
  if (value < 0x10000) {
    result[index] = value;
    return index + 1;
  }
  // Split into two entries, in 0xD800..0xDBFF and 0xDC00..0xDFFF.
  result[index] = (value >> 10) + 0xd7c0;
  result[index + 1] = (value & 0x3ff) + 0xdc00;
  return index + 2;
}
176
177
178// ECMA-262, section 15.1.3
179function Encode(uri, unescape) {
180  var uriLength = uri.length;
181  // We are going to pass result to %StringFromCharCodeArray
182  // which does not expect any getters/setters installed
183  // on the incoming array.
184  var result = new InternalArray(uriLength);
185  var index = 0;
186  for (var k = 0; k < uriLength; k++) {
187    var cc1 = uri.charCodeAt(k);
188    if (unescape(cc1)) {
189      result[index++] = cc1;
190    } else {
191      if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) throw new $URIError("URI malformed");
192      if (cc1 < 0xD800 || cc1 > 0xDBFF) {
193        index = URIEncodeSingle(cc1, result, index);
194      } else {
195        k++;
196        if (k == uriLength) throw new $URIError("URI malformed");
197        var cc2 = uri.charCodeAt(k);
198        if (cc2 < 0xDC00 || cc2 > 0xDFFF) throw new $URIError("URI malformed");
199        index = URIEncodePair(cc1, cc2, result, index);
200      }
201    }
202  }
203  return %StringFromCharCodeArray(result);
204}
205
206
207// ECMA-262, section 15.1.3
208function Decode(uri, reserved) {
209  var uriLength = uri.length;
210  // We are going to pass result to %StringFromCharCodeArray
211  // which does not expect any getters/setters installed
212  // on the incoming array.
213  var result = new InternalArray(uriLength);
214  var index = 0;
215  for (var k = 0; k < uriLength; k++) {
216    var ch = uri.charAt(k);
217    if (ch == '%') {
218      if (k + 2 >= uriLength) throw new $URIError("URI malformed");
219      var cc = URIHexCharsToCharCode(uri.charCodeAt(++k), uri.charCodeAt(++k));
220      if (cc >> 7) {
221        var n = 0;
222        while (((cc << ++n) & 0x80) != 0) { }
223        if (n == 1 || n > 4) throw new $URIError("URI malformed");
224        var octets = new $Array(n);
225        octets[0] = cc;
226        if (k + 3 * (n - 1) >= uriLength) throw new $URIError("URI malformed");
227        for (var i = 1; i < n; i++) {
228          if (uri.charAt(++k) != '%') throw new $URIError("URI malformed");
229          octets[i] = URIHexCharsToCharCode(uri.charCodeAt(++k),
230                                            uri.charCodeAt(++k));
231        }
232        index = URIDecodeOctets(octets, result, index);
233      } else {
234        if (reserved(cc)) {
235          result[index++] = 37; // Char code of '%'.
236          result[index++] = uri.charCodeAt(k - 1);
237          result[index++] = uri.charCodeAt(k);
238        } else {
239          result[index++] = cc;
240        }
241      }
242    } else {
243      result[index++] = ch.charCodeAt(0);
244    }
245  }
246  result.length = index;
247  return %StringFromCharCodeArray(result);
248}
249
250
// ECMA-262 - 15.1.3.1.
// Converts |uri| to a string and decodes it with Decode, passing a
// predicate that returns true for the char codes of the characters
// # $ & + , / : ; = ? @
function URIDecode(uri) {
  var isReserved = function(cc) {
    return (35 <= cc && cc <= 36) ||   // #$
           cc == 38 ||                 // &
           (43 <= cc && cc <= 44) ||   // +,
           cc == 47 ||                 // /
           (58 <= cc && cc <= 59) ||   // :;
           cc == 61 ||                 // =
           (63 <= cc && cc <= 64);     // ?@
  };
  return Decode(ToString(uri), isReserved);
}
274
275
// ECMA-262 - 15.1.3.2.
// Converts |component| to a string and decodes it with Decode, passing a
// predicate that returns false for every char code.
function URIDecodeComponent(component) {
  var nothingReserved = function(cc) { return false; };
  return Decode(ToString(component), nothingReserved);
}
282
283
// Returns true if the char code |cc| is that of an ASCII letter
// (a - z, A - Z) or of a decimal digit (0 - 9), false otherwise.
function isAlphaNumeric(cc) {
  var isLowerCase = 97 <= cc && cc <= 122;  // a - z
  var isUpperCase = 65 <= cc && cc <= 90;   // A - Z
  var isDigit = 48 <= cc && cc <= 57;       // 0 - 9
  return isLowerCase || isUpperCase || isDigit;
}
295
296
// ECMA-262 - 15.1.3.3.
// Converts |uri| to a string and encodes it with Encode. The predicate
// passed along returns true for the char codes that are left unescaped:
// alphanumerics plus  ! # $ & ' ( ) * + , - . / : ; = ? @ _ ~
function URIEncode(uri) {
  var staysUnescaped = function(cc) {
    return isAlphaNumeric(cc) ||
           cc == 33 ||                 // !
           (35 <= cc && cc <= 36) ||   // #$
           (38 <= cc && cc <= 47) ||   // &'()*+,-./
           (58 <= cc && cc <= 59) ||   // :;
           cc == 61 ||                 // =
           (63 <= cc && cc <= 64) ||   // ?@
           cc == 95 ||                 // _
           cc == 126;                  // ~
  };
  return Encode(ToString(uri), staysUnescaped);
}
324
325
// ECMA-262 - 15.1.3.4
// Converts |component| to a string and encodes it with Encode. The
// predicate passed along returns true for the char codes that are left
// unescaped: alphanumerics plus  ! ' ( ) * - . _ ~
function URIEncodeComponent(component) {
  var staysUnescaped = function(cc) {
    return isAlphaNumeric(cc) ||
           cc == 33 ||                 // !
           (39 <= cc && cc <= 42) ||   // '()*
           (45 <= cc && cc <= 46) ||   // -.
           cc == 95 ||                 // _
           cc == 126;                  // ~
  };
  return Encode(ToString(component), staysUnescaped);
}
347
348
// Maps the char code of a hex digit ('0'-'9', 'A'-'F' or 'a'-'f') to its
// numeric value 0..15. Returns -1 for any other char code.
function HexValueOf(code) {
  var base;
  if (code >= 48 && code <= 57) {
    base = 48;   // '0' maps to 0.
  } else if (code >= 65 && code <= 70) {
    base = 55;   // 'A' maps to 10.
  } else if (code >= 97 && code <= 102) {
    base = 87;   // 'a' maps to 10.
  } else {
    return -1;
  }
  return code - base;
}
359
360
// Formats the low 16 bits of the char code |cc| as a string of exactly four
// upper-case hex digits, most significant digit first.
// Examples: 64 -> 0040, 62234 -> F31A.
// Builds the hexCharArray lookup table on first use.
function CharCodeToHex4Str(cc) {
  if (hexCharArray === 0) {
    hexCharArray = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
                    "A", "B", "C", "D", "E", "F"];
  }
  var digits = "";
  // Walk the four nibbles from the most significant one down.
  for (var shift = 12; shift >= 0; shift -= 4) {
    digits = digits + hexCharArray[(cc >>> shift) & 0x0F];
  }
  return digits;
}
376
377
// Returns true if every character of the string |s| is a hex digit
// ('0'..'9', 'A'..'F' or 'a'..'f'), and false as soon as one is not.
// An empty string yields true.
function IsValidHex(s) {
  for (var i = 0; i < s.length; ++i) {
    var cc = s.charCodeAt(i);
    var isDecimalDigit = 48 <= cc && cc <= 57;
    var isUpperHexLetter = 65 <= cc && cc <= 70;
    var isLowerHexLetter = 97 <= cc && cc <= 102;
    if (!(isDecimalDigit || isUpperHexLetter || isLowerHexLetter)) {
      return false;
    }
  }
  return true;
}
392
393
394// ECMA-262 - B.2.1.
395function URIEscape(str) {
396  var s = ToString(str);
397  return %URIEscape(s);
398}
399
400
401// ECMA-262 - B.2.2.
402function URIUnescape(str) {
403  var s = ToString(str);
404  return %URIUnescape(s);
405}
406
407
408// -------------------------------------------------------------------
409
410function SetUpUri() {
411  %CheckIsBootstrapping();
412  // Set up non-enumerable URI functions on the global object and set
413  // their names.
414  InstallFunctions(global, DONT_ENUM, $Array(
415    "escape", URIEscape,
416    "unescape", URIUnescape,
417    "decodeURI", URIDecode,
418    "decodeURIComponent", URIDecodeComponent,
419    "encodeURI", URIEncode,
420    "encodeURIComponent", URIEncodeComponent
421  ));
422}
423
424SetUpUri();
425