1// Copyright 2006-2008 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6//     * Redistributions of source code must retain the above copyright
7//       notice, this list of conditions and the following disclaimer.
8//     * Redistributions in binary form must reproduce the above
9//       copyright notice, this list of conditions and the following
10//       disclaimer in the documentation and/or other materials provided
11//       with the distribution.
12//     * Neither the name of Google Inc. nor the names of its
13//       contributors may be used to endorse or promote products derived
14//       from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28// This file relies on the fact that the following declaration has been made
29// in runtime.js:
30// var $Array = global.Array;
31
32// -------------------------------------------------------------------
33
34// This file contains support for URI manipulations written in
35// JavaScript.
36
// Hex digit lookup tables. Both start out as the sentinel 0 and are replaced
// by a real array the first time they are needed (the users test for the
// sentinel with `=== 0`).
var hexCharArray = 0;      // Digit strings; filled in by CharCodeToHex4Str.
var hexCharCodeArray = 0;  // Digit char codes; filled in by URIEncodeOctets.
40
41
// Writes the percent-escape for |octet| into |result| as three char codes
// ('%', high hex digit, low hex digit) starting at |index|, and returns the
// index just past the last code written. Reads hexCharCodeArray, so that
// table must already have been populated by the caller.
function URIAddEncodedOctetToBuffer(octet, result, index) {
  var highNibble = octet >> 4;
  var lowNibble = octet & 0x0F;
  result[index] = 37;  // '%'
  result[index + 1] = hexCharCodeArray[highNibble];
  result[index + 2] = hexCharCodeArray[lowNibble];
  return index + 3;
}
48
49
// Percent-encodes up to four octets from |octets| into |result|, starting at
// |index|, and returns the index just past the output. The first octet is
// always written; octets 1..3 are written only when they are truthy, so
// unset slots (and zero) are skipped. Builds hexCharCodeArray on first use.
function URIEncodeOctets(octets, result, index) {
  if (hexCharCodeArray === 0) {
    // Char codes of '0'..'9' followed by 'A'..'F'.
    hexCharCodeArray = [48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
                        65, 66, 67, 68, 69, 70];
  }
  index = URIAddEncodedOctetToBuffer(octets[0], result, index);
  for (var i = 1; i < 4; i++) {
    var octet = octets[i];
    if (octet) index = URIAddEncodedOctetToBuffer(octet, result, index);
  }
  return index;
}
61
62
// Encodes the single code unit |cc| as one, two or three UTF-8 octets
// (chosen by comparing |cc| against 0x7F and 0x7FF) and hands them to
// URIEncodeOctets for percent-encoding into |result| at |index|. Returns
// whatever URIEncodeOctets returns.
function URIEncodeSingle(cc, result, index) {
  var octets = new $Array(3);
  if (cc <= 0x007F) {
    // One octet: the code itself.
    octets[0] = cc;
  } else {
    var lowSix = cc & 63;
    var middleSix = (cc >> 6) & 63;
    if (cc <= 0x07FF) {
      // Two octets: 110yyyyy 10zzzzzz.
      octets[0] = middleSix + 192;
      octets[1] = lowSix + 128;
    } else {
      // Three octets: 1110xxxx 10yyyyyy 10zzzzzz.
      var topFour = (cc >> 12) & 0xF;
      octets[0] = topFour + 224;
      octets[1] = middleSix + 128;
      octets[2] = lowSix + 128;
    }
  }
  return URIEncodeOctets(octets, result, index);
}
80
81
// Encodes the code unit pair |cc1|, |cc2| (the caller passes a lead and a
// trail surrogate) as four UTF-8 octets and hands them to URIEncodeOctets
// for percent-encoding into |result| at |index|. Returns whatever
// URIEncodeOctets returns.
function URIEncodePair(cc1 , cc2, result, index) {
  // Bit fields pulled out of the two code units.
  var planeBits = ((cc1 >> 6) & 0xF) + 1;
  var leadMiddle = (cc1 >> 2) & 0xF;
  var leadLow = cc1 & 3;
  var trailHigh = (cc2 >> 6) & 0xF;
  var trailLow = cc2 & 63;

  var octets = new $Array(4);
  octets[0] = 240 + (planeBits >> 2);
  octets[1] = 128 + (((planeBits & 3) << 4) | leadMiddle);
  octets[2] = 128 + ((leadLow << 4) | trailHigh);
  octets[3] = 128 + trailLow;
  return URIEncodeOctets(octets, result, index);
}
95
96
// Combines two hex digit char codes into the byte value they spell, with
// |highChar| supplying the upper four bits. Throws $URIError("URI malformed")
// when HexValueOf reports -1 for either digit.
function URIHexCharsToCharCode(highChar, lowChar) {
  var highNibble = HexValueOf(highChar);
  var lowNibble = HexValueOf(lowChar);
  if (highNibble != -1 && lowNibble != -1) {
    return (highNibble << 4) | lowNibble;
  }
  throw new $URIError("URI malformed");
}
105
106
107function URIDecodeOctets(octets, result, index) {
108  var value;
109  var o0 = octets[0];
110  if (o0 < 0x80) {
111    value = o0;
112  } else if (o0 < 0xc2) {
113    throw new $URIError("URI malformed");
114  } else {
115    var o1 = octets[1];
116    if (o0 < 0xe0) {
117      var a = o0 & 0x1f;
118      if ((o1 < 0x80) || (o1 > 0xbf)) {
119        throw new $URIError("URI malformed");
120      }
121      var b = o1 & 0x3f;
122      value = (a << 6) + b;
123      if (value < 0x80 || value > 0x7ff) {
124        throw new $URIError("URI malformed");
125      }
126    } else {
127      var o2 = octets[2];
128      if (o0 < 0xf0) {
129        var a = o0 & 0x0f;
130        if ((o1 < 0x80) || (o1 > 0xbf)) {
131          throw new $URIError("URI malformed");
132        }
133        var b = o1 & 0x3f;
134        if ((o2 < 0x80) || (o2 > 0xbf)) {
135          throw new $URIError("URI malformed");
136        }
137        var c = o2 & 0x3f;
138        value = (a << 12) + (b << 6) + c;
139        if ((value < 0x800) || (value > 0xffff)) {
140          throw new $URIError("URI malformed");
141        }
142      } else {
143        var o3 = octets[3];
144        if (o0 < 0xf8) {
145          var a = (o0 & 0x07);
146          if ((o1 < 0x80) || (o1 > 0xbf)) {
147            throw new $URIError("URI malformed");
148          }
149          var b = (o1 & 0x3f);
150          if ((o2 < 0x80) || (o2 > 0xbf)) {
151            throw new $URIError("URI malformed");
152          }
153          var c = (o2 & 0x3f);
154          if ((o3 < 0x80) || (o3 > 0xbf)) {
155            throw new $URIError("URI malformed");
156          }
157          var d = (o3 & 0x3f);
158          value = (a << 18) + (b << 12) + (c << 6) + d;
159          if ((value < 0x10000) || (value > 0x10ffff)) {
160            throw new $URIError("URI malformed");
161          }
162        } else {
163          throw new $URIError("URI malformed");
164        }
165      }
166    }
167  }
168  if (0xD800 <= value && value <= 0xDFFF) {
169    throw new $URIError("URI malformed");
170  }
171  if (value < 0x10000) {
172    %_TwoByteSeqStringSetChar(result, index++, value);
173    return index;
174  } else {
175    %_TwoByteSeqStringSetChar(result, index++, (value >> 10) + 0xd7c0);
176    %_TwoByteSeqStringSetChar(result, index++, (value & 0x3ff) + 0xdc00);
177    return index;
178  }
179}
180
181
182// ECMA-262, section 15.1.3
183function Encode(uri, unescape) {
184  var uriLength = uri.length;
185  var array = new InternalArray(uriLength);
186  var index = 0;
187  for (var k = 0; k < uriLength; k++) {
188    var cc1 = uri.charCodeAt(k);
189    if (unescape(cc1)) {
190      array[index++] = cc1;
191    } else {
192      if (cc1 >= 0xDC00 && cc1 <= 0xDFFF) throw new $URIError("URI malformed");
193      if (cc1 < 0xD800 || cc1 > 0xDBFF) {
194        index = URIEncodeSingle(cc1, array, index);
195      } else {
196        k++;
197        if (k == uriLength) throw new $URIError("URI malformed");
198        var cc2 = uri.charCodeAt(k);
199        if (cc2 < 0xDC00 || cc2 > 0xDFFF) throw new $URIError("URI malformed");
200        index = URIEncodePair(cc1, cc2, array, index);
201      }
202    }
203  }
204
205  var result = %NewString(array.length, NEW_ONE_BYTE_STRING);
206  for (var i = 0; i < array.length; i++) {
207    %_OneByteSeqStringSetChar(result, i, array[i]);
208  }
209  return result;
210}
211
212
213// ECMA-262, section 15.1.3
214function Decode(uri, reserved) {
215  var uriLength = uri.length;
216  var one_byte = %NewString(uriLength, NEW_ONE_BYTE_STRING);
217  var index = 0;
218  var k = 0;
219
220  // Optimistically assume ascii string.
221  for ( ; k < uriLength; k++) {
222    var code = uri.charCodeAt(k);
223    if (code == 37) {  // '%'
224      if (k + 2 >= uriLength) throw new $URIError("URI malformed");
225      var cc = URIHexCharsToCharCode(uri.charCodeAt(k+1), uri.charCodeAt(k+2));
226      if (cc >> 7) break;  // Assumption wrong, two byte string.
227      if (reserved(cc)) {
228        %_OneByteSeqStringSetChar(one_byte, index++, 37);  // '%'.
229        %_OneByteSeqStringSetChar(one_byte, index++, uri.charCodeAt(k+1));
230        %_OneByteSeqStringSetChar(one_byte, index++, uri.charCodeAt(k+2));
231      } else {
232        %_OneByteSeqStringSetChar(one_byte, index++, cc);
233      }
234      k += 2;
235    } else {
236      if (code > 0x7f) break;  // Assumption wrong, two byte string.
237      %_OneByteSeqStringSetChar(one_byte, index++, code);
238    }
239  }
240
241  one_byte = %TruncateString(one_byte, index);
242  if (k == uriLength) return one_byte;
243
244  // Write into two byte string.
245  var two_byte = %NewString(uriLength - k, NEW_TWO_BYTE_STRING);
246  index = 0;
247
248  for ( ; k < uriLength; k++) {
249    var code = uri.charCodeAt(k);
250    if (code == 37) {  // '%'
251      if (k + 2 >= uriLength) throw new $URIError("URI malformed");
252      var cc = URIHexCharsToCharCode(uri.charCodeAt(++k), uri.charCodeAt(++k));
253      if (cc >> 7) {
254        var n = 0;
255        while (((cc << ++n) & 0x80) != 0) { }
256        if (n == 1 || n > 4) throw new $URIError("URI malformed");
257        var octets = new $Array(n);
258        octets[0] = cc;
259        if (k + 3 * (n - 1) >= uriLength) throw new $URIError("URI malformed");
260        for (var i = 1; i < n; i++) {
261          if (uri.charAt(++k) != '%') throw new $URIError("URI malformed");
262          octets[i] = URIHexCharsToCharCode(uri.charCodeAt(++k),
263                                            uri.charCodeAt(++k));
264        }
265        index = URIDecodeOctets(octets, two_byte, index);
266      } else  if (reserved(cc)) {
267        %_TwoByteSeqStringSetChar(two_byte, index++, 37);  // '%'.
268        %_TwoByteSeqStringSetChar(two_byte, index++, uri.charCodeAt(k - 1));
269        %_TwoByteSeqStringSetChar(two_byte, index++, uri.charCodeAt(k));
270      } else {
271        %_TwoByteSeqStringSetChar(two_byte, index++, cc);
272      }
273    } else {
274      %_TwoByteSeqStringSetChar(two_byte, index++, code);
275    }
276  }
277
278  two_byte = %TruncateString(two_byte, index);
279  return one_byte + two_byte;
280}
281
282
// ECMA-262 - 15.1.3.1.
//
// Converts |uri| with ToString and runs it through Decode, passing a
// predicate that marks the char codes of # $ & + , / : ; = ? @ as reserved.
function URIDecode(uri) {
  var reservedPredicate = function(cc) {
    return (35 <= cc && cc <= 36) ||  // #$
           cc == 38 ||                // &
           (43 <= cc && cc <= 44) ||  // +,
           cc == 47 ||                // /
           (58 <= cc && cc <= 59) ||  // :;
           cc == 61 ||                // =
           (63 <= cc && cc <= 64);    // ?@
  };
  return Decode(ToString(uri), reservedPredicate);
}
306
307
// ECMA-262 - 15.1.3.2.
//
// Converts |component| with ToString and runs it through Decode with a
// predicate that never reports a char code as reserved.
function URIDecodeComponent(component) {
  var nothingReserved = function(cc) { return false; };
  return Decode(ToString(component), nothingReserved);
}
314
315
// Returns true when the char code |cc| is that of an ASCII letter
// ('a'..'z' or 'A'..'Z') or an ASCII digit ('0'..'9'), false otherwise.
function isAlphaNumeric(cc) {
  var isLowerCase = 97 <= cc && cc <= 122;
  var isUpperCase = 65 <= cc && cc <= 90;
  var isDigit = 48 <= cc && cc <= 57;
  return isLowerCase || isUpperCase || isDigit;
}
327
328
// ECMA-262 - 15.1.3.3.
//
// Converts |uri| with ToString and runs it through Encode. The predicate
// passed along leaves alphanumerics and the characters
// ! # $ & ' ( ) * + , - . / : ; = ? @ _ ~ unencoded.
function URIEncode(uri) {
  var unescapePredicate = function(cc) {
    if (isAlphaNumeric(cc)) return true;
    return cc == 33 ||                // !
           (35 <= cc && cc <= 36) ||  // #$
           (38 <= cc && cc <= 47) ||  // &'()*+,-./
           (58 <= cc && cc <= 59) ||  // :;
           cc == 61 ||                // =
           (63 <= cc && cc <= 64) ||  // ?@
           cc == 95 ||                // _
           cc == 126;                 // ~
  };
  return Encode(ToString(uri), unescapePredicate);
}
356
357
// ECMA-262 - 15.1.3.4
//
// Converts |component| with ToString and runs it through Encode. The
// predicate passed along leaves alphanumerics and the characters
// ! ' ( ) * - . _ ~ unencoded.
function URIEncodeComponent(component) {
  var unescapePredicate = function(cc) {
    if (isAlphaNumeric(cc)) return true;
    return cc == 33 ||                // !
           (39 <= cc && cc <= 42) ||  // '()*
           (45 <= cc && cc <= 46) ||  // -.
           cc == 95 ||                // _
           cc == 126;                 // ~
  };
  return Encode(ToString(component), unescapePredicate);
}
379
380
// Returns the numeric value (0..15) of the hex digit whose char code is
// |code|, accepting '0'..'9', 'A'..'F' and 'a'..'f'. Returns -1 for any
// other char code.
function HexValueOf(code) {
  var base;
  if (48 <= code && code <= 57) {
    base = 48;  // '0' maps to 0.
  } else if (65 <= code && code <= 70) {
    base = 55;  // 'A' maps to 10.
  } else if (97 <= code && code <= 102) {
    base = 87;  // 'a' maps to 10.
  } else {
    return -1;
  }
  return code - base;
}
391
392
// Formats the low 16 bits of the character code |cc| as a four-digit,
// upper-case, zero-padded hex string, e.g. 64 -> "0040", 62234 -> "F31A".
// Builds hexCharArray on first use.
function CharCodeToHex4Str(cc) {
  if (hexCharArray === 0) {
    hexCharArray = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
                    "A", "B", "C", "D", "E", "F"];
  }
  var digits = hexCharArray;
  // Most significant nibble first.
  return digits[(cc >>> 12) & 0x0F] +
         digits[(cc >>> 8) & 0x0F] +
         digits[(cc >>> 4) & 0x0F] +
         digits[cc & 0x0F];
}
408
409
// Returns true when every character of the string |s| is a hex digit
// ('0'..'9', 'A'..'F' or 'a'..'f'), and false as soon as one is not. An
// empty string yields true.
function IsValidHex(s) {
  var length = s.length;
  for (var i = 0; i < length; ++i) {
    var cc = s.charCodeAt(i);
    var isDigit = 48 <= cc && cc <= 57;
    var isUpperHex = 65 <= cc && cc <= 70;
    var isLowerHex = 97 <= cc && cc <= 102;
    if (!(isDigit || isUpperHex || isLowerHex)) return false;
  }
  return true;
}
424
425
426// ECMA-262 - B.2.1.
427function URIEscape(str) {
428  var s = ToString(str);
429  return %URIEscape(s);
430}
431
432
433// ECMA-262 - B.2.2.
434function URIUnescape(str) {
435  var s = ToString(str);
436  return %URIUnescape(s);
437}
438
439
440// -------------------------------------------------------------------
441
442function SetUpUri() {
443  %CheckIsBootstrapping();
444
445  // Set up non-enumerable URI functions on the global object and set
446  // their names.
447  InstallFunctions(global, DONT_ENUM, $Array(
448    "escape", URIEscape,
449    "unescape", URIUnescape,
450    "decodeURI", URIDecode,
451    "decodeURIComponent", URIDecodeComponent,
452    "encodeURI", URIEncode,
453    "encodeURIComponent", URIEncodeComponent
454  ));
455}
456
457SetUpUri();
458