uri.h revision b8a8cc1952d61a2f3a2568848933943a543b5d3e
1// Copyright 2013 the V8 project authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#ifndef V8_URI_H_ 6#define V8_URI_H_ 7 8#include "src/v8.h" 9 10#include "src/conversions.h" 11#include "src/string-search.h" 12#include "src/utils.h" 13 14namespace v8 { 15namespace internal { 16 17 18template <typename Char> 19static INLINE(Vector<const Char> GetCharVector(Handle<String> string)); 20 21 22template <> 23Vector<const uint8_t> GetCharVector(Handle<String> string) { 24 String::FlatContent flat = string->GetFlatContent(); 25 DCHECK(flat.IsOneByte()); 26 return flat.ToOneByteVector(); 27} 28 29 30template <> 31Vector<const uc16> GetCharVector(Handle<String> string) { 32 String::FlatContent flat = string->GetFlatContent(); 33 DCHECK(flat.IsTwoByte()); 34 return flat.ToUC16Vector(); 35} 36 37 38class URIUnescape : public AllStatic { 39 public: 40 template<typename Char> 41 MUST_USE_RESULT static MaybeHandle<String> Unescape(Isolate* isolate, 42 Handle<String> source); 43 44 private: 45 static const signed char kHexValue['g']; 46 47 template<typename Char> 48 MUST_USE_RESULT static MaybeHandle<String> UnescapeSlow( 49 Isolate* isolate, Handle<String> string, int start_index); 50 51 static INLINE(int TwoDigitHex(uint16_t character1, uint16_t character2)); 52 53 template <typename Char> 54 static INLINE(int UnescapeChar(Vector<const Char> vector, 55 int i, 56 int length, 57 int* step)); 58}; 59 60 61const signed char URIUnescape::kHexValue[] = { 62 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 63 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 64 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 65 -0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, 66 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, 67 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 68 -1, 10, 11, 12, 13, 14, 15 }; 69 70 71template<typename Char> 72MaybeHandle<String> URIUnescape::Unescape(Isolate* isolate, 73 Handle<String> source) { 74 int index; 75 { DisallowHeapAllocation no_allocation; 76 StringSearch<uint8_t, Char> search(isolate, STATIC_CHAR_VECTOR("%")); 77 index = search.Search(GetCharVector<Char>(source), 0); 78 if (index < 0) return source; 79 } 80 return UnescapeSlow<Char>(isolate, source, index); 81} 82 83 84template <typename Char> 85MaybeHandle<String> URIUnescape::UnescapeSlow( 86 Isolate* isolate, Handle<String> string, int start_index) { 87 bool one_byte = true; 88 int length = string->length(); 89 90 int unescaped_length = 0; 91 { DisallowHeapAllocation no_allocation; 92 Vector<const Char> vector = GetCharVector<Char>(string); 93 for (int i = start_index; i < length; unescaped_length++) { 94 int step; 95 if (UnescapeChar(vector, i, length, &step) > 96 String::kMaxOneByteCharCode) { 97 one_byte = false; 98 } 99 i += step; 100 } 101 } 102 103 DCHECK(start_index < length); 104 Handle<String> first_part = 105 isolate->factory()->NewProperSubString(string, 0, start_index); 106 107 int dest_position = 0; 108 Handle<String> second_part; 109 DCHECK(unescaped_length <= String::kMaxLength); 110 if (one_byte) { 111 Handle<SeqOneByteString> dest = isolate->factory()->NewRawOneByteString( 112 unescaped_length).ToHandleChecked(); 113 DisallowHeapAllocation no_allocation; 114 Vector<const Char> vector = GetCharVector<Char>(string); 115 for (int i = start_index; i < length; dest_position++) { 116 int step; 117 dest->SeqOneByteStringSet(dest_position, 118 UnescapeChar(vector, i, length, &step)); 119 i += step; 120 } 121 second_part = dest; 122 } else { 123 Handle<SeqTwoByteString> dest = isolate->factory()->NewRawTwoByteString( 124 unescaped_length).ToHandleChecked(); 125 DisallowHeapAllocation no_allocation; 126 Vector<const Char> vector = GetCharVector<Char>(string); 127 for (int i = start_index; i < length; dest_position++) { 128 int step; 129 dest->SeqTwoByteStringSet(dest_position, 130 UnescapeChar(vector, i, length, &step)); 131 i += step; 132 } 133 second_part = dest; 134 } 135 return isolate->factory()->NewConsString(first_part, second_part); 136} 137 138 139int URIUnescape::TwoDigitHex(uint16_t character1, uint16_t character2) { 140 if (character1 > 'f') return -1; 141 int hi = kHexValue[character1]; 142 if (hi == -1) return -1; 143 if (character2 > 'f') return -1; 144 int lo = kHexValue[character2]; 145 if (lo == -1) return -1; 146 return (hi << 4) + lo; 147} 148 149 150template <typename Char> 151int URIUnescape::UnescapeChar(Vector<const Char> vector, 152 int i, 153 int length, 154 int* step) { 155 uint16_t character = vector[i]; 156 int32_t hi = 0; 157 int32_t lo = 0; 158 if (character == '%' && 159 i <= length - 6 && 160 vector[i + 1] == 'u' && 161 (hi = TwoDigitHex(vector[i + 2], 162 vector[i + 3])) != -1 && 163 (lo = TwoDigitHex(vector[i + 4], 164 vector[i + 5])) != -1) { 165 *step = 6; 166 return (hi << 8) + lo; 167 } else if (character == '%' && 168 i <= length - 3 && 169 (lo = TwoDigitHex(vector[i + 1], 170 vector[i + 2])) != -1) { 171 *step = 3; 172 return lo; 173 } else { 174 *step = 1; 175 return character; 176 } 177} 178 179 180class URIEscape : public AllStatic { 181 public: 182 template<typename Char> 183 MUST_USE_RESULT static MaybeHandle<String> Escape(Isolate* isolate, 184 Handle<String> string); 185 186 private: 187 static const char kHexChars[17]; 188 static const char kNotEscaped[256]; 189 190 static bool IsNotEscaped(uint16_t c) { return kNotEscaped[c] != 0; } 191}; 192 193 194const char URIEscape::kHexChars[] = "0123456789ABCDEF"; 195 196 197// kNotEscaped is generated by the following: 198// 199// #!/bin/perl 200// for (my $i = 0; $i < 256; $i++) { 201// print "\n" if $i % 16 == 0; 202// my $c = chr($i); 203// my $escaped = 1; 204// $escaped = 0 if $c =~ m#[A-Za-z0-9@*_+./-]#; 205// print $escaped ? "0, " : "1, "; 206// } 207 208const char URIEscape::kNotEscaped[] = { 209 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 212 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 213 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 214 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 215 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 216 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 217 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 218 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 219 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 220 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 221 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 222 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 223 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 224 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; 225 226 227template<typename Char> 228MaybeHandle<String> URIEscape::Escape(Isolate* isolate, Handle<String> string) { 229 DCHECK(string->IsFlat()); 230 int escaped_length = 0; 231 int length = string->length(); 232 233 { DisallowHeapAllocation no_allocation; 234 Vector<const Char> vector = GetCharVector<Char>(string); 235 for (int i = 0; i < length; i++) { 236 uint16_t c = vector[i]; 237 if (c >= 256) { 238 escaped_length += 6; 239 } else if (IsNotEscaped(c)) { 240 escaped_length++; 241 } else { 242 escaped_length += 3; 243 } 244 245 // We don't allow strings that are longer than a maximal length. 246 DCHECK(String::kMaxLength < 0x7fffffff - 6); // Cannot overflow. 247 if (escaped_length > String::kMaxLength) break; // Provoke exception. 248 } 249 } 250 251 // No length change implies no change. Return original string if no change. 252 if (escaped_length == length) return string; 253 254 Handle<SeqOneByteString> dest; 255 ASSIGN_RETURN_ON_EXCEPTION( 256 isolate, dest, 257 isolate->factory()->NewRawOneByteString(escaped_length), 258 String); 259 int dest_position = 0; 260 261 { DisallowHeapAllocation no_allocation; 262 Vector<const Char> vector = GetCharVector<Char>(string); 263 for (int i = 0; i < length; i++) { 264 uint16_t c = vector[i]; 265 if (c >= 256) { 266 dest->SeqOneByteStringSet(dest_position, '%'); 267 dest->SeqOneByteStringSet(dest_position+1, 'u'); 268 dest->SeqOneByteStringSet(dest_position+2, kHexChars[c >> 12]); 269 dest->SeqOneByteStringSet(dest_position+3, kHexChars[(c >> 8) & 0xf]); 270 dest->SeqOneByteStringSet(dest_position+4, kHexChars[(c >> 4) & 0xf]); 271 dest->SeqOneByteStringSet(dest_position+5, kHexChars[c & 0xf]); 272 dest_position += 6; 273 } else if (IsNotEscaped(c)) { 274 dest->SeqOneByteStringSet(dest_position, c); 275 dest_position++; 276 } else { 277 dest->SeqOneByteStringSet(dest_position, '%'); 278 dest->SeqOneByteStringSet(dest_position+1, kHexChars[c >> 4]); 279 dest->SeqOneByteStringSet(dest_position+2, kHexChars[c & 0xf]); 280 dest_position += 3; 281 } 282 } 283 } 284 285 return dest; 286} 287 288} } // namespace v8::internal 289 290#endif // V8_URI_H_ 291