// utf.cc revision cc2f2393e69a9b1425bad1a89f41aaaf8c38f9e2
1/* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "utf.h" 18 19#include "base/logging.h" 20#include "mirror/array.h" 21#include "mirror/object-inl.h" 22#include "utf-inl.h" 23 24namespace art { 25 26size_t CountModifiedUtf8Chars(const char* utf8) { 27 size_t len = 0; 28 int ic; 29 while ((ic = *utf8++) != '\0') { 30 len++; 31 if ((ic & 0x80) == 0) { 32 // one-byte encoding 33 continue; 34 } 35 // two- or three-byte encoding 36 utf8++; 37 if ((ic & 0x20) == 0) { 38 // two-byte encoding 39 continue; 40 } 41 // three-byte encoding 42 utf8++; 43 } 44 return len; 45} 46 47void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_data_out, const char* utf8_data_in) { 48 while (*utf8_data_in != '\0') { 49 *utf16_data_out++ = GetUtf16FromUtf8(&utf8_data_in); 50 } 51} 52 53void ConvertUtf16ToModifiedUtf8(char* utf8_out, const uint16_t* utf16_in, size_t char_count) { 54 while (char_count--) { 55 uint16_t ch = *utf16_in++; 56 if (ch > 0 && ch <= 0x7f) { 57 *utf8_out++ = ch; 58 } else { 59 if (ch > 0x07ff) { 60 *utf8_out++ = (ch >> 12) | 0xe0; 61 *utf8_out++ = ((ch >> 6) & 0x3f) | 0x80; 62 *utf8_out++ = (ch & 0x3f) | 0x80; 63 } else /*(ch > 0x7f || ch == 0)*/ { 64 *utf8_out++ = (ch >> 6) | 0xc0; 65 *utf8_out++ = (ch & 0x3f) | 0x80; 66 } 67 } 68 } 69} 70 71int32_t ComputeUtf16Hash(mirror::CharArray* chars, int32_t offset, 72 size_t char_count) { 73 int32_t hash = 0; 74 for (size_t i = 0; i < 
char_count; i++) { 75 hash = hash * 31 + chars->Get(offset + i); 76 } 77 return hash; 78} 79 80int32_t ComputeUtf16Hash(const uint16_t* chars, size_t char_count) { 81 int32_t hash = 0; 82 while (char_count--) { 83 hash = hash * 31 + *chars++; 84 } 85 return hash; 86} 87 88int32_t ComputeUtf8Hash(const char* chars) { 89 int32_t hash = 0; 90 while (*chars != '\0') { 91 hash = hash * 31 + GetUtf16FromUtf8(&chars); 92 } 93 return hash; 94} 95 96int CompareModifiedUtf8ToUtf16AsCodePointValues(const char* utf8_1, const uint16_t* utf8_2) { 97 for (;;) { 98 if (*utf8_1 == '\0') { 99 return (*utf8_2 == '\0') ? 0 : -1; 100 } else if (*utf8_2 == '\0') { 101 return 1; 102 } 103 104 int c1 = GetUtf16FromUtf8(&utf8_1); 105 int c2 = *utf8_2; 106 107 if (c1 != c2) { 108 return c1 > c2 ? 1 : -1; 109 } 110 } 111} 112 113size_t CountUtf8Bytes(const uint16_t* chars, size_t char_count) { 114 size_t result = 0; 115 while (char_count--) { 116 uint16_t ch = *chars++; 117 if (ch > 0 && ch <= 0x7f) { 118 ++result; 119 } else { 120 if (ch > 0x7ff) { 121 result += 3; 122 } else { 123 result += 2; 124 } 125 } 126 } 127 return result; 128} 129 130} // namespace art 131