15c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)/* 25c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * Copyright (C) 2011, 2012 Apple Inc. All rights reserved. 35c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies). 45c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * 55c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * This library is free software; you can redistribute it and/or 65c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * modify it under the terms of the GNU Library General Public 75c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * License as published by the Free Software Foundation; either 85c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * version 2 of the License, or (at your option) any later version. 95c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * 105c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * This library is distributed in the hope that it will be useful, 115c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * but WITHOUT ANY WARRANTY; without even the implied warranty of 125c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 135c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * Library General Public License for more details. 145c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * 155c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * You should have received a copy of the GNU Library General Public License 165c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * along with this library; see the file COPYING.LIB. If not, write to 175c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 185c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * Boston, MA 02110-1301, USA. 195c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * 205c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) */ 215c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 225c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#ifndef ASCIIFastPath_h 235c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#define ASCIIFastPath_h 245c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 2593ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)#include "wtf/Alignment.h" 2693ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)#include "wtf/CPU.h" 2793ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)#include "wtf/StdLibExtras.h" 2893ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)#include "wtf/unicode/Unicode.h" 2993ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)#include <stdint.h> 3093ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles) 315c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#if OS(DARWIN) && (CPU(X86) || CPU(X86_64)) 325c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#include <emmintrin.h> 335c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#endif 345c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 355c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)namespace WTF { 365c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 375c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// Assuming that a pointer is the size of a "machine word", then 385c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// uintptr_t is an integer type that is also a machine word. 395c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)typedef uintptr_t MachineWord; 405c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)const uintptr_t machineWordAlignmentMask = sizeof(MachineWord) - 1; 415c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 425c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)inline bool isAlignedToMachineWord(const void* pointer) 435c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){ 445c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) return !(reinterpret_cast<uintptr_t>(pointer) & machineWordAlignmentMask); 455c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)} 465c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 475c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)template<typename T> inline T* alignToMachineWord(T* pointer) 485c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){ 495c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(pointer) & ~machineWordAlignmentMask); 505c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)} 515c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 525c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)template<size_t size, typename CharacterType> struct NonASCIIMask; 535c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)template<> struct NonASCIIMask<4, UChar> { 545c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) static inline uint32_t value() { return 0xFF80FF80U; } 555c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}; 565c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)template<> struct NonASCIIMask<4, LChar> { 575c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) static inline uint32_t value() { return 0x80808080U; } 585c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}; 595c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)template<> struct NonASCIIMask<8, UChar> { 605c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; } 615c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}; 625c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)template<> struct NonASCIIMask<8, LChar> { 635c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) static inline uint64_t value() { return 0x8080808080808080ULL; } 645c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}; 655c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 665c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 675c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)template<typename CharacterType> 685c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)inline bool isAllASCII(MachineWord word) 695c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){ 705c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) return !(word & NonASCIIMask<sizeof(MachineWord), CharacterType>::value()); 715c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)} 725c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 735c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// Note: This function assume the input is likely all ASCII, and 745c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// does not leave early if it is not the case. 755c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)template<typename CharacterType> 765c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)inline bool charactersAreAllASCII(const CharacterType* characters, size_t length) 775c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){ 785c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) MachineWord allCharBits = 0; 795c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) const CharacterType* end = characters + length; 805c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 815c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) // Prologue: align the input. 825c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) while (!isAlignedToMachineWord(characters) && characters != end) { 835c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) allCharBits |= *characters; 845c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) ++characters; 855c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) } 865c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 875c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) // Compare the values of CPU word size. 885c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) const CharacterType* wordEnd = alignToMachineWord(end); 895c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) const size_t loopIncrement = sizeof(MachineWord) / sizeof(CharacterType); 905c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) while (characters < wordEnd) { 91926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) allCharBits |= *(reinterpret_cast_ptr<const MachineWord*>(characters)); 925c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) characters += loopIncrement; 935c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) } 945c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 955c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) // Process the remaining bytes. 965c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) while (characters != end) { 975c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) allCharBits |= *characters; 985c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) ++characters; 995c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) } 1005c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 1015c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) MachineWord nonASCIIBitMask = NonASCIIMask<sizeof(MachineWord), CharacterType>::value(); 1025c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) return !(allCharBits & nonASCIIBitMask); 1035c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)} 1045c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 1055c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)inline void copyLCharsFromUCharSource(LChar* destination, const UChar* source, size_t length) 1065c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){ 1075c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#if OS(DARWIN) && (CPU(X86) || CPU(X86_64)) 1085c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) const uintptr_t memoryAccessSize = 16; // Memory accesses on 16 byte (128 bit) alignment 1095c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) const uintptr_t memoryAccessMask = memoryAccessSize - 1; 1105c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 1115c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) size_t i = 0; 1125c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) for (;i < length && !isAlignedTo<memoryAccessMask>(&source[i]); ++i) { 1135c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) ASSERT(!(source[i] & 0xff00)); 1145c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) destination[i] = static_cast<LChar>(source[i]); 1155c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) } 1165c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 1175c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) const uintptr_t sourceLoadSize = 32; // Process 32 bytes (16 UChars) each iteration 118926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) const size_t ucharsPerLoop = sourceLoadSize / sizeof(UChar); 1195c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) if (length > ucharsPerLoop) { 120926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) const size_t endLength = length - ucharsPerLoop + 1; 1215c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) for (; i < endLength; i += ucharsPerLoop) { 1225c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#ifndef NDEBUG 123926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) for (unsigned checkIndex = 0; checkIndex < ucharsPerLoop; ++checkIndex) 1245c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) ASSERT(!(source[i+checkIndex] & 0xff00)); 1255c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#endif 1265c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) __m128i first8UChars = _mm_load_si128(reinterpret_cast<const __m128i*>(&source[i])); 1275c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) __m128i second8UChars = _mm_load_si128(reinterpret_cast<const __m128i*>(&source[i+8])); 1285c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) __m128i packedChars = _mm_packus_epi16(first8UChars, second8UChars); 1295c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) _mm_storeu_si128(reinterpret_cast<__m128i*>(&destination[i]), packedChars); 1305c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) } 1315c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) } 1325c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 1335c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) for (; i < length; ++i) { 1345c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) ASSERT(!(source[i] & 0xff00)); 1355c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) destination[i] = static_cast<LChar>(source[i]); 1365c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) } 13753e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#elif COMPILER(GCC) && CPU(ARM_NEON) && !(CPU(BIG_ENDIAN) || CPU(MIDDLE_ENDIAN)) && defined(NDEBUG) 1385c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) const LChar* const end = destination + length; 1395c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) const uintptr_t memoryAccessSize = 8; 1405c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 1415c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) if (length >= (2 * memoryAccessSize) - 1) { 1425c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) // Prefix: align dst on 64 bits. 1435c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) const uintptr_t memoryAccessMask = memoryAccessSize - 1; 144926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles) while (!isAlignedTo<memoryAccessMask>(destination)) 1455c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) *destination++ = static_cast<LChar>(*source++); 1465c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 1475c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) // Vector interleaved unpack, we only store the lower 8 bits. 1485c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) const uintptr_t lengthLeft = end - destination; 1495c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) const LChar* const simdEnd = end - (lengthLeft % memoryAccessSize); 1505c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) do { 1515c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) asm("vld2.8 { d0-d1 }, [%[SOURCE]] !\n\t" 1525c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) "vst1.8 { d0 }, [%[DESTINATION],:64] !\n\t" 1535c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) : [SOURCE]"+r" (source), [DESTINATION]"+r" (destination) 1545c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) : 1555c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) : "memory", "d0", "d1"); 1565c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) } while (destination != simdEnd); 1575c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) } 1585c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 1595c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) while (destination != end) 1605c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) *destination++ = static_cast<LChar>(*source++); 1615c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#else 1625c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) for (size_t i = 0; i < length; ++i) { 1635c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) ASSERT(!(source[i] & 0xff00)); 1645c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) destination[i] = static_cast<LChar>(source[i]); 1655c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) } 1665c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#endif 1675c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)} 1685c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 1695c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)} // namespace WTF 1705c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 1715c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#endif // ASCIIFastPath_h 172