15c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)/*
25c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * Copyright (C) 2011, 2012 Apple Inc. All rights reserved.
35c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
45c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) *
55c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * This library is free software; you can redistribute it and/or
65c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * modify it under the terms of the GNU Library General Public
75c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * License as published by the Free Software Foundation; either
85c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * version 2 of the License, or (at your option) any later version.
95c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) *
105c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * This library is distributed in the hope that it will be useful,
115c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * but WITHOUT ANY WARRANTY; without even the implied warranty of
125c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
135c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * Library General Public License for more details.
145c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) *
155c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * You should have received a copy of the GNU Library General Public License
165c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * along with this library; see the file COPYING.LIB.  If not, write to
175c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
185c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * Boston, MA 02110-1301, USA.
195c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) *
205c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) */
215c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
225c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#ifndef ASCIIFastPath_h
235c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#define ASCIIFastPath_h
245c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
2593ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)#include "wtf/Alignment.h"
2693ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)#include "wtf/CPU.h"
2793ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)#include "wtf/StdLibExtras.h"
2893ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)#include "wtf/unicode/Unicode.h"
2993ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)#include <stdint.h>
3093ac45cfc74041c8ae536ce58a9534d46db2024eTorne (Richard Coles)
315c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#if OS(DARWIN) && (CPU(X86) || CPU(X86_64))
325c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#include <emmintrin.h>
335c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#endif
345c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
355c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)namespace WTF {
365c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
// A "machine word" is taken to be the same size as a pointer, so uintptr_t
// (an unsigned integer wide enough to hold a pointer) is used as the word type.
typedef uintptr_t MachineWord;

// Low-order address bits that are non-zero when an address is not a multiple
// of sizeof(MachineWord).
const MachineWord machineWordAlignmentMask = sizeof(MachineWord) - 1;
415c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
425c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)inline bool isAlignedToMachineWord(const void* pointer)
435c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
445c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return !(reinterpret_cast<uintptr_t>(pointer) & machineWordAlignmentMask);
455c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
465c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
475c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)template<typename T> inline T* alignToMachineWord(T* pointer)
485c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
495c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(pointer) & ~machineWordAlignmentMask);
505c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
515c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
525c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)template<size_t size, typename CharacterType> struct NonASCIIMask;
535c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)template<> struct NonASCIIMask<4, UChar> {
545c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    static inline uint32_t value() { return 0xFF80FF80U; }
555c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)};
565c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)template<> struct NonASCIIMask<4, LChar> {
575c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    static inline uint32_t value() { return 0x80808080U; }
585c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)};
595c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)template<> struct NonASCIIMask<8, UChar> {
605c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; }
615c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)};
625c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)template<> struct NonASCIIMask<8, LChar> {
635c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    static inline uint64_t value() { return 0x8080808080808080ULL; }
645c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)};
655c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
665c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
675c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)template<typename CharacterType>
685c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)inline bool isAllASCII(MachineWord word)
695c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
705c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return !(word & NonASCIIMask<sizeof(MachineWord), CharacterType>::value());
715c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
725c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
735c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// Note: This function assume the input is likely all ASCII, and
745c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// does not leave early if it is not the case.
755c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)template<typename CharacterType>
765c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)inline bool charactersAreAllASCII(const CharacterType* characters, size_t length)
775c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
785c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    MachineWord allCharBits = 0;
795c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    const CharacterType* end = characters + length;
805c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
815c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Prologue: align the input.
825c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    while (!isAlignedToMachineWord(characters) && characters != end) {
835c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        allCharBits |= *characters;
845c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        ++characters;
855c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
865c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
875c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Compare the values of CPU word size.
885c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    const CharacterType* wordEnd = alignToMachineWord(end);
895c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    const size_t loopIncrement = sizeof(MachineWord) / sizeof(CharacterType);
905c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    while (characters < wordEnd) {
91926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        allCharBits |= *(reinterpret_cast_ptr<const MachineWord*>(characters));
925c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        characters += loopIncrement;
935c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
945c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
955c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    // Process the remaining bytes.
965c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    while (characters != end) {
975c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        allCharBits |= *characters;
985c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        ++characters;
995c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
1005c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1015c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    MachineWord nonASCIIBitMask = NonASCIIMask<sizeof(MachineWord), CharacterType>::value();
1025c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return !(allCharBits & nonASCIIBitMask);
1035c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
1045c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1055c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)inline void copyLCharsFromUCharSource(LChar* destination, const UChar* source, size_t length)
1065c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
1075c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#if OS(DARWIN) && (CPU(X86) || CPU(X86_64))
1085c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    const uintptr_t memoryAccessSize = 16; // Memory accesses on 16 byte (128 bit) alignment
1095c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    const uintptr_t memoryAccessMask = memoryAccessSize - 1;
1105c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1115c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    size_t i = 0;
1125c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    for (;i < length && !isAlignedTo<memoryAccessMask>(&source[i]); ++i) {
1135c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        ASSERT(!(source[i] & 0xff00));
1145c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        destination[i] = static_cast<LChar>(source[i]);
1155c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
1165c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1175c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    const uintptr_t sourceLoadSize = 32; // Process 32 bytes (16 UChars) each iteration
118926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)    const size_t ucharsPerLoop = sourceLoadSize / sizeof(UChar);
1195c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (length > ucharsPerLoop) {
120926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        const size_t endLength = length - ucharsPerLoop + 1;
1215c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        for (; i < endLength; i += ucharsPerLoop) {
1225c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#ifndef NDEBUG
123926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)            for (unsigned checkIndex = 0; checkIndex < ucharsPerLoop; ++checkIndex)
1245c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                ASSERT(!(source[i+checkIndex] & 0xff00));
1255c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#endif
1265c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            __m128i first8UChars = _mm_load_si128(reinterpret_cast<const __m128i*>(&source[i]));
1275c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            __m128i second8UChars = _mm_load_si128(reinterpret_cast<const __m128i*>(&source[i+8]));
1285c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            __m128i packedChars = _mm_packus_epi16(first8UChars, second8UChars);
1295c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            _mm_storeu_si128(reinterpret_cast<__m128i*>(&destination[i]), packedChars);
1305c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        }
1315c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
1325c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1335c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    for (; i < length; ++i) {
1345c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        ASSERT(!(source[i] & 0xff00));
1355c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        destination[i] = static_cast<LChar>(source[i]);
1365c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
13753e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#elif COMPILER(GCC) && CPU(ARM_NEON) && !(CPU(BIG_ENDIAN) || CPU(MIDDLE_ENDIAN)) && defined(NDEBUG)
1385c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    const LChar* const end = destination + length;
1395c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    const uintptr_t memoryAccessSize = 8;
1405c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1415c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (length >= (2 * memoryAccessSize) - 1) {
1425c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // Prefix: align dst on 64 bits.
1435c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        const uintptr_t memoryAccessMask = memoryAccessSize - 1;
144926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)        while (!isAlignedTo<memoryAccessMask>(destination))
1455c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            *destination++ = static_cast<LChar>(*source++);
1465c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1475c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        // Vector interleaved unpack, we only store the lower 8 bits.
1485c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        const uintptr_t lengthLeft = end - destination;
1495c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        const LChar* const simdEnd = end - (lengthLeft % memoryAccessSize);
1505c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        do {
1515c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            asm("vld2.8   { d0-d1 }, [%[SOURCE]] !\n\t"
1525c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                "vst1.8   { d0 }, [%[DESTINATION],:64] !\n\t"
1535c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                : [SOURCE]"+r" (source), [DESTINATION]"+r" (destination)
1545c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                :
1555c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)                : "memory", "d0", "d1");
1565c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        } while (destination != simdEnd);
1575c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
1585c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1595c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    while (destination != end)
1605c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        *destination++ = static_cast<LChar>(*source++);
1615c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#else
1625c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    for (size_t i = 0; i < length; ++i) {
1635c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        ASSERT(!(source[i] & 0xff00));
1645c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        destination[i] = static_cast<LChar>(source[i]);
1655c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
1665c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#endif
1675c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
1685c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1695c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)} // namespace WTF
1705c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1715c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#endif // ASCIIFastPath_h
172