1ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko//===--- UnicodeCharRanges.h - Types and functions for character ranges ---===// 2ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko// 3ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko// The LLVM Compiler Infrastructure 4ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko// 5ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko// This file is distributed under the University of Illinois Open Source 6ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko// License. See LICENSE.TXT for details. 7ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko// 8ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko//===----------------------------------------------------------------------===// 9ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko#ifndef LLVM_SUPPORT_UNICODECHARRANGES_H 10ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko#define LLVM_SUPPORT_UNICODECHARRANGES_H 11ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 12ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko#include "llvm/ADT/ArrayRef.h" 13ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko#include "llvm/ADT/SmallPtrSet.h" 14ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko#include "llvm/Support/Compiler.h" 15ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko#include "llvm/Support/Debug.h" 16ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko#include "llvm/Support/Mutex.h" 17ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko#include "llvm/Support/MutexGuard.h" 18ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko#include "llvm/Support/raw_ostream.h" 192f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko#include <algorithm> 20ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 212f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienkonamespace llvm { 222f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienkonamespace sys { 232f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko 24dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "unicode" 25dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 262f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko/// \brief Represents a closed range of Unicode code points [Lower, Upper]. 27ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienkostruct UnicodeCharRange { 28ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko uint32_t Lower; 29ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko uint32_t Upper; 30ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko}; 31ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 322f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienkoinline bool operator<(uint32_t Value, UnicodeCharRange Range) { 332f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko return Value < Range.Lower; 342f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko} 352f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienkoinline bool operator<(UnicodeCharRange Range, uint32_t Value) { 362f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko return Range.Upper < Value; 372f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko} 38ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 392f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko/// \brief Holds a reference to an ordered array of UnicodeCharRange and allows 402f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko/// to quickly check if a code point is contained in the set represented by this 412f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko/// array. 422f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienkoclass UnicodeCharSet { 432f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienkopublic: 4436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines typedef ArrayRef<UnicodeCharRange> CharRanges; 45ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 462f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko /// \brief Constructs a UnicodeCharSet instance from an array of 472f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko /// UnicodeCharRanges. 482f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko /// 492f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko /// Array pointed by \p Ranges should have the lifetime at least as long as 502f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko /// the UnicodeCharSet instance, and should not change. Array is validated by 512f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko /// the constructor, so it makes sense to create as few UnicodeCharSet 522f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko /// instances per each array of ranges, as possible. 532f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko UnicodeCharSet(CharRanges Ranges) : Ranges(Ranges) { 542f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko assert(rangesAreValid()); 55ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko } 56ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 572f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko /// \brief Returns true if the character set contains the Unicode code point 582f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko /// \p C. 592f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko bool contains(uint32_t C) const { 602f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko return std::binary_search(Ranges.begin(), Ranges.end(), C); 61ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko } 62ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 632f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienkoprivate: 642f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko /// \brief Returns true if each of the ranges is a proper closed range 652f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko /// [min, max], and if the ranges themselves are ordered and non-overlapping. 662f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko bool rangesAreValid() const { 672f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko uint32_t Prev = 0; 682f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko for (CharRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); 692f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko I != E; ++I) { 702f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko if (I != Ranges.begin() && Prev >= I->Lower) { 7136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "Upper bound 0x"); 7236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs().write_hex(Prev)); 7336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << " should be less than succeeding lower bound 0x"); 7436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs().write_hex(I->Lower) << "\n"); 752f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko return false; 762f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko } 772f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko if (I->Upper < I->Lower) { 7836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "Upper bound 0x"); 7936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs().write_hex(I->Lower)); 8036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << " should not be less than lower bound 0x"); 8136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs().write_hex(I->Upper) << "\n"); 822f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko return false; 832f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko } 842f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko Prev = I->Upper; 852f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko } 86ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 872f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko return true; 882f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko } 89ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 902f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko const CharRanges Ranges; 912f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko}; 92ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 93dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#undef DEBUG_TYPE // "unicode" 94dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 952f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko} // namespace sys 962f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko} // namespace llvm 97ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 98ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 99ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko#endif // LLVM_SUPPORT_UNICODECHARRANGES_H 100