// UnicodeCharRanges.h revision 2f02ded68a114410f11bc2f4e901d0d8e5850de1
1ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko//===--- UnicodeCharRanges.h - Types and functions for character ranges ---===// 2ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko// 3ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko// The LLVM Compiler Infrastructure 4ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko// 5ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko// This file is distributed under the University of Illinois Open Source 6ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko// License. See LICENSE.TXT for details. 7ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko// 8ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko//===----------------------------------------------------------------------===// 9ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko#ifndef LLVM_SUPPORT_UNICODECHARRANGES_H 10ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko#define LLVM_SUPPORT_UNICODECHARRANGES_H 11ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 12ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko#include "llvm/ADT/ArrayRef.h" 13ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko#include "llvm/ADT/SmallPtrSet.h" 14ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko#include "llvm/Support/Compiler.h" 15ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko#include "llvm/Support/Debug.h" 16ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko#include "llvm/Support/Mutex.h" 17ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko#include "llvm/Support/MutexGuard.h" 18ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko#include "llvm/Support/raw_ostream.h" 19ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 202f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko#include <algorithm> 21ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 222f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander 
Kornienkonamespace llvm { 232f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienkonamespace sys { 242f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko 252f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko/// \brief Represents a closed range of Unicode code points [Lower, Upper]. 26ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienkostruct UnicodeCharRange { 27ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko uint32_t Lower; 28ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko uint32_t Upper; 29ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko}; 30ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 312f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienkoinline bool operator<(uint32_t Value, UnicodeCharRange Range) { 322f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko return Value < Range.Lower; 332f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko} 342f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienkoinline bool operator<(UnicodeCharRange Range, uint32_t Value) { 352f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko return Range.Upper < Value; 362f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko} 37ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 382f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko/// \brief Holds a reference to an ordered array of UnicodeCharRange and allows 392f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko/// to quickly check if a code point is contained in the set represented by this 402f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko/// array. 
412f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienkoclass UnicodeCharSet { 422f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienkopublic: 432f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko typedef llvm::ArrayRef<UnicodeCharRange> CharRanges; 44ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 452f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko /// \brief Constructs a UnicodeCharSet instance from an array of 462f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko /// UnicodeCharRanges. 472f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko /// 482f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko /// Array pointed by \p Ranges should have the lifetime at least as long as 492f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko /// the UnicodeCharSet instance, and should not change. Array is validated by 502f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko /// the constructor, so it makes sense to create as few UnicodeCharSet 512f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko /// instances per each array of ranges, as possible. 522f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko#ifdef NDEBUG 532f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko LLVM_CONSTEXPR 542f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko#endif 552f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko UnicodeCharSet(CharRanges Ranges) : Ranges(Ranges) { 562f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko assert(rangesAreValid()); 57ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko } 58ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 592f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko /// \brief Returns true if the character set contains the Unicode code point 602f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko /// \p C. 
612f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko bool contains(uint32_t C) const { 622f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko return std::binary_search(Ranges.begin(), Ranges.end(), C); 63ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko } 64ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 652f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienkoprivate: 662f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko /// \brief Returns true if each of the ranges is a proper closed range 672f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko /// [min, max], and if the ranges themselves are ordered and non-overlapping. 682f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko bool rangesAreValid() const { 692f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko uint32_t Prev = 0; 702f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko for (CharRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); 712f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko I != E; ++I) { 722f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko if (I != Ranges.begin() && Prev >= I->Lower) { 732f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko DEBUG(llvm::dbgs() << "Upper bound 0x"); 742f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko DEBUG(llvm::dbgs().write_hex(Prev)); 752f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko DEBUG(llvm::dbgs() << " should be less than succeeding lower bound 0x"); 762f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko DEBUG(llvm::dbgs().write_hex(I->Lower) << "\n"); 772f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko return false; 782f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko } 792f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko if (I->Upper < I->Lower) { 802f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko DEBUG(llvm::dbgs() << "Upper bound 0x"); 
812f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko DEBUG(llvm::dbgs().write_hex(I->Lower)); 822f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko DEBUG(llvm::dbgs() << " should not be less than lower bound 0x"); 832f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko DEBUG(llvm::dbgs().write_hex(I->Upper) << "\n"); 842f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko return false; 852f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko } 862f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko Prev = I->Upper; 872f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko } 88ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 892f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko return true; 902f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko } 91ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 922f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko const CharRanges Ranges; 932f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko}; 94ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 952f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko} // namespace sys 962f02ded68a114410f11bc2f4e901d0d8e5850de1Alexander Kornienko} // namespace llvm 97ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 98ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko 99ece0bec0c824e71f062656ed5c727baf2a7bfc90Alexander Kornienko#endif // LLVM_SUPPORT_UNICODECHARRANGES_H 100