164339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert/*********************************************************************** 264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert * Copyright (C) 2016 and later: Unicode, Inc. and others. 364339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert * License & terms of use: http://www.unicode.org/copyright.html#License 464339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert *********************************************************************** 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * COPYRIGHT: 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copyright (c) 1999-2002, International Business Machines Corporation and 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * others. All Rights Reserved. 864339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert ***********************************************************************/ 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unaccent.h" 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Constructor 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnaccentTransliterator::UnaccentTransliterator() : 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru normalizer("", Normalizer::DECOMP), 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator("Unaccent", 0) { 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Destructor 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnaccentTransliterator::~UnaccentTransliterator() { 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Remove accents from a character using Normalizer. 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUChar UnaccentTransliterator::unaccent(UChar c) const { 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString str(c); 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnaccentTransliterator* t = (UnaccentTransliterator*)this; 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru t->normalizer.setText(str, status); 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(status)) { 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return c; 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (UChar) t->normalizer.next(); 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement Transliterator API 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid UnaccentTransliterator::handleTransliterate(Replaceable& text, 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UTransPosition& index, 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool incremental) const { 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString str("a"); 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (index.start < index.limit) { 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar c = text.charAt(index.start); 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar d = unaccent(c); 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (c != d) { 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru str.setCharAt(0, d); 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru text.handleReplaceBetween(index.start, index.start+1, str); 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru index.start++; 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 58