/***********************************************************************
 * Copyright (C) 2016 and later: Unicode, Inc. and others.
 * License & terms of use: http://www.unicode.org/copyright.html#License
 ***********************************************************************
 * COPYRIGHT:
 * Copyright (c) 1999-2002, International Business Machines Corporation and
 * others. All Rights Reserved.
 ***********************************************************************/
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unaccent.h"
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Constructor
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnaccentTransliterator::UnaccentTransliterator() :
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    normalizer("", Normalizer::DECOMP),
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Transliterator("Unaccent", 0) {
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Destructor
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnaccentTransliterator::~UnaccentTransliterator() {
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Remove accents from a character using Normalizer.
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUChar UnaccentTransliterator::unaccent(UChar c) const {
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString str(c);
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnaccentTransliterator* t = (UnaccentTransliterator*)this;
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    t->normalizer.setText(str, status);
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (U_FAILURE(status)) {
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return c;
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return (UChar) t->normalizer.next();
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement Transliterator API
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid UnaccentTransliterator::handleTransliterate(Replaceable& text,
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                                 UTransPosition& index,
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                                 UBool incremental) const {
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString str("a");
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while (index.start < index.limit) {
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UChar c = text.charAt(index.start);
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UChar d = unaccent(c);
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (c != d) {
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            str.setCharAt(0, d);
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            text.handleReplaceBetween(index.start, index.start+1, str);
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        index.start++;
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
58