1/*
2 * Copyright 2001-2004 The Apache Software Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package org.apache.commons.codec.language;
18
19import org.apache.commons.codec.EncoderException;
20import org.apache.commons.codec.StringEncoder;
21
22/**
23 * Encodes a string into a double metaphone value.
24 * This Implementation is based on the algorithm by <CITE>Lawrence Philips</CITE>.
25 * <ul>
26 * <li>Original Article: <a
27 * href="http://www.cuj.com/documents/s=8038/cuj0006philips/">
28 * http://www.cuj.com/documents/s=8038/cuj0006philips/</a></li>
29 * <li>Original Source Code: <a href="ftp://ftp.cuj.com/pub/2000/1806/philips.zip">
30 * ftp://ftp.cuj.com/pub/2000/1806/philips.zip</a></li>
31 * </ul>
32 *
33 * @author Apache Software Foundation
34 * @version $Id: DoubleMetaphone.java,v 1.24 2004/06/05 18:32:04 ggregory Exp $
35 *
36 * @deprecated Please use {@link java.net.URL#openConnection} instead.
37 *     Please visit <a href="http://android-developers.blogspot.com/2011/09/androids-http-clients.html">this webpage</a>
38 *     for further details.
39 */
40@Deprecated
41public class DoubleMetaphone implements StringEncoder {
42
    /**
     * "Vowels" to test for (the set includes 'Y')
     */
    private static final String VOWELS = "AEIOUY";

    /**
     * Prefixes when present which are not pronounced
     */
    private static final String[] SILENT_START =
    { "GN", "KN", "PN", "WR", "PS" };

    /**
     * Single-character candidates (including a space) that conditionCH1 tests
     * two positions after the start of a "CH".
     */
    private static final String[] L_R_N_M_B_H_F_V_W_SPACE =
    { "L", "R", "N", "M", "B", "H", "F", "V", "W", " " };

    /**
     * Two-letter sequences that handleG tests immediately after a
     * word-initial 'G'.
     */
    private static final String[] ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER =
    { "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER" };

    /**
     * Single letters that handleJ tests immediately after a 'J'.
     */
    private static final String[] L_T_K_S_N_M_B_Z =
    { "L", "T", "K", "S", "N", "M", "B", "Z" };

    /**
     * Maximum length of an encoding, default is 4
     */
    protected int maxCodeLen = 4;
64
65    /**
66     * Creates an instance of this DoubleMetaphone encoder
67     */
68    public DoubleMetaphone() {
69        super();
70    }
71
72    /**
73     * Encode a value with Double Metaphone
74     *
75     * @param value String to encode
76     * @return an encoded string
77     */
78    public String doubleMetaphone(String value) {
79        return doubleMetaphone(value, false);
80    }
81
82    /**
83     * Encode a value with Double Metaphone, optionally using the alternate
84     * encoding.
85     *
86     * @param value String to encode
87     * @param alternate use alternate encode
88     * @return an encoded string
89     */
90    public String doubleMetaphone(String value, boolean alternate) {
91        value = cleanInput(value);
92        if (value == null) {
93            return null;
94        }
95
96        boolean slavoGermanic = isSlavoGermanic(value);
97        int index = isSilentStart(value) ? 1 : 0;
98
99        DoubleMetaphoneResult result = new DoubleMetaphoneResult(this.getMaxCodeLen());
100
101        while (!result.isComplete() && index <= value.length() - 1) {
102            switch (value.charAt(index)) {
103            case 'A':
104            case 'E':
105            case 'I':
106            case 'O':
107            case 'U':
108            case 'Y':
109                index = handleAEIOUY(value, result, index);
110                break;
111            case 'B':
112                result.append('P');
113                index = charAt(value, index + 1) == 'B' ? index + 2 : index + 1;
114                break;
115            case '\u00C7':
116                // A C with a Cedilla
117                result.append('S');
118                index++;
119                break;
120            case 'C':
121                index = handleC(value, result, index);
122                break;
123            case 'D':
124                index = handleD(value, result, index);
125                break;
126            case 'F':
127                result.append('F');
128                index = charAt(value, index + 1) == 'F' ? index + 2 : index + 1;
129                break;
130            case 'G':
131                index = handleG(value, result, index, slavoGermanic);
132                break;
133            case 'H':
134                index = handleH(value, result, index);
135                break;
136            case 'J':
137                index = handleJ(value, result, index, slavoGermanic);
138                break;
139            case 'K':
140                result.append('K');
141                index = charAt(value, index + 1) == 'K' ? index + 2 : index + 1;
142                break;
143            case 'L':
144                index = handleL(value, result, index);
145                break;
146            case 'M':
147                result.append('M');
148                index = conditionM0(value, index) ? index + 2 : index + 1;
149                break;
150            case 'N':
151                result.append('N');
152                index = charAt(value, index + 1) == 'N' ? index + 2 : index + 1;
153                break;
154            case '\u00D1':
155                // N with a tilde (spanish ene)
156                result.append('N');
157                index++;
158                break;
159            case 'P':
160                index = handleP(value, result, index);
161                break;
162            case 'Q':
163                result.append('K');
164                index = charAt(value, index + 1) == 'Q' ? index + 2 : index + 1;
165                break;
166            case 'R':
167                index = handleR(value, result, index, slavoGermanic);
168                break;
169            case 'S':
170                index = handleS(value, result, index, slavoGermanic);
171                break;
172            case 'T':
173                index = handleT(value, result, index);
174                break;
175            case 'V':
176                result.append('F');
177                index = charAt(value, index + 1) == 'V' ? index + 2 : index + 1;
178                break;
179            case 'W':
180                index = handleW(value, result, index);
181                break;
182            case 'X':
183                index = handleX(value, result, index);
184                break;
185            case 'Z':
186                index = handleZ(value, result, index, slavoGermanic);
187                break;
188            default:
189                index++;
190                break;
191            }
192        }
193
194        return alternate ? result.getAlternate() : result.getPrimary();
195    }
196
197    /**
198     * Encode the value using DoubleMetaphone.  It will only work if
199     * <code>obj</code> is a <code>String</code> (like <code>Metaphone</code>).
200     *
201     * @param obj Object to encode (should be of type String)
202     * @return An encoded Object (will be of type String)
203     * @throws EncoderException encode parameter is not of type String
204     */
205    public Object encode(Object obj) throws EncoderException {
206        if (!(obj instanceof String)) {
207            throw new EncoderException("DoubleMetaphone encode parameter is not of type String");
208        }
209        return doubleMetaphone((String) obj);
210    }
211
212    /**
213     * Encode the value using DoubleMetaphone.
214     *
215     * @param value String to encode
216     * @return An encoded String
217     */
218    public String encode(String value) {
219        return doubleMetaphone(value);
220    }
221
222    /**
223     * Check if the Double Metaphone values of two <code>String</code> values
224     * are equal.
225     *
226     * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
227     * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
228     * @return <code>true</code> if the encoded <code>String</code>s are equal;
229     *          <code>false</code> otherwise.
230     * @see #isDoubleMetaphoneEqual(String,String,boolean)
231     */
232    public boolean isDoubleMetaphoneEqual(String value1, String value2) {
233        return isDoubleMetaphoneEqual(value1, value2, false);
234    }
235
236    /**
237     * Check if the Double Metaphone values of two <code>String</code> values
238     * are equal, optionally using the alternate value.
239     *
240     * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
241     * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
242     * @param alternate use the alternate value if <code>true</code>.
243     * @return <code>true</code> if the encoded <code>String</code>s are equal;
244     *          <code>false</code> otherwise.
245     */
246    public boolean isDoubleMetaphoneEqual(String value1,
247                                          String value2,
248                                          boolean alternate) {
249        return doubleMetaphone(value1, alternate).equals(doubleMetaphone
250                                                         (value2, alternate));
251    }
252
253    /**
254     * Returns the maxCodeLen.
255     * @return int
256     */
257    public int getMaxCodeLen() {
258        return this.maxCodeLen;
259    }
260
261    /**
262     * Sets the maxCodeLen.
263     * @param maxCodeLen The maxCodeLen to set
264     */
265    public void setMaxCodeLen(int maxCodeLen) {
266        this.maxCodeLen = maxCodeLen;
267    }
268
269    //-- BEGIN HANDLERS --//
270
271    /**
272     * Handles 'A', 'E', 'I', 'O', 'U', and 'Y' cases
273     */
274    private int handleAEIOUY(String value, DoubleMetaphoneResult result, int
275                             index) {
276        if (index == 0) {
277            result.append('A');
278        }
279        return index + 1;
280    }
281
282    /**
283     * Handles 'C' cases
284     */
285    private int handleC(String value,
286                        DoubleMetaphoneResult result,
287                        int index) {
288        if (conditionC0(value, index)) {  // very confusing, moved out
289            result.append('K');
290            index += 2;
291        } else if (index == 0 && contains(value, index, 6, "CAESAR")) {
292            result.append('S');
293            index += 2;
294        } else if (contains(value, index, 2, "CH")) {
295            index = handleCH(value, result, index);
296        } else if (contains(value, index, 2, "CZ") &&
297                   !contains(value, index - 2, 4, "WICZ")) {
298            //-- "Czerny" --//
299            result.append('S', 'X');
300            index += 2;
301        } else if (contains(value, index + 1, 3, "CIA")) {
302            //-- "focaccia" --//
303            result.append('X');
304            index += 3;
305        } else if (contains(value, index, 2, "CC") &&
306                   !(index == 1 && charAt(value, 0) == 'M')) {
307            //-- double "cc" but not "McClelland" --//
308            return handleCC(value, result, index);
309        } else if (contains(value, index, 2, "CK", "CG", "CQ")) {
310            result.append('K');
311            index += 2;
312        } else if (contains(value, index, 2, "CI", "CE", "CY")) {
313            //-- Italian vs. English --//
314            if (contains(value, index, 3, "CIO", "CIE", "CIA")) {
315                result.append('S', 'X');
316            } else {
317                result.append('S');
318            }
319            index += 2;
320        } else {
321            result.append('K');
322            if (contains(value, index + 1, 2, " C", " Q", " G")) {
323                //-- Mac Caffrey, Mac Gregor --//
324                index += 3;
325            } else if (contains(value, index + 1, 1, "C", "K", "Q") &&
326                       !contains(value, index + 1, 2, "CE", "CI")) {
327                index += 2;
328            } else {
329                index++;
330            }
331        }
332
333        return index;
334    }
335
336    /**
337     * Handles 'CC' cases
338     */
339    private int handleCC(String value,
340                         DoubleMetaphoneResult result,
341                         int index) {
342        if (contains(value, index + 2, 1, "I", "E", "H") &&
343            !contains(value, index + 2, 2, "HU")) {
344            //-- "bellocchio" but not "bacchus" --//
345            if ((index == 1 && charAt(value, index - 1) == 'A') ||
346                contains(value, index - 1, 5, "UCCEE", "UCCES")) {
347                //-- "accident", "accede", "succeed" --//
348                result.append("KS");
349            } else {
350                //-- "bacci", "bertucci", other Italian --//
351                result.append('X');
352            }
353            index += 3;
354        } else {    // Pierce's rule
355            result.append('K');
356            index += 2;
357        }
358
359        return index;
360    }
361
362    /**
363     * Handles 'CH' cases
364     */
365    private int handleCH(String value,
366                         DoubleMetaphoneResult result,
367                         int index) {
368        if (index > 0 && contains(value, index, 4, "CHAE")) {   // Michael
369            result.append('K', 'X');
370            return index + 2;
371        } else if (conditionCH0(value, index)) {
372            //-- Greek roots ("chemistry", "chorus", etc.) --//
373            result.append('K');
374            return index + 2;
375        } else if (conditionCH1(value, index)) {
376            //-- Germanic, Greek, or otherwise 'ch' for 'kh' sound --//
377            result.append('K');
378            return index + 2;
379        } else {
380            if (index > 0) {
381                if (contains(value, 0, 2, "MC")) {
382                    result.append('K');
383                } else {
384                    result.append('X', 'K');
385                }
386            } else {
387                result.append('X');
388            }
389            return index + 2;
390        }
391    }
392
393    /**
394     * Handles 'D' cases
395     */
396    private int handleD(String value,
397                        DoubleMetaphoneResult result,
398                        int index) {
399        if (contains(value, index, 2, "DG")) {
400            //-- "Edge" --//
401            if (contains(value, index + 2, 1, "I", "E", "Y")) {
402                result.append('J');
403                index += 3;
404                //-- "Edgar" --//
405            } else {
406                result.append("TK");
407                index += 2;
408            }
409        } else if (contains(value, index, 2, "DT", "DD")) {
410            result.append('T');
411            index += 2;
412        } else {
413            result.append('T');
414            index++;
415        }
416        return index;
417    }
418
419    /**
420     * Handles 'G' cases
421     */
422    private int handleG(String value,
423                        DoubleMetaphoneResult result,
424                        int index,
425                        boolean slavoGermanic) {
426        if (charAt(value, index + 1) == 'H') {
427            index = handleGH(value, result, index);
428        } else if (charAt(value, index + 1) == 'N') {
429            if (index == 1 && isVowel(charAt(value, 0)) && !slavoGermanic) {
430                result.append("KN", "N");
431            } else if (!contains(value, index + 2, 2, "EY") &&
432                       charAt(value, index + 1) != 'Y' && !slavoGermanic) {
433                result.append("N", "KN");
434            } else {
435                result.append("KN");
436            }
437            index = index + 2;
438        } else if (contains(value, index + 1, 2, "LI") && !slavoGermanic) {
439            result.append("KL", "L");
440            index += 2;
441        } else if (index == 0 && (charAt(value, index + 1) == 'Y' || contains(value, index + 1, 2, ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER))) {
442            //-- -ges-, -gep-, -gel-, -gie- at beginning --//
443            result.append('K', 'J');
444            index += 2;
445        } else if ((contains(value, index + 1, 2, "ER") ||
446                    charAt(value, index + 1) == 'Y') &&
447                   !contains(value, 0, 6, "DANGER", "RANGER", "MANGER") &&
448                   !contains(value, index - 1, 1, "E", "I") &&
449                   !contains(value, index - 1, 3, "RGY", "OGY")) {
450            //-- -ger-, -gy- --//
451            result.append('K', 'J');
452            index += 2;
453        } else if (contains(value, index + 1, 1, "E", "I", "Y") ||
454                   contains(value, index - 1, 4, "AGGI", "OGGI")) {
455            //-- Italian "biaggi" --//
456            if ((contains(value, 0 ,4, "VAN ", "VON ") || contains(value, 0, 3, "SCH")) || contains(value, index + 1, 2, "ET")) {
457                //-- obvious germanic --//
458                result.append('K');
459            } else if (contains(value, index + 1, 4, "IER")) {
460                result.append('J');
461            } else {
462                result.append('J', 'K');
463            }
464            index += 2;
465        } else if (charAt(value, index + 1) == 'G') {
466            index += 2;
467            result.append('K');
468        } else {
469            index++;
470            result.append('K');
471        }
472        return index;
473    }
474
475    /**
476     * Handles 'GH' cases
477     */
478    private int handleGH(String value,
479                         DoubleMetaphoneResult result,
480                         int index) {
481        if (index > 0 && !isVowel(charAt(value, index - 1))) {
482            result.append('K');
483            index += 2;
484        } else if (index == 0) {
485            if (charAt(value, index + 2) == 'I') {
486                result.append('J');
487            } else {
488                result.append('K');
489            }
490            index += 2;
491        } else if ((index > 1 && contains(value, index - 2, 1, "B", "H", "D")) ||
492                   (index > 2 && contains(value, index - 3, 1, "B", "H", "D")) ||
493                   (index > 3 && contains(value, index - 4, 1, "B", "H"))) {
494            //-- Parker's rule (with some further refinements) - "hugh"
495            index += 2;
496        } else {
497            if (index > 2 && charAt(value, index - 1) == 'U' &&
498                contains(value, index - 3, 1, "C", "G", "L", "R", "T")) {
499                //-- "laugh", "McLaughlin", "cough", "gough", "rough", "tough"
500                result.append('F');
501            } else if (index > 0 && charAt(value, index - 1) != 'I') {
502                result.append('K');
503            }
504            index += 2;
505        }
506        return index;
507    }
508
509    /**
510     * Handles 'H' cases
511     */
512    private int handleH(String value,
513                        DoubleMetaphoneResult result,
514                        int index) {
515        //-- only keep if first & before vowel or between 2 vowels --//
516        if ((index == 0 || isVowel(charAt(value, index - 1))) &&
517            isVowel(charAt(value, index + 1))) {
518            result.append('H');
519            index += 2;
520            //-- also takes car of "HH" --//
521        } else {
522            index++;
523        }
524        return index;
525    }
526
527    /**
528     * Handles 'J' cases
529     */
530    private int handleJ(String value, DoubleMetaphoneResult result, int index,
531                        boolean slavoGermanic) {
532        if (contains(value, index, 4, "JOSE") || contains(value, 0, 4, "SAN ")) {
533                //-- obvious Spanish, "Jose", "San Jacinto" --//
534                if ((index == 0 && (charAt(value, index + 4) == ' ') ||
535                     value.length() == 4) || contains(value, 0, 4, "SAN ")) {
536                    result.append('H');
537                } else {
538                    result.append('J', 'H');
539                }
540                index++;
541            } else {
542                if (index == 0 && !contains(value, index, 4, "JOSE")) {
543                    result.append('J', 'A');
544                } else if (isVowel(charAt(value, index - 1)) && !slavoGermanic &&
545                              (charAt(value, index + 1) == 'A' || charAt(value, index + 1) == 'O')) {
546                    result.append('J', 'H');
547                } else if (index == value.length() - 1) {
548                    result.append('J', ' ');
549                } else if (!contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) && !contains(value, index - 1, 1, "S", "K", "L")) {
550                    result.append('J');
551                }
552
553                if (charAt(value, index + 1) == 'J') {
554                    index += 2;
555                } else {
556                    index++;
557                }
558            }
559        return index;
560    }
561
562    /**
563     * Handles 'L' cases
564     */
565    private int handleL(String value,
566                        DoubleMetaphoneResult result,
567                        int index) {
568        result.append('L');
569        if (charAt(value, index + 1) == 'L') {
570            if (conditionL0(value, index)) {
571                result.appendAlternate(' ');
572            }
573            index += 2;
574        } else {
575            index++;
576        }
577        return index;
578    }
579
580    /**
581     * Handles 'P' cases
582     */
583    private int handleP(String value,
584                        DoubleMetaphoneResult result,
585                        int index) {
586        if (charAt(value, index + 1) == 'H') {
587            result.append('F');
588            index += 2;
589        } else {
590            result.append('P');
591            index = contains(value, index + 1, 1, "P", "B") ? index + 2 : index + 1;
592        }
593        return index;
594    }
595
596    /**
597     * Handles 'R' cases
598     */
599    private int handleR(String value,
600                        DoubleMetaphoneResult result,
601                        int index,
602                        boolean slavoGermanic) {
603        if (index == value.length() - 1 && !slavoGermanic &&
604            contains(value, index - 2, 2, "IE") &&
605            !contains(value, index - 4, 2, "ME", "MA")) {
606            result.appendAlternate('R');
607        } else {
608            result.append('R');
609        }
610        return charAt(value, index + 1) == 'R' ? index + 2 : index + 1;
611    }
612
613    /**
614     * Handles 'S' cases
615     */
616    private int handleS(String value,
617                        DoubleMetaphoneResult result,
618                        int index,
619                        boolean slavoGermanic) {
620        if (contains(value, index - 1, 3, "ISL", "YSL")) {
621            //-- special cases "island", "isle", "carlisle", "carlysle" --//
622            index++;
623        } else if (index == 0 && contains(value, index, 5, "SUGAR")) {
624            //-- special case "sugar-" --//
625            result.append('X', 'S');
626            index++;
627        } else if (contains(value, index, 2, "SH")) {
628            if (contains(value, index + 1, 4,
629                         "HEIM", "HOEK", "HOLM", "HOLZ")) {
630                //-- germanic --//
631                result.append('S');
632            } else {
633                result.append('X');
634            }
635            index += 2;
636        } else if (contains(value, index, 3, "SIO", "SIA") || contains(value, index, 4, "SIAN")) {
637            //-- Italian and Armenian --//
638            if (slavoGermanic) {
639                result.append('S');
640            } else {
641                result.append('S', 'X');
642            }
643            index += 3;
644        } else if ((index == 0 && contains(value, index + 1, 1, "M", "N", "L", "W")) || contains(value, index + 1, 1, "Z")) {
645            //-- german & anglicisations, e.g. "smith" match "schmidt" //
646            // "snider" match "schneider" --//
647            //-- also, -sz- in slavic language altho in hungarian it //
648            //   is pronounced "s" --//
649            result.append('S', 'X');
650            index = contains(value, index + 1, 1, "Z") ? index + 2 : index + 1;
651        } else if (contains(value, index, 2, "SC")) {
652            index = handleSC(value, result, index);
653        } else {
654            if (index == value.length() - 1 && contains(value, index - 2,
655                                                        2, "AI", "OI")){
656                //-- french e.g. "resnais", "artois" --//
657                result.appendAlternate('S');
658            } else {
659                result.append('S');
660            }
661            index = contains(value, index + 1, 1, "S", "Z") ? index + 2 : index + 1;
662        }
663        return index;
664    }
665
666    /**
667     * Handles 'SC' cases
668     */
669    private int handleSC(String value,
670                         DoubleMetaphoneResult result,
671                         int index) {
672        if (charAt(value, index + 2) == 'H') {
673            //-- Schlesinger's rule --//
674            if (contains(value, index + 3,
675                         2, "OO", "ER", "EN", "UY", "ED", "EM")) {
676                //-- Dutch origin, e.g. "school", "schooner" --//
677                if (contains(value, index + 3, 2, "ER", "EN")) {
678                    //-- "schermerhorn", "schenker" --//
679                    result.append("X", "SK");
680                } else {
681                    result.append("SK");
682                }
683            } else {
684                if (index == 0 && !isVowel(charAt(value, 3)) && charAt(value, 3) != 'W') {
685                    result.append('X', 'S');
686                } else {
687                    result.append('X');
688                }
689            }
690        } else if (contains(value, index + 2, 1, "I", "E", "Y")) {
691            result.append('S');
692        } else {
693            result.append("SK");
694        }
695        return index + 3;
696    }
697
698    /**
699     * Handles 'T' cases
700     */
701    private int handleT(String value,
702                        DoubleMetaphoneResult result,
703                        int index) {
704        if (contains(value, index, 4, "TION")) {
705            result.append('X');
706            index += 3;
707        } else if (contains(value, index, 3, "TIA", "TCH")) {
708            result.append('X');
709            index += 3;
710        } else if (contains(value, index, 2, "TH") || contains(value, index,
711                                                               3, "TTH")) {
712            if (contains(value, index + 2, 2, "OM", "AM") ||
713                //-- special case "thomas", "thames" or germanic --//
714                contains(value, 0, 4, "VAN ", "VON ") ||
715                contains(value, 0, 3, "SCH")) {
716                result.append('T');
717            } else {
718                result.append('0', 'T');
719            }
720            index += 2;
721        } else {
722            result.append('T');
723            index = contains(value, index + 1, 1, "T", "D") ? index + 2 : index + 1;
724        }
725        return index;
726    }
727
728    /**
729     * Handles 'W' cases
730     */
731    private int handleW(String value,
732                        DoubleMetaphoneResult result,
733                        int index) {
734        if (contains(value, index, 2, "WR")) {
735            //-- can also be in middle of word --//
736            result.append('R');
737            index += 2;
738        } else {
739            if (index == 0 && (isVowel(charAt(value, index + 1)) ||
740                               contains(value, index, 2, "WH"))) {
741                if (isVowel(charAt(value, index + 1))) {
742                    //-- Wasserman should match Vasserman --//
743                    result.append('A', 'F');
744                } else {
745                    //-- need Uomo to match Womo --//
746                    result.append('A');
747                }
748                index++;
749            } else if ((index == value.length() - 1 && isVowel(charAt(value, index - 1))) ||
750                       contains(value, index - 1,
751                                5, "EWSKI", "EWSKY", "OWSKI", "OWSKY") ||
752                       contains(value, 0, 3, "SCH")) {
753                //-- Arnow should match Arnoff --//
754                result.appendAlternate('F');
755                index++;
756            } else if (contains(value, index, 4, "WICZ", "WITZ")) {
757                //-- Polish e.g. "filipowicz" --//
758                result.append("TS", "FX");
759                index += 4;
760            } else {
761                index++;
762            }
763        }
764        return index;
765    }
766
767    /**
768     * Handles 'X' cases
769     */
770    private int handleX(String value,
771                        DoubleMetaphoneResult result,
772                        int index) {
773        if (index == 0) {
774            result.append('S');
775            index++;
776        } else {
777            if (!((index == value.length() - 1) &&
778                  (contains(value, index - 3, 3, "IAU", "EAU") ||
779                   contains(value, index - 2, 2, "AU", "OU")))) {
780                //-- French e.g. breaux --//
781                result.append("KS");
782            }
783            index = contains(value, index + 1, 1, "C", "X") ? index + 2 : index + 1;
784        }
785        return index;
786    }
787
788    /**
789     * Handles 'Z' cases
790     */
791    private int handleZ(String value, DoubleMetaphoneResult result, int index,
792                        boolean slavoGermanic) {
793        if (charAt(value, index + 1) == 'H') {
794            //-- Chinese pinyin e.g. "zhao" or Angelina "Zhang" --//
795            result.append('J');
796            index += 2;
797        } else {
798            if (contains(value, index + 1, 2, "ZO", "ZI", "ZA") || (slavoGermanic && (index > 0 && charAt(value, index - 1) != 'T'))) {
799                result.append("S", "TS");
800            } else {
801                result.append('S');
802            }
803            index = charAt(value, index + 1) == 'Z' ? index + 2 : index + 1;
804        }
805        return index;
806    }
807
808    //-- BEGIN CONDITIONS --//
809
810    /**
811     * Complex condition 0 for 'C'
812     */
813    private boolean conditionC0(String value, int index) {
814        if (contains(value, index, 4, "CHIA")) {
815            return true;
816        } else if (index <= 1) {
817            return false;
818        } else if (isVowel(charAt(value, index - 2))) {
819            return false;
820        } else if (!contains(value, index - 1, 3, "ACH")) {
821            return false;
822        } else {
823            char c = charAt(value, index + 2);
824            return (c != 'I' && c != 'E')
825                    || contains(value, index - 2, 6, "BACHER", "MACHER");
826        }
827    }
828
829    /**
830     * Complex condition 0 for 'CH'
831     */
832    private boolean conditionCH0(String value, int index) {
833        if (index != 0) {
834            return false;
835        } else if (!contains(value, index + 1, 5, "HARAC", "HARIS") &&
836                   !contains(value, index + 1, 3, "HOR", "HYM", "HIA", "HEM")) {
837            return false;
838        } else if (contains(value, 0, 5, "CHORE")) {
839            return false;
840        } else {
841            return true;
842        }
843    }
844
845    /**
846     * Complex condition 1 for 'CH'
847     */
848    private boolean conditionCH1(String value, int index) {
849        return ((contains(value, 0, 4, "VAN ", "VON ") || contains(value, 0,
850                                                                   3, "SCH")) ||
851                contains(value, index - 2, 6, "ORCHES", "ARCHIT", "ORCHID") ||
852                contains(value, index + 2, 1, "T", "S") ||
853                ((contains(value, index - 1, 1, "A", "O", "U", "E") || index == 0) &&
854                 (contains(value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE) || index + 1 == value.length() - 1)));
855    }
856
857    /**
858     * Complex condition 0 for 'L'
859     */
860    private boolean conditionL0(String value, int index) {
861        if (index == value.length() - 3 &&
862            contains(value, index - 1, 4, "ILLO", "ILLA", "ALLE")) {
863            return true;
864        } else if ((contains(value, index - 1, 2, "AS", "OS") ||
865                    contains(value, value.length() - 1, 1, "A", "O")) &&
866                   contains(value, index - 1, 4, "ALLE")) {
867            return true;
868        } else {
869            return false;
870        }
871    }
872
873    /**
874     * Complex condition 0 for 'M'
875     */
876    private boolean conditionM0(String value, int index) {
877        if (charAt(value, index + 1) == 'M') {
878            return true;
879        }
880        return contains(value, index - 1, 3, "UMB")
881                && ((index + 1) == value.length() - 1 || contains(value,
882                        index + 2, 2, "ER"));
883    }
884
885    //-- BEGIN HELPER FUNCTIONS --//
886
887    /**
888     * Determines whether or not a value is of slavo-germanic orgin. A value is
889     * of slavo-germanic origin if it contians any of 'W', 'K', 'CZ', or 'WITZ'.
890     */
891    private boolean isSlavoGermanic(String value) {
892        return value.indexOf('W') > -1 || value.indexOf('K') > -1 ||
893            value.indexOf("CZ") > -1 || value.indexOf("WITZ") > -1;
894    }
895
896    /**
897     * Determines whether or not a character is a vowel or not
898     */
899    private boolean isVowel(char ch) {
900        return VOWELS.indexOf(ch) != -1;
901    }
902
903    /**
904     * Determines whether or not the value starts with a silent letter.  It will
905     * return <code>true</code> if the value starts with any of 'GN', 'KN',
906     * 'PN', 'WR' or 'PS'.
907     */
908    private boolean isSilentStart(String value) {
909        boolean result = false;
910        for (int i = 0; i < SILENT_START.length; i++) {
911            if (value.startsWith(SILENT_START[i])) {
912                result = true;
913                break;
914            }
915        }
916        return result;
917    }
918
919    /**
920     * Cleans the input
921     */
922    private String cleanInput(String input) {
923        if (input == null) {
924            return null;
925        }
926        input = input.trim();
927        if (input.length() == 0) {
928            return null;
929        }
930        return input.toUpperCase();
931    }
932
933    /**
934     * Gets the character at index <code>index</code> if available, otherwise
935     * it returns <code>Character.MIN_VALUE</code> so that there is some sort
936     * of a default
937     */
938    protected char charAt(String value, int index) {
939        if (index < 0 || index >= value.length()) {
940            return Character.MIN_VALUE;
941        }
942        return value.charAt(index);
943    }
944
945    /**
946     * Shortcut method with 1 criteria
947     */
948    private static boolean contains(String value, int start, int length,
949                                    String criteria) {
950        return contains(value, start, length,
951                        new String[] { criteria });
952    }
953
954    /**
955     * Shortcut method with 2 criteria
956     */
957    private static boolean contains(String value, int start, int length,
958                                    String criteria1, String criteria2) {
959        return contains(value, start, length,
960                        new String[] { criteria1, criteria2 });
961    }
962
963    /**
964     * Shortcut method with 3 criteria
965     */
966    private static boolean contains(String value, int start, int length,
967                                    String criteria1, String criteria2,
968                                    String criteria3) {
969        return contains(value, start, length,
970                        new String[] { criteria1, criteria2, criteria3 });
971    }
972
973    /**
974     * Shortcut method with 4 criteria
975     */
976    private static boolean contains(String value, int start, int length,
977                                    String criteria1, String criteria2,
978                                    String criteria3, String criteria4) {
979        return contains(value, start, length,
980                        new String[] { criteria1, criteria2, criteria3,
981                                       criteria4 });
982    }
983
984    /**
985     * Shortcut method with 5 criteria
986     */
987    private static boolean contains(String value, int start, int length,
988                                    String criteria1, String criteria2,
989                                    String criteria3, String criteria4,
990                                    String criteria5) {
991        return contains(value, start, length,
992                        new String[] { criteria1, criteria2, criteria3,
993                                       criteria4, criteria5 });
994    }
995
996    /**
997     * Shortcut method with 6 criteria
998     */
999    private static boolean contains(String value, int start, int length,
1000                                    String criteria1, String criteria2,
1001                                    String criteria3, String criteria4,
1002                                    String criteria5, String criteria6) {
1003        return contains(value, start, length,
1004                        new String[] { criteria1, criteria2, criteria3,
1005                                       criteria4, criteria5, criteria6 });
1006    }
1007
1008    /**
1009     * Determines whether <code>value</code> contains any of the criteria
1010     starting
1011     * at index <code>start</code> and matching up to length <code>length</code>
1012     */
1013    protected static boolean contains(String value, int start, int length,
1014                                      String[] criteria) {
1015        boolean result = false;
1016        if (start >= 0 && start + length <= value.length()) {
1017            String target = value.substring(start, start + length);
1018
1019            for (int i = 0; i < criteria.length; i++) {
1020                if (target.equals(criteria[i])) {
1021                    result = true;
1022                    break;
1023                }
1024            }
1025        }
1026        return result;
1027    }
1028
1029    //-- BEGIN INNER CLASSES --//
1030
1031    /**
1032     * Inner class for storing results, since there is the optional alternate
1033     * encoding.
1034     */
1035    public class DoubleMetaphoneResult {
1036
1037        private StringBuffer primary = new StringBuffer(getMaxCodeLen());
1038        private StringBuffer alternate = new StringBuffer(getMaxCodeLen());
1039        private int maxLength;
1040
1041        public DoubleMetaphoneResult(int maxLength) {
1042            this.maxLength = maxLength;
1043        }
1044
1045        public void append(char value) {
1046            appendPrimary(value);
1047            appendAlternate(value);
1048        }
1049
1050        public void append(char primary, char alternate) {
1051            appendPrimary(primary);
1052            appendAlternate(alternate);
1053        }
1054
1055        public void appendPrimary(char value) {
1056            if (this.primary.length() < this.maxLength) {
1057                this.primary.append(value);
1058            }
1059        }
1060
1061        public void appendAlternate(char value) {
1062            if (this.alternate.length() < this.maxLength) {
1063                this.alternate.append(value);
1064            }
1065        }
1066
1067        public void append(String value) {
1068            appendPrimary(value);
1069            appendAlternate(value);
1070        }
1071
1072        public void append(String primary, String alternate) {
1073            appendPrimary(primary);
1074            appendAlternate(alternate);
1075        }
1076
1077        public void appendPrimary(String value) {
1078            int addChars = this.maxLength - this.primary.length();
1079            if (value.length() <= addChars) {
1080                this.primary.append(value);
1081            } else {
1082                this.primary.append(value.substring(0, addChars));
1083            }
1084        }
1085
1086        public void appendAlternate(String value) {
1087            int addChars = this.maxLength - this.alternate.length();
1088            if (value.length() <= addChars) {
1089                this.alternate.append(value);
1090            } else {
1091                this.alternate.append(value.substring(0, addChars));
1092            }
1093        }
1094
1095        public String getPrimary() {
1096            return this.primary.toString();
1097        }
1098
1099        public String getAlternate() {
1100            return this.alternate.toString();
1101        }
1102
1103        public boolean isComplete() {
1104            return this.primary.length() >= this.maxLength &&
1105                this.alternate.length() >= this.maxLength;
1106        }
1107    }
1108}
1109