1/*
2 * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package java.lang;
27
28import java.util.NoSuchElementException;
29import java.util.PrimitiveIterator;
30import java.util.Spliterator;
31import java.util.Spliterators;
32import java.util.function.IntConsumer;
33import java.util.stream.IntStream;
34import java.util.stream.StreamSupport;
35
/**
 * A {@code CharSequence} is a readable sequence of {@code char} values. This
 * interface provides uniform, read-only access to many different kinds of
 * {@code char} sequences.
 * A {@code char} value represents a character in the <i>Basic
 * Multilingual Plane (BMP)</i> or a surrogate. Refer to <a
 * href="Character.html#unicode">Unicode Character Representation</a> for details.
 *
 * <p> This interface does not refine the general contracts of the {@link
 * java.lang.Object#equals(java.lang.Object) equals} and {@link
 * java.lang.Object#hashCode() hashCode} methods.  The result of comparing two
 * objects that implement {@code CharSequence} is therefore, in general,
 * undefined.  Each object may be implemented by a different class, and there
 * is no guarantee that each class will be capable of testing its instances
 * for equality with those of the other.  It is therefore inappropriate to use
 * arbitrary {@code CharSequence} instances as elements in a set or as keys in
 * a map. </p>
 *
 * @author Mike McCloskey
 * @since 1.4
 * @spec JSR-51
 */
58
public interface CharSequence {

    /**
     * Returns the length of this character sequence.  The length is the number
     * of 16-bit {@code char}s in the sequence.
     *
     * @return  the number of {@code char}s in this sequence
     */
    int length();

    /**
     * Returns the {@code char} value at the specified index.  An index ranges from zero
     * to {@code length() - 1}.  The first {@code char} value of the sequence is at
     * index zero, the next at index one, and so on, as for array
     * indexing.
     *
     * <p>If the {@code char} value specified by the index is a
     * <a href="{@docRoot}/java/lang/Character.html#unicode">surrogate</a>, the surrogate
     * value is returned.
     *
     * @param   index   the index of the {@code char} value to be returned
     *
     * @return  the specified {@code char} value
     *
     * @throws  IndexOutOfBoundsException
     *          if the {@code index} argument is negative or not less than
     *          {@code length()}
     */
    char charAt(int index);

    /**
     * Returns a new {@code CharSequence} that is a subsequence of this sequence.
     * The subsequence starts with the {@code char} value at the specified index and
     * ends with the {@code char} value at index {@code end - 1}.  The length
     * (in {@code char}s) of the
     * returned sequence is {@code end - start}, so if {@code start == end}
     * then an empty sequence is returned.
     *
     * @param   start   the start index, inclusive
     * @param   end     the end index, exclusive
     *
     * @return  the specified subsequence
     *
     * @throws  IndexOutOfBoundsException
     *          if {@code start} or {@code end} are negative,
     *          if {@code end} is greater than {@code length()},
     *          or if {@code start} is greater than {@code end}
     */
    CharSequence subSequence(int start, int end);

    /**
     * Returns a string containing the characters in this sequence in the same
     * order as this sequence.  The length of the string will be the length of
     * this sequence.
     *
     * @return  a string consisting of exactly this sequence of characters
     */
    public String toString();

    /**
     * Returns a stream of {@code int} zero-extending the {@code char} values
     * from this sequence.  Any char which maps to a <a
     * href="{@docRoot}/java/lang/Character.html#unicode">surrogate code
     * point</a> is passed through uninterpreted.
     *
     * <p>If the sequence is mutated while the stream is being read, the
     * result is undefined.
     *
     * @return an IntStream of char values from this sequence
     * @since 1.8
     */
    public default IntStream chars() {
        // Walks the enclosing sequence one char at a time; length() and
        // charAt() below resolve to the enclosing CharSequence instance.
        class CharIterator implements PrimitiveIterator.OfInt {
            // Index of the next char to hand out.
            int cur = 0;

            @Override
            public boolean hasNext() {
                return cur < length();
            }

            @Override
            public int nextInt() {
                if (!hasNext()) {
                    throw new NoSuchElementException();
                }
                // The char result is widened (zero-extended) to int.
                return charAt(cur++);
            }

            @Override
            public void forEachRemaining(IntConsumer block) {
                // cur is advanced only after accept() returns normally.
                for (; cur < length(); cur++) {
                    block.accept(charAt(cur));
                }
            }
        }

        // The spliterator is handed over as a supplier, so the iterator is
        // created and length() is sampled only when the supplier is invoked,
        // not when chars() is called.  Exactly one int is produced per char,
        // which is why the stream can be reported as SIZED/SUBSIZED.
        return StreamSupport.intStream(() ->
                Spliterators.spliterator(
                        new CharIterator(),
                        length(),
                        Spliterator.ORDERED),
                Spliterator.SUBSIZED | Spliterator.SIZED | Spliterator.ORDERED,
                false);
    }

    /**
     * Returns a stream of code point values from this sequence.  Any surrogate
     * pairs encountered in the sequence are combined as if by {@linkplain
     * Character#toCodePoint Character.toCodePoint} and the result is passed
     * to the stream. Any other code units, including ordinary BMP characters,
     * unpaired surrogates, and undefined code units, are zero-extended to
     * {@code int} values which are then passed to the stream.
     *
     * <p>If the sequence is mutated while the stream is being read, the result
     * is undefined.
     *
     * @return an IntStream of Unicode code points from this sequence
     * @since 1.8
     */
    public default IntStream codePoints() {
        // Walks the enclosing sequence, merging each high/low surrogate pair
        // into a single code point; length() and charAt() below resolve to
        // the enclosing CharSequence instance.
        class CodePointIterator implements PrimitiveIterator.OfInt {
            // Index of the next unread char.
            int cur = 0;

            @Override
            public void forEachRemaining(IntConsumer block) {
                final int length = length();
                int i = cur;
                try {
                    while (i < length) {
                        char c1 = charAt(i++);
                        if (!Character.isHighSurrogate(c1) || i >= length) {
                            // Not a high surrogate, or a high surrogate that
                            // is the last char: emit it on its own.
                            block.accept(c1);
                        } else {
                            char c2 = charAt(i);
                            if (Character.isLowSurrogate(c2)) {
                                // Well-formed pair: consume both chars.
                                i++;
                                block.accept(Character.toCodePoint(c1, c2));
                            } else {
                                // Unpaired high surrogate; c2 is left for
                                // the next loop iteration.
                                block.accept(c1);
                            }
                        }
                    }
                } finally {
                    // Record the position reached even when the consumer
                    // throws part-way through.
                    cur = i;
                }
            }

            @Override
            public boolean hasNext() {
                return cur < length();
            }

            @Override
            public int nextInt() {
                final int length = length();

                if (cur >= length) {
                    throw new NoSuchElementException();
                }
                char c1 = charAt(cur++);
                if (Character.isHighSurrogate(c1) && cur < length) {
                    char c2 = charAt(cur);
                    if (Character.isLowSurrogate(c2)) {
                        cur++;
                        return Character.toCodePoint(c1, c2);
                    }
                }
                // BMP char or unpaired surrogate: zero-extended as-is.
                return c1;
            }
        }

        // A surrogate pair collapses two chars into one element, so the
        // number of code points is not known up front; the spliterator is
        // therefore created without a size.
        return StreamSupport.intStream(() ->
                Spliterators.spliteratorUnknownSize(
                        new CodePointIterator(),
                        Spliterator.ORDERED),
                Spliterator.ORDERED,
                false);
    }
}
235