1// =================================================================================================
2// ADOBE SYSTEMS INCORPORATED
3// Copyright 2006 Adobe Systems Incorporated
4// All Rights Reserved
5//
6// NOTICE:  Adobe permits you to use, modify, and distribute this file in accordance with the terms
7// of the Adobe license agreement accompanying it.
8// =================================================================================================
9
10package com.adobe.xmp.impl;
11
12import java.io.IOException;
13import java.io.PushbackReader;
14import java.io.Reader;
15
16
17/**
18 * @since   22.08.2006
19 */
20public class FixASCIIControlsReader extends PushbackReader
21{
22	/** */
23	private static final int STATE_START = 0;
24	/** */
25	private static final int STATE_AMP = 1;
26	/** */
27	private static final int STATE_HASH = 2;
28	/** */
29	private static final int STATE_HEX = 3;
30	/** */
31	private static final int STATE_DIG1 = 4;
32	/** */
33	private static final int STATE_ERROR = 5;
34	/** */
35	private static final int BUFFER_SIZE = 8;
36	/** the state of the automaton */
37	private int state = STATE_START;
38	/** the result of the escaping sequence */
39	private int control = 0;
40	/** count the digits of the sequence */
41	private int digits = 0;
42
43	/**
44	 * The look-ahead size is 6 at maximum («)
45	 * @see PushbackReader#PushbackReader(Reader, int)
46	 * @param in a Reader
47	 */
48	public FixASCIIControlsReader(Reader in)
49	{
50		super(in, BUFFER_SIZE);
51	}
52
53
54	/**
55	 * @see Reader#read(char[], int, int)
56	 */
57	public int read(char[] cbuf, int off, int len) throws IOException
58	{
59		int readAhead = 0;
60		int read = 0;
61		int pos = off;
62		char[] readAheadBuffer = new char[BUFFER_SIZE];
63
64		boolean available = true;
65		while (available  &&  read < len)
66		{
67			available = super.read(readAheadBuffer, readAhead, 1) == 1;
68			if (available)
69			{
70				char c = processChar(readAheadBuffer[readAhead]);
71				if (state == STATE_START)
72				{
73					// replace control chars with space
74					if (Utils.isControlChar(c))
75					{
76						c = ' ';
77					}
78					cbuf[pos++] = c;
79					readAhead = 0;
80					read++;
81				}
82				else if (state == STATE_ERROR)
83				{
84					unread(readAheadBuffer, 0, readAhead + 1);
85					readAhead = 0;
86				}
87				else
88				{
89					readAhead++;
90				}
91			}
92			else if (readAhead > 0)
93			{
94				// handles case when file ends within excaped sequence
95				unread(readAheadBuffer, 0, readAhead);
96				state = STATE_ERROR;
97				readAhead = 0;
98				available = true;
99			}
100		}
101
102
103		return read > 0  ||  available ? read : -1;
104	}
105
106
107	/**
108	 * Processes numeric escaped chars to find out if they are a control character.
109	 * @param ch a char
110	 * @return Returns the char directly or as replacement for the escaped sequence.
111	 */
112	private char processChar(char ch)
113	{
114		switch (state)
115		{
116			case STATE_START:
117				if (ch == '&')
118				{
119					state = STATE_AMP;
120				}
121				return ch;
122
123			case STATE_AMP:
124				if (ch == '#')
125				{
126					state = STATE_HASH;
127				}
128				else
129				{
130					state = STATE_ERROR;
131				}
132				return ch;
133
134			case STATE_HASH:
135				if (ch == 'x')
136				{
137					control = 0;
138					digits = 0;
139					state = STATE_HEX;
140				}
141				else if ('0' <= ch  &&  ch <= '9')
142				{
143					control = Character.digit(ch, 10);
144					digits = 1;
145					state = STATE_DIG1;
146				}
147				else
148				{
149					state = STATE_ERROR;
150				}
151				return ch;
152
153			case STATE_DIG1:
154				if ('0' <= ch  &&  ch <= '9')
155				{
156					control = control * 10 + Character.digit(ch, 10);
157					digits++;
158					if (digits <= 5)
159					{
160						state = STATE_DIG1;
161					}
162					else
163					{
164						state = STATE_ERROR; // sequence too long
165					}
166				}
167				else if (ch == ';'  &&  Utils.isControlChar((char) control))
168				{
169					state = STATE_START;
170					return (char) control;
171				}
172				else
173				{
174					state = STATE_ERROR;
175				}
176				return ch;
177
178			case STATE_HEX:
179				if (('0' <= ch  &&  ch <= '9')  ||
180					('a' <= ch  &&  ch <= 'f')  ||
181					('A' <= ch  &&  ch <= 'F'))
182				{
183					control = control * 16 + Character.digit(ch, 16);
184					digits++;
185					if (digits <= 4)
186					{
187						state = STATE_HEX;
188					}
189					else
190					{
191						state = STATE_ERROR; // sequence too long
192					}
193				}
194				else if (ch == ';'  &&   Utils.isControlChar((char) control))
195				{
196					state = STATE_START;
197					return (char) control;
198				}
199				else
200				{
201					state = STATE_ERROR;
202				}
203				return ch;
204
205			case STATE_ERROR:
206				state = STATE_START;
207				return ch;
208
209			default:
210				// not reachable
211				return ch;
212		}
213	}
214}
215