1// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
2//
3// TagSoup is licensed under the Apache License,
4// Version 2.0.  You may obtain a copy of this license at
5// http://www.apache.org/licenses/LICENSE-2.0 .  You may also have
6// additional legal rights not granted by this license.
7//
8// TagSoup is distributed in the hope that it will be useful, but
9// unless required by applicable law or agreed to in writing, TagSoup
10// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
11// OF ANY KIND, either express or implied; not even the implied warranty
12// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13//
14//
15// This file is part of TagSoup.
16//
17// This program is free software; you can redistribute it and/or modify
18// it under the terms of the GNU General Public License as published by
19// the Free Software Foundation; either version 2 of the License, or
20// (at your option) any later version.  You may also distribute
21// and/or modify it under version 2.1 of the Academic Free License.
22//
23// This program is distributed in the hope that it will be useful,
24// but WITHOUT ANY WARRANTY; without even the implied warranty of
25// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
26//
27//
28// PYX Scanner
29
30package org.ccil.cowan.tagsoup;
31import java.io.*;
32import org.xml.sax.SAXException;
33
34/**
35A Scanner that accepts PYX format instead of HTML.
36Useful primarily for debugging.
37**/
38public class PYXScanner implements Scanner {
39
40        public void resetDocumentLocator(String publicid, String systemid) {
41	// Need this method for interface compatibility, but note
42	// that PyxScanner does not implement Locator.
43        }
44
45	public void scan(Reader r, ScanHandler h) throws IOException, SAXException {
46		BufferedReader br = new BufferedReader(r);
47		String s;
48		char[] buff = null;
49		boolean instag = false;
50		while ((s = br.readLine()) != null) {
51			int size = s.length();
52			if (buff == null || buff.length < size) {
53				buff = new char[size];
54				}
55			s.getChars(0, size, buff, 0);
56			switch (buff[0]) {
57			case '(':
58				if (instag) {
59					h.stagc(buff, 0, 0);
60					instag = false;
61					}
62				h.gi(buff, 1, size - 1);
63				instag = true;
64				break;
65			case ')':
66				if (instag) {
67					h.stagc(buff, 0, 0);
68					instag = false;
69					}
70				h.etag(buff, 1, size - 1);
71				break;
72			case '?':
73				if (instag) {
74					h.stagc(buff, 0, 0);
75					instag = false;
76					}
77				h.pi(buff, 1, size - 1);
78				break;
79			case 'A':
80				int sp = s.indexOf(' ');
81				h.aname(buff, 1, sp - 1);
82				h.aval(buff, sp + 1, size - sp - 1);
83				break;
84			case '-':
85				if (instag) {
86					h.stagc(buff, 0, 0);
87					instag = false;
88					}
89				if (s.equals("-\\n")) {
90					buff[0] = '\n';
91					h.pcdata(buff, 0, 1);
92					}
93				else {
94					// FIXME:
95					// Does not decode \t and \\ in input
96					h.pcdata(buff, 1, size - 1);
97					}
98				break;
99			case 'E':
100				if (instag) {
101					h.stagc(buff, 0, 0);
102					instag = false;
103					}
104				h.entity(buff, 1, size - 1);
105				break;
106			default:
107//				System.err.print("Gotcha ");
108//				System.err.print(s);
109//				System.err.print('\n');
110				break;
111				}
112			}
113		h.eof(buff, 0, 0);
114		}
115
116	public void startCDATA() { }
117
118	public static void main(String[] argv) throws IOException, SAXException {
119		Scanner s = new PYXScanner();
120		Reader r = new InputStreamReader(System.in, "UTF-8");
121		Writer w = new BufferedWriter(new OutputStreamWriter(System.out, "UTF-8"));
122		s.scan(r, new PYXWriter(w));
123		}
124	}
125