1f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling// =================================================================================================
2f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling// ADOBE SYSTEMS INCORPORATED
3f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling// Copyright 2006 Adobe Systems Incorporated
4f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling// All Rights Reserved
5f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling//
6f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling// NOTICE:  Adobe permits you to use, modify, and distribute this file in accordance with the terms
7f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling// of the Adobe license agreement accompanying it.
8f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling// =================================================================================================
9f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
10f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingpackage com.adobe.xmp.impl;
11f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
12f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport java.io.IOException;
13f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport java.io.InputStream;
14f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport java.io.InputStreamReader;
15f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport java.io.Reader;
16f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport java.io.StringReader;
17f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport java.io.UnsupportedEncodingException;
18f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
19f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport javax.xml.XMLConstants;
20f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport javax.xml.parsers.DocumentBuilder;
21f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport javax.xml.parsers.DocumentBuilderFactory;
22f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport javax.xml.parsers.ParserConfigurationException;
23f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
24f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport org.w3c.dom.Document;
25f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport org.w3c.dom.Node;
26f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport org.w3c.dom.NodeList;
27f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport org.w3c.dom.ProcessingInstruction;
28f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport org.xml.sax.InputSource;
29f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport org.xml.sax.SAXException;
30f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
31f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport com.adobe.xmp.XMPConst;
32f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport com.adobe.xmp.XMPError;
33f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport com.adobe.xmp.XMPException;
34f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport com.adobe.xmp.XMPMeta;
35f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport com.adobe.xmp.options.ParseOptions;
36f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
37f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
38f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling/**
39f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * This class replaces the <code>ExpatAdapter.cpp</code> and does the
40f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * XML-parsing and fixes the prefix. After the parsing several normalisations
41f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * are applied to the XMPTree.
42f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling *
43f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @since 01.02.2006
44f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling */
45f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingpublic class XMPMetaParser
46f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling{
47f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	/**  */
48f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	private static final Object XMP_RDF = new Object();
49f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	/** the DOM Parser Factory, options are set */
50f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	private static DocumentBuilderFactory factory = createDocumentBuilderFactory();
51f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
/**
 * Private constructor that keeps code outside this class from creating instances.
 */
private XMPMetaParser()
{
	// intentionally empty
}
59f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
60f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
61f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
62f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	/**
63f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * Parses the input source into an XMP metadata object, including
64f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * de-aliasing and normalisation.
65f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 *
66f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @param input the input can be an <code>InputStream</code>, a <code>String</code> or
67f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * 			a byte buffer containing the XMP packet.
68f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @param options the parse options
69f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @return Returns the resulting XMP metadata object
70f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @throws XMPException Thrown if parsing or normalisation fails.
71f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 */
72f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	public static XMPMeta parse(Object input, ParseOptions options) throws XMPException
73f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	{
74f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		ParameterAsserts.assertNotNull(input);
75f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		options = options != null ? options : new ParseOptions();
76f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
77f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		Document document = parseXml(input, options);
78f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
79f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		boolean xmpmetaRequired = options.getRequireXMPMeta();
80f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		Object[] result = new Object[3];
81f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		result = findRootNode(document, xmpmetaRequired, result);
82f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
83f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		if (result != null  &&  result[1] == XMP_RDF)
84f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		{
85f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			XMPMetaImpl xmp = ParseRDF.parse((Node) result[0]);
86f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			xmp.setPacketHeader((String) result[2]);
87f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
88f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			// Check if the XMP object shall be normalized
89f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			if (!options.getOmitNormalization())
90f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			{
91f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				return XMPNormalizer.process(xmp, options);
92f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			}
93f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			else
94f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			{
95f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				return xmp;
96f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			}
97f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		}
98f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		else
99f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		{
100f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			// no appropriate root node found, return empty metadata object
101f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			return new XMPMetaImpl();
102f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		}
103f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	}
104f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
105f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
106f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	/**
107f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * Parses the raw XML metadata packet considering the parsing options.
108f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * Latin-1/ISO-8859-1 can be accepted when the input is a byte stream
109f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * (some old toolkits versions such packets). The stream is
110f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * then wrapped in another stream that converts Latin-1 to UTF-8.
111f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * <p>
112f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * If control characters shall be fixed, a reader is used that fixes the chars to spaces
113f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * (if the input is a byte stream is has to be read as character stream).
114f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * <p>
115f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * Both options reduce the performance of the parser.
116f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 *
117f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @param input the input can be an <code>InputStream</code>, a <code>String</code> or
118f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * 			a byte buffer containing the XMP packet.
119f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @param options the parsing options
120f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @return Returns the parsed XML document or an exception.
121f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @throws XMPException Thrown if the parsing fails for different reasons
122f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 */
123f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	private static Document parseXml(Object input, ParseOptions options)
124f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			throws XMPException
125f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	{
126f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		if (input instanceof InputStream)
127f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		{
128f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			return parseXmlFromInputStream((InputStream) input, options);
129f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		}
130f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		else if (input instanceof byte[])
131f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		{
132f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			return parseXmlFromBytebuffer(new ByteBuffer((byte[]) input), options);
133f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		}
134f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		else
135f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		{
136f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			return parseXmlFromString((String) input, options);
137f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		}
138f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	}
139f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
140f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
141f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	/**
142f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * Parses XML from an {@link InputStream},
143f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * fixing the encoding (Latin-1 to UTF-8) and illegal control character optionally.
144f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 *
145f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @param stream an <code>InputStream</code>
146f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @param options the parsing options
147f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @return Returns an XML DOM-Document.
148f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @throws XMPException Thrown when the parsing fails.
149f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 */
150f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	private static Document parseXmlFromInputStream(InputStream stream, ParseOptions options)
151f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			throws XMPException
152f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	{
153f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		if (!options.getAcceptLatin1()  &&  !options.getFixControlChars())
154f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		{
155f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			return parseInputSource(new InputSource(stream));
156f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		}
157f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		else
158f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		{
159f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			// load stream into bytebuffer
160f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			try
161f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			{
162f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				ByteBuffer buffer = new ByteBuffer(stream);
163f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				return parseXmlFromBytebuffer(buffer, options);
164f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			}
165f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			catch (IOException e)
166f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			{
167f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				throw new XMPException("Error reading the XML-file",
168f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling						XMPError.BADSTREAM, e);
169f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			}
170f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		}
171f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	}
172f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
173f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
174f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	/**
175f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * Parses XML from a byte buffer,
176f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * fixing the encoding (Latin-1 to UTF-8) and illegal control character optionally.
177f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 *
178f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @param buffer a byte buffer containing the XMP packet
179f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @param options the parsing options
180f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @return Returns an XML DOM-Document.
181f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @throws XMPException Thrown when the parsing fails.
182f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 */
183f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	private static Document parseXmlFromBytebuffer(ByteBuffer buffer, ParseOptions options)
184f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		throws XMPException
185f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	{
186f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		InputSource source = new InputSource(buffer.getByteStream());
187f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		try
188f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		{
189f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			return parseInputSource(source);
190f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		}
191f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		catch (XMPException e)
192f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		{
193f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			if (e.getErrorCode() == XMPError.BADXML  ||
194f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				e.getErrorCode() == XMPError.BADSTREAM)
195f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			{
196f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				if (options.getAcceptLatin1())
197f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				{
198f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling					buffer = Latin1Converter.convert(buffer);
199f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				}
200f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
201f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				if (options.getFixControlChars())
202f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				{
203f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling					try
204f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling					{
205f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling						String encoding = buffer.getEncoding();
206f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling						Reader fixReader = new FixASCIIControlsReader(
207f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling							new InputStreamReader(
208f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling								buffer.getByteStream(), encoding));
209f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling						return parseInputSource(new InputSource(fixReader));
210f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling					}
211f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling					catch (UnsupportedEncodingException e1)
212f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling					{
213f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling						// can normally not happen as the encoding is provided by a util function
214f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling						throw new XMPException("Unsupported Encoding",
215f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling								XMPError.INTERNALFAILURE, e);
216f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling					}
217f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				}
218f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				source = new InputSource(buffer.getByteStream());
219f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				return parseInputSource(source);
220f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			}
221f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			else
222f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			{
223f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				throw e;
224f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			}
225f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		}
226f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	}
227f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
228f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
229f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	/**
230f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * Parses XML from a {@link String},
231f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * fixing the illegal control character optionally.
232f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 *
233f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @param input a <code>String</code> containing the XMP packet
234f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @param options the parsing options
235f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @return Returns an XML DOM-Document.
236f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @throws XMPException Thrown when the parsing fails.
237f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 */
238f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	private static Document parseXmlFromString(String input, ParseOptions options)
239f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			throws XMPException
240f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	{
241f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		InputSource source = new InputSource(new StringReader(input));
242f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		try
243f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		{
244f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			return parseInputSource(source);
245f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		}
246f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		catch (XMPException e)
247f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		{
248f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			if (e.getErrorCode() == XMPError.BADXML  &&  options.getFixControlChars())
249f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			{
250f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				source = new InputSource(new FixASCIIControlsReader(new StringReader(input)));
251f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				return parseInputSource(source);
252f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			}
253f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			else
254f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			{
255f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				throw e;
256f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			}
257f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		}
258f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	}
259f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
260f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
261f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	/**
262f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * Runs the XML-Parser.
263f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @param source an <code>InputSource</code>
264f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @return Returns an XML DOM-Document.
265f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @throws XMPException Wraps parsing and I/O-exceptions into an XMPException.
266f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 */
267f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	private static Document parseInputSource(InputSource source) throws XMPException
268f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	{
269f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		try
270f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		{
271f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			DocumentBuilder builder = factory.newDocumentBuilder();
272f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			builder.setErrorHandler(null);
273f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			return builder.parse(source);
274f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		}
275f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		catch (SAXException e)
276f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		{
277f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			throw new XMPException("XML parsing failure", XMPError.BADXML, e);
278f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		}
279f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		catch (ParserConfigurationException e)
280f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		{
281f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			throw new XMPException("XML Parser not correctly configured",
282f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling					XMPError.UNKNOWN, e);
283f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		}
284f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		catch (IOException e)
285f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		{
286f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			throw new XMPException("Error reading the XML-file", XMPError.BADSTREAM, e);
287f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		}
288f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	}
289f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
290f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
291f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	/**
292f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * Find the XML node that is the root of the XMP data tree. Generally this
293f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * will be an outer node, but it could be anywhere if a general XML document
294f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * is parsed (e.g. SVG). The XML parser counted all rdf:RDF and
295f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * pxmp:XMP_Packet nodes, and kept a pointer to the last one. If there is
296f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * more than one possible root use PickBestRoot to choose among them.
297f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * <p>
298f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * If there is a root node, try to extract the version of the previous XMP
299f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * toolkit.
300f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * <p>
301f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * Pick the first x:xmpmeta among multiple root candidates. If there aren't
302f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * any, pick the first bare rdf:RDF if that is allowed. The returned root is
303f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * the rdf:RDF child if an x:xmpmeta element was chosen. The search is
	 * breadth first, so a higher level candidate is chosen over a lower level
305f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * one that was textually earlier in the serialized XML.
306f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 *
307f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @param root the root of the xml document
308f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @param xmpmetaRequired flag if the xmpmeta-tag is still required, might be set
309f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * 		initially to <code>true</code>, if the parse option "REQUIRE_XMP_META" is set
310f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @param result The result array that is filled during the recursive process.
311f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @return Returns an array that contains the result or <code>null</code>.
312f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * 		   The array contains:
313f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * <ol>
314f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * 		<li>the rdf:RDF-node
315f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * 		<li>an object that is either XMP_RDF or XMP_PLAIN
316f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * 		<li>a flag that is true if a <?xpacket..> processing instruction has been found
317f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * 		<li>the body text of the xpacket-instruction.
318f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * </ol>
319f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 *
320f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 */
321f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	private static Object[] findRootNode(Node root, boolean xmpmetaRequired, Object[] result)
322f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	{
323f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		// Look among this parent's content for x:xapmeta or x:xmpmeta.
324f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		// The recursion for x:xmpmeta is broader than the strictly defined choice,
325f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		// but gives us smaller code.
326f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		NodeList children = root.getChildNodes();
327f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		for (int i = 0; i < children.getLength(); i++)
328f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		{
329f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			root = children.item(i);
330f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			if (Node.PROCESSING_INSTRUCTION_NODE == root.getNodeType()  &&
331f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				((ProcessingInstruction) root).getTarget() == XMPConst.XMP_PI)
332f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			{
333f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				// Store the processing instructions content
334f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				if (result != null)
335f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				{
336f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling					result[2] = ((ProcessingInstruction) root).getData();
337f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				}
338f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			}
339f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			else if (Node.TEXT_NODE != root.getNodeType()  &&
340f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				Node.PROCESSING_INSTRUCTION_NODE != root.getNodeType())
341f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			{
342f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				String rootNS = root.getNamespaceURI();
343f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				String rootLocal = root.getLocalName();
344f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				if (
345f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling						(
346f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling							XMPConst.TAG_XMPMETA.equals(rootLocal)  ||
347f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling							XMPConst.TAG_XAPMETA.equals(rootLocal)
348f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling						)  &&
349f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling						XMPConst.NS_X.equals(rootNS)
350f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				   )
351f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				{
352f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling					// by not passing the RequireXMPMeta-option, the rdf-Node will be valid
353f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling					return findRootNode(root, false, result);
354f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				}
355f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				else if (!xmpmetaRequired  &&
356f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling						"RDF".equals(rootLocal)  &&
357f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling						 XMPConst.NS_RDF.equals(rootNS))
358f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				{
359f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling					if (result != null)
360f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling					{
361f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling						result[0] = root;
362f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling						result[1] = XMP_RDF;
363f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling					}
364f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling					return result;
365f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				}
366f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				else
367f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				{
368f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling					// continue searching
369f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling					Object[] newResult = findRootNode(root, xmpmetaRequired, result);
370f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling					if (newResult != null)
371f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling					{
372f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling						return newResult;
373f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling					}
374f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling					else
375f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling					{
376f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling						continue;
377f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling					}
378f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling				}
379f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			}
380f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		}
381f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
382f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		// no appropriate node has been found
383f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		return null;
384f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		//     is extracted here in the C++ Toolkit
385f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	}
386f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
387f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
388f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	/**
389f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 * @return Creates, configures and returnes the document builder factory for
390f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 *         the Metadata Parser.
391f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	 */
392f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	private static DocumentBuilderFactory createDocumentBuilderFactory()
393f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	{
394f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
395f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		factory.setNamespaceAware(true);
396f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		factory.setIgnoringComments(true);
397f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling
398f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		try
399f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		{
400f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			// honor System parsing limits, e.g.
401f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			// System.setProperty("entityExpansionLimit", "10");
402f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
403f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		}
404f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		catch (Exception e)
405f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		{
406f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			// Ignore IllegalArgumentException and ParserConfigurationException
407f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling			// in case the configured XML-Parser does not implement the feature.
408f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		}
409f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling		return factory;
410f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling	}
411f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling}