1f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling// ================================================================================================= 2f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling// ADOBE SYSTEMS INCORPORATED 3f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling// Copyright 2006 Adobe Systems Incorporated 4f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling// All Rights Reserved 5f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling// 6f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling// NOTICE: Adobe permits you to use, modify, and distribute this file in accordance with the terms 7f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling// of the Adobe license agreement accompanying it. 8f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling// ================================================================================================= 9f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 10f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingpackage com.adobe.xmp.impl; 11f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 12f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport java.io.IOException; 13f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport java.io.InputStream; 14f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport java.io.InputStreamReader; 15f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport java.io.Reader; 16f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport java.io.StringReader; 17f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport java.io.UnsupportedEncodingException; 18f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 19f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport javax.xml.XMLConstants; 20f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport javax.xml.parsers.DocumentBuilder; 21f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport 
javax.xml.parsers.DocumentBuilderFactory; 22f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport javax.xml.parsers.ParserConfigurationException; 23f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 24f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport org.w3c.dom.Document; 25f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport org.w3c.dom.Node; 26f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport org.w3c.dom.NodeList; 27f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport org.w3c.dom.ProcessingInstruction; 28f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport org.xml.sax.InputSource; 29f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport org.xml.sax.SAXException; 30f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 31f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport com.adobe.xmp.XMPConst; 32f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport com.adobe.xmp.XMPError; 33f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport com.adobe.xmp.XMPException; 34f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport com.adobe.xmp.XMPMeta; 35f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingimport com.adobe.xmp.options.ParseOptions; 36f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 37f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 38f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling/** 39f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * This class replaces the <code>ExpatAdapter.cpp</code> and does the 40f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * XML-parsing and fixes the prefix. After the parsing several normalisations 41f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * are applied to the XMPTree. 
42f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * 43f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @since 01.02.2006 44f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling */ 45f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberlingpublic class XMPMetaParser 46f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling{ 47f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling /** */ 48f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling private static final Object XMP_RDF = new Object(); 49f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling /** the DOM Parser Factory, options are set */ 50f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling private static DocumentBuilderFactory factory = createDocumentBuilderFactory(); 51f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 52f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling /** 53f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * Hidden constructor, initialises the SAX parser handler. 54f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling */ 55f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling private XMPMetaParser() 56f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 57f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling // EMPTY 58f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 59f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 60f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 61f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 62f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling /** 63f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * Parses the input source into an XMP metadata object, including 64f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * de-aliasing and normalisation. 
65f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * 66f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @param input the input can be an <code>InputStream</code>, a <code>String</code> or 67f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * a byte buffer containing the XMP packet. 68f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @param options the parse options 69f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @return Returns the resulting XMP metadata object 70f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @throws XMPException Thrown if parsing or normalisation fails. 71f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling */ 72f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling public static XMPMeta parse(Object input, ParseOptions options) throws XMPException 73f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 74f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling ParameterAsserts.assertNotNull(input); 75f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling options = options != null ? 
options : new ParseOptions(); 76f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 77f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling Document document = parseXml(input, options); 78f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 79f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling boolean xmpmetaRequired = options.getRequireXMPMeta(); 80f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling Object[] result = new Object[3]; 81f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling result = findRootNode(document, xmpmetaRequired, result); 82f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 83f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling if (result != null && result[1] == XMP_RDF) 84f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 85f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling XMPMetaImpl xmp = ParseRDF.parse((Node) result[0]); 86f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling xmp.setPacketHeader((String) result[2]); 87f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 88f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling // Check if the XMP object shall be normalized 89f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling if (!options.getOmitNormalization()) 90f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 91f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling return XMPNormalizer.process(xmp, options); 92f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 93f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling else 94f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 95f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling return xmp; 96f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 97f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 98f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling else 99f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 
100f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling // no appropriate root node found, return empty metadata object 101f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling return new XMPMetaImpl(); 102f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 103f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 104f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 105f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 106f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling /** 107f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * Parses the raw XML metadata packet considering the parsing options. 108f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * Latin-1/ISO-8859-1 can be accepted when the input is a byte stream 109f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * (some old toolkits versions such packets). The stream is 110f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * then wrapped in another stream that converts Latin-1 to UTF-8. 111f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * <p> 112f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * If control characters shall be fixed, a reader is used that fixes the chars to spaces 113f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * (if the input is a byte stream is has to be read as character stream). 114f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * <p> 115f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * Both options reduce the performance of the parser. 116f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * 117f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @param input the input can be an <code>InputStream</code>, a <code>String</code> or 118f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * a byte buffer containing the XMP packet. 
119f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @param options the parsing options 120f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @return Returns the parsed XML document or an exception. 121f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @throws XMPException Thrown if the parsing fails for different reasons 122f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling */ 123f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling private static Document parseXml(Object input, ParseOptions options) 124f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling throws XMPException 125f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 126f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling if (input instanceof InputStream) 127f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 128f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling return parseXmlFromInputStream((InputStream) input, options); 129f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 130f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling else if (input instanceof byte[]) 131f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 132f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling return parseXmlFromBytebuffer(new ByteBuffer((byte[]) input), options); 133f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 134f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling else 135f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 136f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling return parseXmlFromString((String) input, options); 137f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 138f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 139f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 140f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 141f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling /** 142f12f744843a67c910ec325fc6dfa73988f67b97cSascha 
Haeberling * Parses XML from an {@link InputStream}, 143f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * fixing the encoding (Latin-1 to UTF-8) and illegal control character optionally. 144f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * 145f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @param stream an <code>InputStream</code> 146f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @param options the parsing options 147f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @return Returns an XML DOM-Document. 148f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @throws XMPException Thrown when the parsing fails. 149f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling */ 150f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling private static Document parseXmlFromInputStream(InputStream stream, ParseOptions options) 151f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling throws XMPException 152f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 153f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling if (!options.getAcceptLatin1() && !options.getFixControlChars()) 154f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 155f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling return parseInputSource(new InputSource(stream)); 156f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 157f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling else 158f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 159f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling // load stream into bytebuffer 160f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling try 161f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 162f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling ByteBuffer buffer = new ByteBuffer(stream); 163f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling return parseXmlFromBytebuffer(buffer, options); 
164f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 165f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling catch (IOException e) 166f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 167f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling throw new XMPException("Error reading the XML-file", 168f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling XMPError.BADSTREAM, e); 169f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 170f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 171f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 172f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 173f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 174f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling /** 175f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * Parses XML from a byte buffer, 176f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * fixing the encoding (Latin-1 to UTF-8) and illegal control character optionally. 177f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * 178f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @param buffer a byte buffer containing the XMP packet 179f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @param options the parsing options 180f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @return Returns an XML DOM-Document. 181f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @throws XMPException Thrown when the parsing fails. 
182f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling */ 183f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling private static Document parseXmlFromBytebuffer(ByteBuffer buffer, ParseOptions options) 184f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling throws XMPException 185f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 186f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling InputSource source = new InputSource(buffer.getByteStream()); 187f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling try 188f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 189f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling return parseInputSource(source); 190f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 191f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling catch (XMPException e) 192f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 193f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling if (e.getErrorCode() == XMPError.BADXML || 194f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling e.getErrorCode() == XMPError.BADSTREAM) 195f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 196f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling if (options.getAcceptLatin1()) 197f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 198f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling buffer = Latin1Converter.convert(buffer); 199f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 200f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 201f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling if (options.getFixControlChars()) 202f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 203f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling try 204f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 205f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling String encoding = buffer.getEncoding(); 206f12f744843a67c910ec325fc6dfa73988f67b97cSascha 
Haeberling Reader fixReader = new FixASCIIControlsReader( 207f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling new InputStreamReader( 208f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling buffer.getByteStream(), encoding)); 209f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling return parseInputSource(new InputSource(fixReader)); 210f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 211f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling catch (UnsupportedEncodingException e1) 212f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 213f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling // can normally not happen as the encoding is provided by a util function 214f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling throw new XMPException("Unsupported Encoding", 215f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling XMPError.INTERNALFAILURE, e); 216f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 217f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 218f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling source = new InputSource(buffer.getByteStream()); 219f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling return parseInputSource(source); 220f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 221f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling else 222f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 223f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling throw e; 224f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 225f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 226f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 227f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 228f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 229f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling /** 230f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * Parses XML from a {@link String}, 
231f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * fixing the illegal control character optionally. 232f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * 233f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @param input a <code>String</code> containing the XMP packet 234f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @param options the parsing options 235f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @return Returns an XML DOM-Document. 236f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @throws XMPException Thrown when the parsing fails. 237f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling */ 238f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling private static Document parseXmlFromString(String input, ParseOptions options) 239f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling throws XMPException 240f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 241f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling InputSource source = new InputSource(new StringReader(input)); 242f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling try 243f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 244f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling return parseInputSource(source); 245f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 246f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling catch (XMPException e) 247f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 248f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling if (e.getErrorCode() == XMPError.BADXML && options.getFixControlChars()) 249f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 250f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling source = new InputSource(new FixASCIIControlsReader(new StringReader(input))); 251f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling return parseInputSource(source); 252f12f744843a67c910ec325fc6dfa73988f67b97cSascha 
Haeberling } 253f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling else 254f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 255f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling throw e; 256f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 257f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 258f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 259f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 260f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 261f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling /** 262f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * Runs the XML-Parser. 263f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @param source an <code>InputSource</code> 264f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @return Returns an XML DOM-Document. 265f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @throws XMPException Wraps parsing and I/O-exceptions into an XMPException. 
266f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling */ 267f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling private static Document parseInputSource(InputSource source) throws XMPException 268f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 269f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling try 270f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 271f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling DocumentBuilder builder = factory.newDocumentBuilder(); 272f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling builder.setErrorHandler(null); 273f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling return builder.parse(source); 274f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 275f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling catch (SAXException e) 276f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 277f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling throw new XMPException("XML parsing failure", XMPError.BADXML, e); 278f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 279f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling catch (ParserConfigurationException e) 280f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 281f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling throw new XMPException("XML Parser not correctly configured", 282f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling XMPError.UNKNOWN, e); 283f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 284f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling catch (IOException e) 285f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 286f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling throw new XMPException("Error reading the XML-file", XMPError.BADSTREAM, e); 287f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 288f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 289f12f744843a67c910ec325fc6dfa73988f67b97cSascha 
Haeberling 290f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 291f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling /** 292f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * Find the XML node that is the root of the XMP data tree. Generally this 293f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * will be an outer node, but it could be anywhere if a general XML document 294f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * is parsed (e.g. SVG). The XML parser counted all rdf:RDF and 295f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * pxmp:XMP_Packet nodes, and kept a pointer to the last one. If there is 296f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * more than one possible root use PickBestRoot to choose among them. 297f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * <p> 298f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * If there is a root node, try to extract the version of the previous XMP 299f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * toolkit. 300f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * <p> 301f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * Pick the first x:xmpmeta among multiple root candidates. If there aren't 302f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * any, pick the first bare rdf:RDF if that is allowed. The returned root is 303f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * the rdf:RDF child if an x:xmpmeta element was chosen. The search is 304f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * breadth first, so a higher level candiate is chosen over a lower level 305f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * one that was textually earlier in the serialized XML. 
306f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * 307f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @param root the root of the xml document 308f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @param xmpmetaRequired flag if the xmpmeta-tag is still required, might be set 309f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * initially to <code>true</code>, if the parse option "REQUIRE_XMP_META" is set 310f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @param result The result array that is filled during the recursive process. 311f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @return Returns an array that contains the result or <code>null</code>. 312f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * The array contains: 313f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * <ol> 314f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * <li>the rdf:RDF-node 315f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * <li>an object that is either XMP_RDF or XMP_PLAIN 316f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * <li>a flag that is true if a <?xpacket..> processing instruction has been found 317f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * <li>the body text of the xpacket-instruction. 318f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * </ol> 319f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * 320f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling */ 321f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling private static Object[] findRootNode(Node root, boolean xmpmetaRequired, Object[] result) 322f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 323f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling // Look among this parent's content for x:xapmeta or x:xmpmeta. 
324f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling // The recursion for x:xmpmeta is broader than the strictly defined choice, 325f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling // but gives us smaller code. 326f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling NodeList children = root.getChildNodes(); 327f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling for (int i = 0; i < children.getLength(); i++) 328f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 329f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling root = children.item(i); 330f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling if (Node.PROCESSING_INSTRUCTION_NODE == root.getNodeType() && 331f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling ((ProcessingInstruction) root).getTarget() == XMPConst.XMP_PI) 332f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 333f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling // Store the processing instructions content 334f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling if (result != null) 335f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 336f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling result[2] = ((ProcessingInstruction) root).getData(); 337f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 338f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 339f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling else if (Node.TEXT_NODE != root.getNodeType() && 340f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling Node.PROCESSING_INSTRUCTION_NODE != root.getNodeType()) 341f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 342f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling String rootNS = root.getNamespaceURI(); 343f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling String rootLocal = root.getLocalName(); 344f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling if ( 345f12f744843a67c910ec325fc6dfa73988f67b97cSascha 
Haeberling ( 346f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling XMPConst.TAG_XMPMETA.equals(rootLocal) || 347f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling XMPConst.TAG_XAPMETA.equals(rootLocal) 348f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling ) && 349f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling XMPConst.NS_X.equals(rootNS) 350f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling ) 351f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 352f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling // by not passing the RequireXMPMeta-option, the rdf-Node will be valid 353f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling return findRootNode(root, false, result); 354f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 355f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling else if (!xmpmetaRequired && 356f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling "RDF".equals(rootLocal) && 357f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling XMPConst.NS_RDF.equals(rootNS)) 358f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 359f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling if (result != null) 360f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 361f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling result[0] = root; 362f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling result[1] = XMP_RDF; 363f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 364f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling return result; 365f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 366f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling else 367f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 368f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling // continue searching 369f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling Object[] newResult = findRootNode(root, xmpmetaRequired, result); 
370f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling if (newResult != null) 371f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 372f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling return newResult; 373f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 374f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling else 375f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling { 376f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling continue; 377f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 378f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 379f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 380f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 381f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 382f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling // no appropriate node has been found 383f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling return null; 384f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling // is extracted here in the C++ Toolkit 385f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling } 386f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 387f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling 388f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling /** 389f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * @return Creates, configures and returnes the document builder factory for 390f12f744843a67c910ec325fc6dfa73988f67b97cSascha Haeberling * the Metadata Parser. 
*         The factory is namespace aware and ignores comments. Secure
*         processing is requested and, in addition, the resolution of
*         external general/parameter entities and the loading of external
*         DTDs are switched off, because secure processing on its own is not
*         guaranteed to keep every parser from fetching external resources.
*         All features are applied on a best-effort basis: a feature that the
*         configured XML parser does not support is skipped.
*/
    private static DocumentBuilderFactory createDocumentBuilderFactory()
    {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);
        factory.setIgnoringComments(true);

        // honor System parsing limits, e.g.
        // System.setProperty("entityExpansionLimit", "10");
        trySetFeature(factory, XMLConstants.FEATURE_SECURE_PROCESSING, true);

        // The parsed metadata may come from untrusted input, so the parser must
        // not pull in external entities or DTDs (XXE). Each feature is set on its
        // own so that one unsupported feature does not prevent the others.
        trySetFeature(factory, "http://xml.org/sax/features/external-general-entities", false);
        trySetFeature(factory, "http://xml.org/sax/features/external-parameter-entities", false);
        trySetFeature(factory,
            "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

        return factory;
    }


    /**
     * Sets a single feature on the factory and ignores the failure if the
     * configured XML parser does not implement that feature.
     *
     * @param factory the factory to configure
     * @param feature the name (URI) of the feature
     * @param state the value the feature shall be set to
     */
    private static void trySetFeature(DocumentBuilderFactory factory, String feature,
        boolean state)
    {
        try
        {
            factory.setFeature(feature, state);
        }
        catch (ParserConfigurationException ignored)
        {
            // the configured XML-Parser does not implement the feature
        }
        catch (RuntimeException ignored)
        {
            // e.g. an IllegalArgumentException raised for an unknown feature name
        }
    }
} // closes the enclosing class