1// =================================================================================================
2// ADOBE SYSTEMS INCORPORATED
3// Copyright 2006 Adobe Systems Incorporated
4// All Rights Reserved
5//
6// NOTICE:  Adobe permits you to use, modify, and distribute this file in accordance with the terms
7// of the Adobe license agreement accompanying it.
8// =================================================================================================
9
10package com.adobe.xmp.impl.xpath;
11
12import com.adobe.xmp.XMPError;
13import com.adobe.xmp.XMPException;
14import com.adobe.xmp.XMPMetaFactory;
15import com.adobe.xmp.impl.Utils;
16import com.adobe.xmp.properties.XMPAliasInfo;
17
18
19/**
20 * Parser for XMP XPaths.
21 *
22 * @since   01.03.2006
23 */
24public final class XMPPathParser
25{
26	/**
27	 * Private constructor
28	 */
29	private XMPPathParser()
30	{
31		// empty
32	}
33
34
35	/**
36	 * Split an XMPPath expression apart at the conceptual steps, adding the
37	 * root namespace prefix to the first property component. The schema URI is
38	 * put in the first (0th) slot in the expanded XMPPath. Check if the top
39	 * level component is an alias, but don't resolve it.
40	 * <p>
41	 * In the most verbose case steps are separated by '/', and each step can be
42	 * of these forms:
43	 * <dl>
44	 * <dt>prefix:name
45	 * <dd> A top level property or struct field.
46	 * <dt>[index]
47	 * <dd> An element of an array.
48	 * <dt>[last()]
49	 * <dd> The last element of an array.
50	 * <dt>[fieldName=&quot;value&quot;]
51	 * <dd> An element in an array of structs, chosen by a field value.
52	 * <dt>[@xml:lang=&quot;value&quot;]
53	 * <dd> An element in an alt-text array, chosen by the xml:lang qualifier.
54	 * <dt>[?qualName=&quot;value&quot;]
55	 * <dd> An element in an array, chosen by a qualifier value.
56	 * <dt>@xml:lang
57	 * <dd> An xml:lang qualifier.
58	 * <dt>?qualName
59	 * <dd> A general qualifier.
60	 * </dl>
61	 * <p>
62	 * The logic is complicated though by shorthand for arrays, the separating
63	 * '/' and leading '*' are optional. These are all equivalent: array/*[2]
64	 * array/[2] array*[2] array[2] All of these are broken into the 2 steps
65	 * "array" and "[2]".
66	 * <p>
67	 * The value portion in the array selector forms is a string quoted by '''
68	 * or '"'. The value may contain any character including a doubled quoting
69	 * character. The value may be empty.
70	 * <p>
71	 * The syntax isn't checked, but an XML name begins with a letter or '_',
72	 * and contains letters, digits, '.', '-', '_', and a bunch of special
73	 * non-ASCII Unicode characters. An XML qualified name is a pair of names
74	 * separated by a colon.
75	 * @param schemaNS
76	 *            schema namespace
77	 * @param path
78	 *            property name
79	 * @return Returns the expandet XMPPath.
80	 * @throws XMPException
81	 *             Thrown if the format is not correct somehow.
82	 *
83	 */
84	public static XMPPath expandXPath(String schemaNS, String path) throws XMPException
85	{
86		if (schemaNS == null  ||  path == null)
87		{
88			throw new XMPException("Parameter must not be null", XMPError.BADPARAM);
89		}
90
91		XMPPath expandedXPath = new XMPPath();
92		PathPosition pos = new PathPosition();
93		pos.path = path;
94
95		// Pull out the first component and do some special processing on it: add the schema
96		// namespace prefix and and see if it is an alias. The start must be a "qualName".
97		parseRootNode(schemaNS, pos, expandedXPath);
98
99		// Now continue to process the rest of the XMPPath string.
100		while (pos.stepEnd < path.length())
101		{
102			pos.stepBegin = pos.stepEnd;
103
104			skipPathDelimiter(path, pos);
105
106			pos.stepEnd = pos.stepBegin;
107
108
109			XMPPathSegment segment;
110			if (path.charAt(pos.stepBegin) != '[')
111			{
112				// A struct field or qualifier.
113				segment = parseStructSegment(pos);
114			}
115			else
116			{
117				// One of the array forms.
118				segment = parseIndexSegment(pos);
119			}
120
121
122			if (segment.getKind() == XMPPath.STRUCT_FIELD_STEP)
123			{
124				if (segment.getName().charAt(0) == '@')
125				{
126					segment.setName("?" + segment.getName().substring(1));
127					if (!"?xml:lang".equals(segment.getName()))
128					{
129						throw new XMPException("Only xml:lang allowed with '@'",
130								XMPError.BADXPATH);
131					}
132				}
133				if (segment.getName().charAt(0) == '?')
134				{
135					pos.nameStart++;
136					segment.setKind(XMPPath.QUALIFIER_STEP);
137				}
138
139				verifyQualName(pos.path.substring(pos.nameStart, pos.nameEnd));
140			}
141			else if (segment.getKind() == XMPPath.FIELD_SELECTOR_STEP)
142			{
143				if (segment.getName().charAt(1) == '@')
144				{
145					segment.setName("[?" + segment.getName().substring(2));
146					if (!segment.getName().startsWith("[?xml:lang="))
147					{
148						throw new XMPException("Only xml:lang allowed with '@'",
149								XMPError.BADXPATH);
150					}
151				}
152
153				if (segment.getName().charAt(1) == '?')
154				{
155					pos.nameStart++;
156					segment.setKind(XMPPath.QUAL_SELECTOR_STEP);
157					verifyQualName(pos.path.substring(pos.nameStart, pos.nameEnd));
158				}
159			}
160
161			expandedXPath.add(segment);
162		}
163		return expandedXPath;
164	}
165
166
167	/**
168	 * @param path
169	 * @param pos
170	 * @throws XMPException
171	 */
172	private static void skipPathDelimiter(String path, PathPosition pos) throws XMPException
173	{
174		if (path.charAt(pos.stepBegin) == '/')
175		{
176			// skip slash
177
178			pos.stepBegin++;
179
180			// added for Java
181			if (pos.stepBegin >= path.length())
182			{
183				throw new XMPException("Empty XMPPath segment", XMPError.BADXPATH);
184			}
185		}
186
187		if (path.charAt(pos.stepBegin) == '*')
188		{
189			// skip asterisk
190
191			pos.stepBegin++;
192			if (pos.stepBegin >= path.length() || path.charAt(pos.stepBegin) != '[')
193			{
194				throw new XMPException("Missing '[' after '*'", XMPError.BADXPATH);
195			}
196		}
197	}
198
199
200	/**
201	 * Parses a struct segment
202	 * @param pos the current position in the path
203	 * @return Retusn the segment or an errror
204	 * @throws XMPException If the sement is empty
205	 */
206	private static XMPPathSegment parseStructSegment(PathPosition pos) throws XMPException
207	{
208		pos.nameStart = pos.stepBegin;
209		while (pos.stepEnd < pos.path.length() && "/[*".indexOf(pos.path.charAt(pos.stepEnd)) < 0)
210		{
211			pos.stepEnd++;
212		}
213		pos.nameEnd = pos.stepEnd;
214
215		if (pos.stepEnd == pos.stepBegin)
216		{
217			throw new XMPException("Empty XMPPath segment", XMPError.BADXPATH);
218		}
219
220		// ! Touch up later, also changing '@' to '?'.
221		XMPPathSegment segment = new XMPPathSegment(pos.path.substring(pos.stepBegin, pos.stepEnd),
222				XMPPath.STRUCT_FIELD_STEP);
223		return segment;
224	}
225
226
227	/**
228	 * Parses an array index segment.
229	 *
230	 * @param pos the xmp path
231	 * @return Returns the segment or an error
232	 * @throws XMPException thrown on xmp path errors
233	 *
234	 */
235	private static XMPPathSegment parseIndexSegment(PathPosition pos) throws XMPException
236	{
237		XMPPathSegment segment;
238		pos.stepEnd++; // Look at the character after the leading '['.
239
240		if ('0' <= pos.path.charAt(pos.stepEnd) && pos.path.charAt(pos.stepEnd) <= '9')
241		{
242			// A numeric (decimal integer) array index.
243			while (pos.stepEnd < pos.path.length() && '0' <= pos.path.charAt(pos.stepEnd)
244					&& pos.path.charAt(pos.stepEnd) <= '9')
245			{
246				pos.stepEnd++;
247			}
248
249			segment = new XMPPathSegment(null, XMPPath.ARRAY_INDEX_STEP);
250		}
251		else
252		{
253			// Could be "[last()]" or one of the selector forms. Find the ']' or '='.
254
255			while (pos.stepEnd < pos.path.length() && pos.path.charAt(pos.stepEnd) != ']'
256					&& pos.path.charAt(pos.stepEnd) != '=')
257			{
258				pos.stepEnd++;
259			}
260
261			if (pos.stepEnd >= pos.path.length())
262			{
263				throw new XMPException("Missing ']' or '=' for array index", XMPError.BADXPATH);
264			}
265
266			if (pos.path.charAt(pos.stepEnd) == ']')
267			{
268				if (!"[last()".equals(pos.path.substring(pos.stepBegin, pos.stepEnd)))
269				{
270					throw new XMPException(
271						"Invalid non-numeric array index", XMPError.BADXPATH);
272				}
273				segment = new XMPPathSegment(null, XMPPath.ARRAY_LAST_STEP);
274			}
275			else
276			{
277				pos.nameStart = pos.stepBegin + 1;
278				pos.nameEnd = pos.stepEnd;
279				pos.stepEnd++; // Absorb the '=', remember the quote.
280				char quote = pos.path.charAt(pos.stepEnd);
281				if (quote != '\'' && quote != '"')
282				{
283					throw new XMPException(
284						"Invalid quote in array selector", XMPError.BADXPATH);
285				}
286
287				pos.stepEnd++; // Absorb the leading quote.
288				while (pos.stepEnd < pos.path.length())
289				{
290					if (pos.path.charAt(pos.stepEnd) == quote)
291					{
292						// check for escaped quote
293						if (pos.stepEnd + 1 >= pos.path.length()
294								|| pos.path.charAt(pos.stepEnd + 1) != quote)
295						{
296							break;
297						}
298						pos.stepEnd++;
299					}
300					pos.stepEnd++;
301				}
302
303				if (pos.stepEnd >= pos.path.length())
304				{
305					throw new XMPException("No terminating quote for array selector",
306							XMPError.BADXPATH);
307				}
308				pos.stepEnd++; // Absorb the trailing quote.
309
310				// ! Touch up later, also changing '@' to '?'.
311				segment = new XMPPathSegment(null, XMPPath.FIELD_SELECTOR_STEP);
312			}
313		}
314
315
316		if (pos.stepEnd >= pos.path.length() || pos.path.charAt(pos.stepEnd) != ']')
317		{
318			throw new XMPException("Missing ']' for array index", XMPError.BADXPATH);
319		}
320		pos.stepEnd++;
321		segment.setName(pos.path.substring(pos.stepBegin, pos.stepEnd));
322
323		return segment;
324	}
325
326
327	/**
328	 * Parses the root node of an XMP Path, checks if namespace and prefix fit together
329	 * and resolve the property to the base property if it is an alias.
330	 * @param schemaNS the root namespace
331	 * @param pos the parsing position helper
332	 * @param expandedXPath  the path to contribute to
333	 * @throws XMPException If the path is not valid.
334	 */
335	private static void parseRootNode(String schemaNS, PathPosition pos, XMPPath expandedXPath)
336			throws XMPException
337	{
338		while (pos.stepEnd < pos.path.length() && "/[*".indexOf(pos.path.charAt(pos.stepEnd)) < 0)
339		{
340			pos.stepEnd++;
341		}
342
343		if (pos.stepEnd == pos.stepBegin)
344		{
345			throw new XMPException("Empty initial XMPPath step", XMPError.BADXPATH);
346		}
347
348		String rootProp = verifyXPathRoot(schemaNS, pos.path.substring(pos.stepBegin, pos.stepEnd));
349		XMPAliasInfo aliasInfo = XMPMetaFactory.getSchemaRegistry().findAlias(rootProp);
350		if (aliasInfo == null)
351		{
352			// add schema xpath step
353			expandedXPath.add(new XMPPathSegment(schemaNS, XMPPath.SCHEMA_NODE));
354			XMPPathSegment rootStep = new XMPPathSegment(rootProp, XMPPath.STRUCT_FIELD_STEP);
355			expandedXPath.add(rootStep);
356		}
357		else
358		{
359			// add schema xpath step and base step of alias
360			expandedXPath.add(new XMPPathSegment(aliasInfo.getNamespace(), XMPPath.SCHEMA_NODE));
361			XMPPathSegment rootStep = new XMPPathSegment(verifyXPathRoot(aliasInfo.getNamespace(),
362					aliasInfo.getPropName()),
363					XMPPath.STRUCT_FIELD_STEP);
364			rootStep.setAlias(true);
365			rootStep.setAliasForm(aliasInfo.getAliasForm().getOptions());
366			expandedXPath.add(rootStep);
367
368			if (aliasInfo.getAliasForm().isArrayAltText())
369			{
370				XMPPathSegment qualSelectorStep = new XMPPathSegment("[?xml:lang='x-default']",
371						XMPPath.QUAL_SELECTOR_STEP);
372				qualSelectorStep.setAlias(true);
373				qualSelectorStep.setAliasForm(aliasInfo.getAliasForm().getOptions());
374				expandedXPath.add(qualSelectorStep);
375			}
376			else if (aliasInfo.getAliasForm().isArray())
377			{
378				XMPPathSegment indexStep = new XMPPathSegment("[1]",
379					XMPPath.ARRAY_INDEX_STEP);
380				indexStep.setAlias(true);
381				indexStep.setAliasForm(aliasInfo.getAliasForm().getOptions());
382				expandedXPath.add(indexStep);
383			}
384		}
385	}
386
387
388	/**
389	 * Verifies whether the qualifier name is not XML conformant or the
390	 * namespace prefix has not been registered.
391	 *
392	 * @param qualName
393	 *            a qualifier name
394	 * @throws XMPException
395	 *             If the name is not conformant
396	 */
397	private static void verifyQualName(String qualName) throws XMPException
398	{
399		int colonPos = qualName.indexOf(':');
400		if (colonPos > 0)
401		{
402			String prefix = qualName.substring(0, colonPos);
403			if (Utils.isXMLNameNS(prefix))
404			{
405				String regURI = XMPMetaFactory.getSchemaRegistry().getNamespaceURI(
406						prefix);
407				if (regURI != null)
408				{
409					return;
410				}
411
412				throw new XMPException("Unknown namespace prefix for qualified name",
413						XMPError.BADXPATH);
414			}
415		}
416
417		throw new XMPException("Ill-formed qualified name", XMPError.BADXPATH);
418	}
419
420
421	/**
422	 * Verify if an XML name is conformant.
423	 *
424	 * @param name
425	 *            an XML name
426	 * @throws XMPException
427	 *             When the name is not XML conformant
428	 */
429	private static void verifySimpleXMLName(String name) throws XMPException
430	{
431		if (!Utils.isXMLName(name))
432		{
433			throw new XMPException("Bad XML name", XMPError.BADXPATH);
434		}
435	}
436
437
438	/**
439	 * Set up the first 2 components of the expanded XMPPath. Normalizes the various cases of using
440	 * the full schema URI and/or a qualified root property name. Returns true for normal
441	 * processing. If allowUnknownSchemaNS is true and the schema namespace is not registered, false
442	 * is returned. If allowUnknownSchemaNS is false and the schema namespace is not registered, an
443	 * exception is thrown
444	 * <P>
445	 * (Should someday check the full syntax:)
446	 *
447	 * @param schemaNS schema namespace
448	 * @param rootProp the root xpath segment
449	 * @return Returns root QName.
450	 * @throws XMPException Thrown if the format is not correct somehow.
451	 */
452	private static String verifyXPathRoot(String schemaNS, String rootProp)
453		throws XMPException
454	{
455		// Do some basic checks on the URI and name. Try to lookup the URI. See if the name is
456		// qualified.
457
458		if (schemaNS == null || schemaNS.length() == 0)
459		{
460			throw new XMPException(
461				"Schema namespace URI is required", XMPError.BADSCHEMA);
462		}
463
464		if ((rootProp.charAt(0) == '?') || (rootProp.charAt(0) == '@'))
465		{
466			throw new XMPException("Top level name must not be a qualifier", XMPError.BADXPATH);
467		}
468
469		if (rootProp.indexOf('/') >= 0 || rootProp.indexOf('[') >= 0)
470		{
471			throw new XMPException("Top level name must be simple", XMPError.BADXPATH);
472		}
473
474		String prefix = XMPMetaFactory.getSchemaRegistry().getNamespacePrefix(schemaNS);
475		if (prefix == null)
476		{
477			throw new XMPException("Unregistered schema namespace URI", XMPError.BADSCHEMA);
478		}
479
480		// Verify the various URI and prefix combinations. Initialize the
481		// expanded XMPPath.
482		int colonPos = rootProp.indexOf(':');
483		if (colonPos < 0)
484		{
485			// The propName is unqualified, use the schemaURI and associated
486			// prefix.
487			verifySimpleXMLName(rootProp); // Verify the part before any colon
488			return prefix + rootProp;
489		}
490		else
491		{
492			// The propName is qualified. Make sure the prefix is legit. Use the associated URI and
493			// qualified name.
494
495			// Verify the part before any colon
496			verifySimpleXMLName(rootProp.substring(0, colonPos));
497			verifySimpleXMLName(rootProp.substring(colonPos));
498
499			prefix = rootProp.substring(0, colonPos + 1);
500
501			String regPrefix = XMPMetaFactory.getSchemaRegistry().getNamespacePrefix(schemaNS);
502			if (regPrefix == null)
503			{
504				throw new XMPException("Unknown schema namespace prefix", XMPError.BADSCHEMA);
505			}
506			if (!prefix.equals(regPrefix))
507			{
508				throw new XMPException("Schema namespace URI and prefix mismatch",
509						XMPError.BADSCHEMA);
510			}
511
512			return rootProp;
513		}
514	}
515}
516
517
518
519
520
/**
 * Mutable cursor used while parsing a path: it holds the path text together
 * with the character positions of the step and name currently being read.
 * A new instance starts with a <code>null</code> path and all positions at 0.
 */
class PathPosition
{
	/** The complete path expression. */
	public String path;
	/** Index of the first character of the current segment name. */
	int nameStart;
	/** Index behind the last character of the current segment name. */
	int nameEnd;
	/** Index at which the current step begins. */
	int stepBegin;
	/** Index behind the last character of the current step. */
	int stepEnd;
}
537
538