1/*
2 * Copyright (C) 2013 Google Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.google.doclava;
18
import java.io.*;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.io.File;

import com.google.clearsilver.jsilver.data.Data;

import org.ccil.cowan.tagsoup.*;
import org.xml.sax.XMLReader;
import org.xml.sax.InputSource;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.DefaultHandler;

import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
48
/**
* Metadata associated with a specific documentation page. Extracts
* metadata based on the page's declared hdf vars (meta.tags and others)
* as well as implicit data relating to the page, such as url, type, etc.
* Includes a Node class that represents a page's metadata and can be
* attached as a parent or child in the tree of metadata nodes for all pages.
* Node also includes methods for rendering the node tree to a json file
* in docs output, which is then used by JavaScript to load metadata
* objects into html pages.
*/
59
60public class PageMetadata {
  // Source file for this page; assigned by the constructor.
  File mSource;
  // Output path for this page. The constructor appends a trailing '/' when the
  // path is longer than one character and does not already end with one.
  String mDest;
  // NOTE(review): not assigned anywhere in the visible part of this file.
  String mTagList;
  // When true, meta.tags values (and the sample.group value) are lowercased
  // during normalization.
  static boolean sLowercaseTags = true;
  // When true, page.tags (keyword) values are lowercased during normalization.
  static boolean sLowercaseKeywords = true;
  //static String linkPrefix = (Doclava.META_DBG) ? "/" : "http://developer.android.com/";
  /**
   * Regex pattern to match javadoc inline tags such as @link. Matches
   * from the opening brace-at through the closing brace and captures the
   * trailing symbol (letters, digits, underscore, parentheses) that follows
   * the last whitespace, dot or hash character into $1.
   */
  private static final Pattern JD_TAG_PATTERN =
      Pattern.compile("\\{@.*?[\\s\\.\\#]([A-Za-z\\(\\)\\d_]+)(?=\u007D)\u007D");
73
74  public PageMetadata(File source, String dest, List<Node> taglist) {
75    mSource = source;
76    mDest = dest;
77
78    if (dest != null) {
79      int len = dest.length();
80      if (len > 1 && dest.charAt(len - 1) != '/') {
81        mDest = dest + '/';
82      } else {
83        mDest = dest;
84      }
85    }
86  }
87
88  /**
89  * Given a list of metadata nodes organized by type, sort the
90  * root nodes by type name and render the types and their child
91  * metadata nodes to a json file in the out dir.
92  *
93  * @param rootTypeNodesList A list of root metadata nodes, each
94  *        representing a type and it's member child pages.
95  * @deprecated
96  */
97  public static void WriteList(List<Node> rootTypeNodesList) {
98    Collections.sort(rootTypeNodesList, BY_TYPE_NAME);
99    Node pageMeta = new Node.Builder().setLabel("TOP").setChildren(rootTypeNodesList).build();
100
101    StringBuilder buf = new StringBuilder();
102    // write the taglist to string format
103    pageMeta.renderTypeResources(buf);
104    pageMeta.renderTypesByTag(buf);
105    // write the taglist to js file
106    Data data = Doclava.makeHDF();
107    data.setValue("reference_tree", buf.toString());
108    ClearPage.write(data, "jd_lists_unified.cs",  "jd_lists_unified.js");
109  }
110
111  /**
112  * Given a list of metadata nodes organized by lang, sort the
113  * root nodes by type name and render the types and their child
114  * metadata nodes to separate lang-specific json files in the out dir.
115  *
116  * @param rootNodesList A list of root metadata nodes, each
117  *        representing a type and it's member child pages.
118  */
119  public static void WriteListByLang(List<Node> rootNodesList) {
120    Collections.sort(rootNodesList, BY_LANG_NAME);
121    for (Node n : rootNodesList) {
122      String langFilename = "";
123      String langname = n.getLang();
124      langFilename = "_" + langname;
125      Collections.sort(n.getChildren(), BY_TYPE_NAME);
126      Node pageMeta = new Node.Builder().setLabel("TOP").setChildren(n.getChildren()).build();
127
128      StringBuilder buf = new StringBuilder();
129      // write the taglist to string format
130      pageMeta.renderLangResources(buf,langname);
131      //pageMeta.renderTypesByTag(buf);
132      // write the taglist to js file
133      Data data = Doclava.makeHDF();
134      data.setValue("reference_tree", buf.toString());
135      data.setValue("metadata.lang", langname);
136      String unifiedFilename = "jd_lists_unified" + langFilename + ".js";
137      String extrasFilename = "jd_extras" + langFilename + ".js";
138      // write out jd_lists_unified for each lang
139      ClearPage.write(data, "jd_lists_unified.cs", unifiedFilename);
140      // append jd_extras to jd_lists_unified for each lang, then delete.
141      appendExtrasMetadata(extrasFilename, unifiedFilename);
142    }
143  }
144
145  /**
146  * Given a list of metadata nodes organized by lang, sort the
147  * root nodes by type name and render the types and their child
148  * samples metadata nodes only to separate lang-specific json files
149  * in the out dir. Only used by devsite (ds) builds.
150  *
151  * @param rootNodesList A list of root metadata nodes, each
152  *        representing a type and it's member child pages.
153  */
154  public static void WriteSamplesListByLang(List<Node> rootNodesList) {
155    Collections.sort(rootNodesList, BY_LANG_NAME);
156    for (Node n : rootNodesList) {
157      boolean langHasSamples = false;
158      String langFilename = "";
159      String langname = n.getLang();
160      langFilename = "_" + langname;
161      Collections.sort(n.getChildren(), BY_TYPE_NAME);
162      Node pageMeta = new Node.Builder().setLabel("TOP").setChildren(n.getChildren()).build();
163
164      StringBuilder buf = new StringBuilder();
165      // write the taglist to string format
166      langHasSamples = pageMeta.renderSamplesResources(buf,langname);
167      // write the taglist to js file
168      Data data = Doclava.makeHDF();
169      data.setValue("reference_tree", buf.toString());
170      data.setValue("metadata.lang", langname);
171
172      if (langHasSamples) {
173        data.setValue("samples_only", "1");
174        // write out jd_lists_unified for each lang
175        String unifiedFilename = "android_samples_metadata" + langFilename + ".js";
176        ClearPage.write(data, "jd_lists_unified.cs", unifiedFilename);
177      }
178    }
179  }
180
181  /**
182  * Extract supported metadata values from a page and add them as
183  * a child node of a root node based on type. Some metadata values
184  * are normalized. Unsupported metadata fields are ignored. See
185  * Node for supported metadata fields and methods for accessing values.
186  *
187  * @param docfile The file from which to extract metadata.
188  * @param dest The output path for the file, used to set link to page.
189  * @param filename The file from which to extract metadata.
190  * @param hdf Data object in which to store the metadata values.
191  * @param tagList The file from which to extract metadata.
192  */
193  public static void setPageMetadata(String docfile, String dest, String filename,
194      Data hdf, List<Node> tagList) {
195    //exclude this page if author does not want it included
196    boolean excludeNode = "true".equals(hdf.getValue("excludeFromSuggestions",""));
197
198    //check whether summary and image exist and if not, get them from itemprop/markup
199    Boolean needsSummary = "".equals(hdf.getValue("page.metaDescription", ""));
200    Boolean needsImage = "".equals(hdf.getValue("page.image", ""));
201    if ((needsSummary) || (needsImage)) {
202      //try to extract the metadata from itemprop and markup
203      inferMetadata(docfile, hdf, needsSummary, needsImage);
204    }
205
206    //extract available metadata and set it in a node
207    if (!excludeNode) {
208      Node pageMeta = new Node.Builder().build();
209      pageMeta.setLabel(getTitleNormalized(hdf, "page.title"));
210      pageMeta.setCategory(hdf.getValue("page.category",""));
211      pageMeta.setSummary(hdf.getValue("page.metaDescription",""));
212      pageMeta.setLink(getPageUrlNormalized(filename));
213      pageMeta.setGroup(getStringValueNormalized(hdf,"sample.group"));
214      pageMeta.setKeywords(getPageTagsNormalized(hdf, "page.tags"));
215      pageMeta.setTags(getPageTagsNormalized(hdf, "meta.tags"));
216      pageMeta.setImage(getImageUrlNormalized(hdf.getValue("page.image", "")));
217      pageMeta.setLang(getLangStringNormalized(hdf, filename));
218      pageMeta.setType(getStringValueNormalized(hdf, "page.type"));
219      pageMeta.setTimestamp(hdf.getValue("page.timestamp",""));
220      appendMetaNodeByLang(pageMeta, tagList);
221    }
222  }
223
224  /**
225  * Attempt to infer page metadata based on the contents of the
226  * file. Load and parse the file as a dom tree. Select values
227  * in this order: 1. dom node specifically tagged with
228  * microdata (itemprop). 2. first qualitifed p or img node.
229  *
230  * @param docfile The file from which to extract metadata.
231  * @param hdf Data object in which to store the metadata values.
232  * @param needsSummary Whether to extract summary metadata.
233  * @param needsImage Whether to extract image metadata.
234  */
235  public static void inferMetadata(String docfile, Data hdf,
236      Boolean needsSummary, Boolean needsImage) {
237    String sum = "";
238    String imageUrl = "";
239    String sumFrom = needsSummary ? "none" : "hdf";
240    String imgFrom = needsImage ? "none" : "hdf";
241    String filedata = hdf.getValue("commentText", "");
242    if (Doclava.META_DBG) System.out.println("----- " + docfile + "\n");
243
244    try {
245      XPathFactory xpathFac = XPathFactory.newInstance();
246      XPath xpath = xpathFac.newXPath();
247      InputStream inputStream = new ByteArrayInputStream(filedata.getBytes());
248      XMLReader reader = new Parser();
249      reader.setFeature(Parser.namespacesFeature, false);
250      reader.setFeature(Parser.namespacePrefixesFeature, false);
251      reader.setFeature(Parser.ignoreBogonsFeature, true);
252
253      Transformer transformer = TransformerFactory.newInstance().newTransformer();
254      DOMResult result = new DOMResult();
255      transformer.transform(new SAXSource(reader, new InputSource(inputStream)), result);
256      org.w3c.dom.Node htmlNode = result.getNode();
257
258      if (needsSummary) {
259        StringBuilder sumStrings = new StringBuilder();
260        XPathExpression ItempropDescExpr = xpath.compile("/descendant-or-self::*"
261            + "[@itemprop='description'][1]//text()[string(.)]");
262        org.w3c.dom.NodeList nodes = (org.w3c.dom.NodeList) ItempropDescExpr.evaluate(htmlNode,
263            XPathConstants.NODESET);
264        if (nodes.getLength() > 0) {
265          for (int i = 0; i < nodes.getLength(); i++) {
266            String tx = nodes.item(i).getNodeValue();
267            sumStrings.append(tx);
268            sumFrom = "itemprop";
269          }
270        } else {
271          XPathExpression FirstParaExpr = xpath.compile("//p[not(../../../"
272              + "@class='notice-developers') and not(../@class='sidebox')"
273              + "and not(@class)]//text()");
274          nodes = (org.w3c.dom.NodeList) FirstParaExpr.evaluate(htmlNode, XPathConstants.NODESET);
275          if (nodes.getLength() > 0) {
276            for (int i = 0; i < nodes.getLength(); i++) {
277              String tx = nodes.item(i).getNodeValue();
278              sumStrings.append(tx + " ");
279              sumFrom = "markup";
280            }
281          }
282        }
283        //found a summary string, now normalize it
284        sum = sumStrings.toString().trim();
285        if ((sum != null) && (!"".equals(sum))) {
286          sum = getSummaryNormalized(sum);
287        }
288        //normalized summary ended up being too short to be meaningful
289        if ("".equals(sum)) {
290           if (Doclava.META_DBG) System.out.println("Warning: description too short! ("
291            + sum.length() + "chars) ...\n\n");
292        }
293        //summary looks good, store it to the file hdf data
294        hdf.setValue("page.metaDescription", sum);
295      }
296      if (needsImage) {
297        XPathExpression ItempropImageExpr = xpath.compile("//*[@itemprop='image']/@src");
298        org.w3c.dom.NodeList imgNodes = (org.w3c.dom.NodeList) ItempropImageExpr.evaluate(htmlNode,
299            XPathConstants.NODESET);
300        if (imgNodes.getLength() > 0) {
301          imageUrl = imgNodes.item(0).getNodeValue();
302          imageUrl = getImageUrlNormalized(imageUrl);
303          imgFrom = "itemprop";
304        } else {
305          XPathExpression FirstImgExpr = xpath.compile("//img/@src");
306          imgNodes = (org.w3c.dom.NodeList) FirstImgExpr.evaluate(htmlNode, XPathConstants.NODESET);
307          if (imgNodes.getLength() > 0) {
308            //iterate nodes looking for valid image url and normalize.
309            for (int i = 0; i < imgNodes.getLength(); i++) {
310              String tx = imgNodes.item(i).getNodeValue();
311              //qualify and normalize the image
312              imageUrl = getImageUrlNormalized(tx);
313              //this img src did not qualify, keep looking...
314              if ("".equals(imageUrl)) {
315                if (Doclava.META_DBG) System.out.println("    >>>>> Discarded image: " + tx);
316                continue;
317              } else {
318                imgFrom = "markup";
319                break;
320              }
321            }
322          }
323        }
324        //img src url looks good, store it to the file hdf data
325        hdf.setValue("page.image", imageUrl);
326      }
327      if (Doclava.META_DBG) System.out.println("Image (" + imgFrom + "): " + imageUrl);
328      if (Doclava.META_DBG) System.out.println("Summary (" + sumFrom + "): " + sum.length()
329          + " chars\n\n" + sum + "\n");
330      return;
331
332    } catch (Exception e) {
333      if (Doclava.META_DBG) System.out.println("    >>>>> Exception: " + e + "\n");
334    }
335  }
336
337  /**
338  * Normalize a comma-delimited, multi-string value. Split on commas, remove
339  * quotes, trim whitespace, optionally make keywords/tags lowercase for
340  * easier matching.
341  *
342  * @param hdf Data object in which the metadata values are stored.
343  * @param tag The hdf var from which the metadata was extracted.
344  * @return A normalized string value for the specified tag.
345  */
346  public static String getPageTagsNormalized(Data hdf, String tag) {
347
348    String normTags = "";
349    StringBuilder tags = new StringBuilder();
350    String tagList = hdf.getValue(tag, "");
351    if (tag.equals("meta.tags") && (tagList.equals(""))) {
352      //use keywords as tags if no meta tags are available
353      tagList = hdf.getValue("page.tags", "");
354    }
355    if (!tagList.equals("")) {
356      tagList = tagList.replaceAll("\"", "");
357
358      String[] tagParts = tagList.split("[,\u3001]");
359      for (int iter = 0; iter < tagParts.length; iter++) {
360        tags.append("\"");
361        if (tag.equals("meta.tags") && sLowercaseTags) {
362          tagParts[iter] = tagParts[iter].toLowerCase();
363        } else if (tag.equals("page.tags") && sLowercaseKeywords) {
364          tagParts[iter] = tagParts[iter].toLowerCase();
365        }
366        if (tag.equals("meta.tags")) {
367          //tags.append("#"); //to match hashtag format used with yt/blogger resources
368          tagParts[iter] = tagParts[iter].replaceAll(" ","");
369        }
370        tags.append(tagParts[iter].trim());
371        tags.append("\"");
372        if (iter < tagParts.length - 1) {
373          tags.append(",");
374        }
375      }
376    }
377    //write this back to hdf to expose through js
378    if (tag.equals("meta.tags")) {
379      hdf.setValue(tag, tags.toString());
380    }
381    return tags.toString();
382  }
383
384  /**
385  * Normalize a string for which only a single value is supported.
386  * Extract the string up to the first comma, remove quotes, remove
387  * any forward-slash prefix, trim any whitespace, optionally make
388  * lowercase for easier matching.
389  *
390  * @param hdf Data object in which the metadata values are stored.
391  * @param tag The hdf var from which the metadata should be extracted.
392  * @return A normalized string value for the specified tag.
393  */
394  public static String getStringValueNormalized(Data hdf, String tag) {
395    StringBuilder outString =  new StringBuilder();
396    String tagList = hdf.getValue(tag, "");
397    tagList.replaceAll("\"", "");
398    if ("".equals(tagList)) {
399      return tagList;
400    } else {
401      int end = tagList.indexOf(",");
402      if (end != -1) {
403        tagList = tagList.substring(0,end);
404      }
405      tagList = tagList.startsWith("/") ? tagList.substring(1) : tagList;
406      if ("sample.group".equals(tag) && sLowercaseTags) {
407        tagList = tagList.toLowerCase();
408      }
409      outString.append(tagList.trim());
410      return outString.toString();
411    }
412  }
413
414  /**
415  * Normalize a page title. Extract the string, remove quotes, remove
416  * markup, and trim any whitespace.
417  *
418  * @param hdf Data object in which the metadata values are stored.
419  * @param tag The hdf var from which the metadata should be extracted.
420  * @return A normalized string value for the specified tag.
421  */
422  public static String getTitleNormalized(Data hdf, String tag) {
423    StringBuilder outTitle =  new StringBuilder();
424    String title = hdf.getValue(tag, "");
425    if (!title.isEmpty()) {
426      title = escapeString(title);
427      if (title.indexOf("<span") != -1) {
428        String[] splitTitle = title.split("<span(.*?)</span>");
429        title = splitTitle[0];
430        for (int j = 1; j < splitTitle.length; j++) {
431          title.concat(splitTitle[j]);
432        }
433      }
434      outTitle.append(title.trim());
435    }
436    return outTitle.toString();
437  }
438
439  /**
440  * Extract and normalize a page's language string based on the
441  * lowercased dir path. Non-supported langs are ignored and assigned
442  * the default lang string of "en".
443  *
444  * @param filename A path string to the file relative to root.
445  * @return A normalized lang value.
446  */
447  public static String getLangStringNormalized(Data data, String filename) {
448    String[] stripStr = filename.toLowerCase().split("\\/", 3);
449    String outFrag = "en";
450    String pathCanonical = filename;
451    if (stripStr.length > 0) {
452      for (String t : DocFile.DEVSITE_VALID_LANGS) {
453        if ("intl".equals(stripStr[0])) {
454          if (t.equals(stripStr[1])) {
455            outFrag = stripStr[1];
456            //extract the root url (exclusive of intl/nn)
457            pathCanonical = stripStr[2];
458            break;
459          }
460        }
461      }
462    }
463    //extract the root url (exclusive of intl/nn)
464    data.setValue("path.canonical", pathCanonical);
465    return outFrag;
466  }
467
468  /**
469  * Normalize a page summary string and truncate as needed. Strings
470  * exceeding max_chars are truncated at the first word boundary
471  * following the max_size marker. Strings smaller than min_chars
472  * are discarded (as they are assumed to be too little context).
473  *
474  * @param s String extracted from the page as it's summary.
475  * @return A normalized string value.
476  */
477  public static String getSummaryNormalized(String s) {
478    String str = "";
479    int max_chars = 250;
480    int min_chars = 50;
481    int marker = 0;
482    if (s.length() < min_chars) {
483      return str;
484    } else {
485      str = s.replaceAll("^\"|\"$", "");
486      str = str.replaceAll("\\s+", " ");
487      str = JD_TAG_PATTERN.matcher(str).replaceAll("$1");
488      str = escapeString(str);
489      BreakIterator bi = BreakIterator.getWordInstance();
490      bi.setText(str);
491      if (str.length() > max_chars) {
492        marker = bi.following(max_chars);
493      } else {
494        marker = bi.last();
495      }
496      str = str.substring(0, marker);
497      str = str.concat("\u2026" );
498    }
499    return str;
500  }
501
502  public static String escapeString(String s) {
503    s = s.replaceAll("\"", "&quot;");
504    s = s.replaceAll("\'", "&#39;");
505    s = s.replaceAll("<", "&lt;");
506    s = s.replaceAll(">", "&gt;");
507    s = s.replaceAll("/", "&#47;");
508    return s;
509  }
510
  // Disqualify img src urls that include any of these substrings. Used by
  // getImageUrlNormalized, which returns an empty string for such urls.
  public static String[] IMAGE_EXCLUDE = {"/triangle-", "favicon","android-logo",
      "icon_play.png", "robot-tiny"};
514
515  public static boolean inList(String s, String[] list) {
516    for (String t : list) {
517      if (s.contains(t)) {
518        return true;
519      }
520    }
521    return false;
522  }
523
524  /**
525  * Normalize an img src url by removing docRoot and leading
526  * slash for local image references. These are added later
527  * in js to support offline mode and keep path reference
528  * format consistent with hrefs.
529  *
530  * @param url Abs or rel url sourced from img src.
531  * @return Normalized url if qualified, else empty
532  */
533  public static String getImageUrlNormalized(String url) {
534    String absUrl = "";
535    // validate to avoid choosing using specific images
536    if ((url != null) && (!url.equals("")) && (!inList(url, IMAGE_EXCLUDE))) {
537      absUrl = url.replace("{@docRoot}", "");
538      absUrl = absUrl.replaceFirst("^/(?!/)", "");
539    }
540    return absUrl;
541  }
542
543  /**
544  * Normalize an href url by removing docRoot and leading
545  * slash for local image references. These are added later
546  * in js to support offline mode and keep path reference
547  * format consistent with hrefs.
548  *
549  * @param url Abs or rel page url sourced from href
550  * @return Normalized url, either abs or rel to root
551  */
552  public static String getPageUrlNormalized(String url) {
553    String absUrl = "";
554
555    if ((url !=null) && (!url.equals(""))) {
556      absUrl = url.replace("{@docRoot}", "");
557      if (Doclava.USE_DEVSITE_LOCALE_OUTPUT_PATHS) {
558        absUrl = absUrl.replaceFirst("^en/", "");
559      }
560      absUrl = absUrl.replaceFirst("^/(?!/)", "");
561    }
562    return absUrl;
563  }
564
565  /**
566  * Given a metadata node, add it as a child of a root node based on its
567  * type. If there is no root node that matches the node's type, create one
568  * and add the metadata node as a child node.
569  *
570  * @param gNode The node to attach to a root node or add as a new root node.
571  * @param rootList The current list of root nodes.
572  * @return The updated list of root nodes.
573  */
574  public static List<Node> appendMetaNodeByLang(Node gNode, List<Node> rootList) {
575
576    String nodeLang = gNode.getLang();
577    boolean matched = false;
578    for (Node n : rootList) {
579      if (n.getLang().equals(nodeLang)) {  //find any matching lang node
580        appendMetaNodeByType(gNode,n.getChildren());
581        //n.getChildren().add(gNode);
582        matched = true;
583        break; // add to the first root node only
584      } // tag did not match
585    } // end rootnodes matching iterator
586    if (!matched) {
587      List<Node> mlangList = new ArrayList<Node>(); // list of file objects that have a given lang
588      //mlangList.add(gNode);
589      Node tnode = new Node.Builder().setChildren(mlangList).setLang(nodeLang).build();
590      rootList.add(tnode);
591      appendMetaNodeByType(gNode, mlangList);
592    }
593    return rootList;
594  }
595
596  /**
597  * Given a metadata node, add it as a child of a root node based on its
598  * type. If there is no root node that matches the node's type, create one
599  * and add the metadata node as a child node.
600  *
601  * @param gNode The node to attach to a root node or add as a new root node.
602  * @param rootList The current list of root nodes.
603  * @return The updated list of root nodes.
604  */
605  public static List<Node> appendMetaNodeByType(Node gNode, List<Node> rootList) {
606
607    String nodeTags = gNode.getType();
608    boolean matched = false;
609    for (Node n : rootList) {
610      if (n.getType().equals(nodeTags)) {  //find any matching type node
611        n.getChildren().add(gNode);
612        matched = true;
613        break; // add to the first root node only
614      } // tag did not match
615    } // end rootnodes matching iterator
616    if (!matched) {
617      List<Node> mtaglist = new ArrayList<Node>(); // list of file objects that have a given type
618      mtaglist.add(gNode);
619      Node tnode = new Node.Builder().setChildren(mtaglist).setType(nodeTags).build();
620      rootList.add(tnode);
621    }
622    return rootList;
623  }
624
625  /**
626  * Given a metadata node, add it as a child of a root node based on its
627  * tag. If there is no root node matching the tag, create one for it
628  * and add the metadata node as a child node.
629  *
630  * @param gNode The node to attach to a root node or add as a new root node.
631  * @param rootTagNodesList The current list of root nodes.
632  * @return The updated list of root nodes.
633  */
634  public static List<Node> appendMetaNodeByTagIndex(Node gNode, List<Node> rootTagNodesList) {
635
636    for (int iter = 0; iter < gNode.getChildren().size(); iter++) {
637      if (gNode.getChildren().get(iter).getTags() != null) {
638        List<String> nodeTags = gNode.getChildren().get(iter).getTags();
639        boolean matched = false;
640        for (String t : nodeTags) { //process each of the meta.tags
641          for (Node n : rootTagNodesList) {
642            if (n.getLabel().equals(t.toString())) {
643              n.getTags().add(String.valueOf(iter));
644              matched = true;
645              break; // add to the first root node only
646            } // tag did not match
647          } // end rootnodes matching iterator
648          if (!matched) {
649            List<String> mtaglist = new ArrayList<String>(); // list of objects with a given tag
650            mtaglist.add(String.valueOf(iter));
651            Node tnode = new Node.Builder().setLabel(t.toString()).setTags(mtaglist).build();
652            rootTagNodesList.add(tnode);
653          }
654        }
655      }
656    }
657    return rootTagNodesList;
658  }
659
660  /**
661  * Append the contents of jd_extras to jd_lists_unified for each language.
662  *
663  * @param extrasFilename The lang-specific extras file to append.
664  * @param unifiedFilename The lang-specific unified metadata file.
665  */
666  public static void appendExtrasMetadata (String extrasFilename, String unifiedFilename) {
667
668    File f = new File(ClearPage.outputDir + "/" + extrasFilename);
669    if (f.exists() && !f.isDirectory()) {
670      ClearPage.copyFile(true, f, unifiedFilename, true);
671    }
672  }
673
674  public static final Comparator<Node> BY_TAG_NAME = new Comparator<Node>() {
675    public int compare (Node one, Node other) {
676      return one.getLabel().compareTo(other.getLabel());
677    }
678  };
679
680  public static final Comparator<Node> BY_TYPE_NAME = new Comparator<Node>() {
681    public int compare (Node one, Node other) {
682      return one.getType().compareTo(other.getType());
683    }
684  };
685
686    public static final Comparator<Node> BY_LANG_NAME = new Comparator<Node>() {
687    public int compare (Node one, Node other) {
688      return one.getLang().compareTo(other.getLang());
689    }
690  };
691
692  /**
693  * A node for storing page metadata. Use Builder.build() to instantiate.
694  */
695  public static class Node {
696
    private String mLabel; // holds page.title or similar identifier
    private String mCategory; // subtabs, example 'training' 'guides'
    private String mSummary; // Summary for card or similar use
    private String mLink; //link href for item click
    private String mGroup; // from sample.group in _index.jd
    private List<String> mKeywords; // from page.tags
    private List<String> mTags; // from meta.tags
    private String mImage; // holds an href, fully qualified or relative to root
    private List<Node> mChildren; // child nodes; null unless set through the Builder
    private String mLang; // lang string, "en" or the lang taken from an intl/<lang>/ path
    private String mType; // design, develop, distribute, youtube, blog, etc
    private String mTimestamp; // optional timestamp eg 1447452827
709
710    private Node(Builder builder) {
711      mLabel = builder.mLabel;
712      mCategory = builder.mCategory;
713      mSummary = builder.mSummary;
714      mLink = builder.mLink;
715      mGroup = builder.mGroup;
716      mKeywords = builder.mKeywords;
717      mTags = builder.mTags;
718      mImage = builder.mImage;
719      mChildren = builder.mChildren;
720      mLang = builder.mLang;
721      mType = builder.mType;
722      mTimestamp = builder.mTimestamp;
723    }
724
725    private static class Builder {
726      private String mLabel, mCategory, mSummary, mLink, mGroup, mImage, mLang, mType, mTimestamp;
727      private List<String> mKeywords = null;
728      private List<String> mTags = null;
729      private List<Node> mChildren = null;
730      public Builder setLabel(String mLabel) { this.mLabel = mLabel; return this;}
731      public Builder setCategory(String mCategory) {
732        this.mCategory = mCategory; return this;
733      }
734      public Builder setSummary(String mSummary) {this.mSummary = mSummary; return this;}
735      public Builder setLink(String mLink) {this.mLink = mLink; return this;}
736      public Builder setGroup(String mGroup) {this.mGroup = mGroup; return this;}
737      public Builder setKeywords(List<String> mKeywords) {
738        this.mKeywords = mKeywords; return this;
739      }
740      public Builder setTags(List<String> mTags) {this.mTags = mTags; return this;}
741      public Builder setImage(String mImage) {this.mImage = mImage; return this;}
742      public Builder setChildren(List<Node> mChildren) {this.mChildren = mChildren; return this;}
743      public Builder setLang(String mLang) {this.mLang = mLang; return this;}
744      public Builder setType(String mType) {this.mType = mType; return this;}
745      public Builder setTimestamp(String mTimestamp) {this.mTimestamp = mTimestamp; return this;}
746      public Node build() {return new Node(this);}
747    }
748
749    /**
750    * Render a tree of metadata nodes organized by type.
751    * @param buf Output buffer to render to.
752    */
753    void renderTypeResources(StringBuilder buf) {
754      List<Node> list = mChildren; //list of type rootnodes
755      if (list == null || list.size() == 0) {
756        buf.append("null");
757      } else {
758        final int n = list.size();
759        for (int i = 0; i < n; i++) {
760          buf.append("var " + list.get(i).mType.toUpperCase() + "_RESOURCES = [");
761          list.get(i).renderTypes(buf); //render this type's children
762          buf.append("\n];\n\n");
763        }
764      }
765    }
766
767    /**
768    * Render a tree of metadata nodes organized by lang.
769    * @param buf Output buffer to render to.
770    */
771    void renderLangResources(StringBuilder buf, String langname) {
772      List<Node> list = mChildren; //list of type rootnodes
773      if (list == null || list.size() == 0) {
774        buf.append("null");
775      } else {
776        final int n = list.size();
777        for (int i = 0; i < n; i++) {
778          buf.append("METADATA['" + langname + "']." + list.get(i).mType + " = [");
779          list.get(i).renderTypes(buf); //render this lang's children
780          buf.append("\n];\n\n");
781        }
782      }
783    }
784
785    /**
786    * Render a tree of metadata nodes of type 'develop' to extract
787    * samples metadata. Only used by devsite (ds) builds.
788    * @param buf Output buffer to render to.
789    * @return true if samples were rendered to buf
790    */
791    boolean renderSamplesResources(StringBuilder buf, String langname) {
792      boolean langHasSamples = false;
793      List<Node> list = mChildren; //list of type rootnodes
794      if (list == null || list.size() == 0) {
795        buf.append("null");
796      } else {
797        final int n = list.size();
798        for (int i = 0; i < n; i++) {
799          //samples are always in type 'develop', so restrict
800          if ("develop".equals(list.get(i).mType)) {
801            //render this type's children
802            langHasSamples = list.get(i).renderTypeForSamples(buf);
803          }
804        }
805      }
806      return langHasSamples;
807    }
808
809    /**
810    * Render all metadata nodes for a specific type.
811    * @param buf Output buffer to render to.
812    */
813    void renderTypes(StringBuilder buf) {
814      List<Node> list = mChildren;
815      if (list == null || list.size() == 0) {
816        buf.append("nulltype");
817      } else {
818        final int n = list.size();
819        for (int i = 0; i < n; i++) {
820          buf.append("\n      {\n");
821          buf.append("        \"title\":\"");
822          renderStrWithUcs(buf, list.get(i).mLabel);
823          buf.append("\",\n" );
824          buf.append("        \"summary\":\"");
825          renderStrWithUcs(buf, list.get(i).mSummary);
826          buf.append("\",\n" );
827          buf.append("        \"url\":\"" + list.get(i).mLink + "\",\n" );
828          if (!"".equals(list.get(i).mImage)) {
829            buf.append("        \"image\":\"" + list.get(i).mImage + "\",\n" );
830          }
831          if (!"".equals(list.get(i).mGroup)) {
832            buf.append("        \"group\":\"");
833            renderStrWithUcs(buf, list.get(i).mGroup);
834            buf.append("\",\n" );
835          }
836          if (!"".equals(list.get(i).mCategory)) {
837            buf.append("        \"category\":\"" + list.get(i).mCategory + "\",\n" );
838          }
839          if ((list.get(i).mType != null) && (list.get(i).mType != "")) {
840            buf.append("        \"type\":\"" + list.get(i).mType + "\",\n");
841          }
842          list.get(i).renderArrayType(buf, list.get(i).mKeywords, "keywords");
843          list.get(i).renderArrayType(buf, list.get(i).mTags, "tags");
844          if (!"".equals(list.get(i).mTimestamp)) {
845            buf.append("        \"timestamp\":\"" + list.get(i).mTimestamp + "\",\n");
846          }
847          buf.append("        \"lang\":\"" + list.get(i).mLang + "\"" );
848          buf.append("\n      }");
849          if (i != n - 1) {
850            buf.append(", ");
851          }
852        }
853      }
854    }
855
856    /**
857    * Render all metadata nodes for samples only.
858    * Only used by devsite (ds) builds.
859    * @param buf Output buffer to render to.
860    * @return whether any samples were rendered to buf
861    */
862    boolean renderTypeForSamples(StringBuilder buf) {
863      boolean typeHasSamples = false;
864      List<Node> list = mChildren;
865      if (list == null || list.size() == 0) {
866        buf.append("nulltype");
867      } else {
868        final int n = list.size();
869        for (int i = 0; i < n; i++) {
870          // valid samples must have category 'samples'
871          if ("samples".equals(list.get(i).mCategory)) {
872            typeHasSamples = true;
873            buf.append("\n      {\n");
874            buf.append("        \"title\":\"");
875            renderStrWithUcs(buf, list.get(i).mLabel);
876            buf.append("\",\n" );
877            buf.append("        \"summary\":\"");
878            renderStrWithUcs(buf, list.get(i).mSummary);
879            buf.append("\",\n" );
880            buf.append("        \"url\":\"" + list.get(i).mLink + "\",\n" );
881            if (!"".equals(list.get(i).mImage)) {
882              buf.append("        \"image\":\"" + list.get(i).mImage + "\",\n" );
883            }
884            if (!"".equals(list.get(i).mGroup)) {
885              buf.append("        \"group\":\"");
886              renderStrWithUcs(buf, list.get(i).mGroup);
887              buf.append("\",\n" );
888            }
889            if (!"".equals(list.get(i).mCategory)) {
890              buf.append("        \"category\":\"" + list.get(i).mCategory + "\",\n" );
891            }
892            if ((list.get(i).mType != null) && (list.get(i).mType != "")) {
893              buf.append("        \"type\":\"" + list.get(i).mType + "\",\n");
894            }
895            list.get(i).renderArrayType(buf, list.get(i).mKeywords, "keywords");
896            list.get(i).renderArrayType(buf, list.get(i).mTags, "tags");
897            if (!"".equals(list.get(i).mTimestamp)) {
898              buf.append("        \"timestamp\":\"" + list.get(i).mTimestamp + "\",\n");
899            }
900            buf.append("        \"lang\":\"" + list.get(i).mLang + "\"" );
901            buf.append("\n      }");
902            if (i != n - 1) {
903              buf.append(", ");
904            }
905          }
906        }
907      }
908      return typeHasSamples;
909    }
910
911    /**
912    * Build and render a list of tags associated with each type.
913    * @param buf Output buffer to render to.
914    */
915    void renderTypesByTag(StringBuilder buf) {
916      List<Node> list = mChildren; //list of rootnodes
917      if (list == null || list.size() == 0) {
918        buf.append("null");
919      } else {
920        final int n = list.size();
921        for (int i = 0; i < n; i++) {
922        buf.append("var " + list.get(i).mType.toUpperCase() + "_BY_TAG = {");
923        List<Node> mTagList = new ArrayList(); //list of rootnodes
924        mTagList = appendMetaNodeByTagIndex(list.get(i), mTagList);
925        list.get(i).renderTagIndices(buf, mTagList);
926          buf.append("\n};\n\n");
927        }
928      }
929    }
930
931    /**
932    * Render a list of tags associated with a type, including the
933    * tag's indices in the type array.
934    * @param buf Output buffer to render to.
935    * @param tagList Node tree of types to render.
936    */
937    void renderTagIndices(StringBuilder buf, List<Node> tagList) {
938      List<Node> list = tagList;
939      if (list == null || list.size() == 0) {
940        buf.append("");
941      } else {
942        final int n = list.size();
943        for (int i = 0; i < n; i++) {
944          buf.append("\n    " + list.get(i).mLabel + ":[");
945          renderArrayValue(buf, list.get(i).mTags);
946          buf.append("]");
947          if (i != n - 1) {
948            buf.append(", ");
949          }
950        }
951      }
952    }
953
954    /**
955    * Render key:arrayvalue pair.
956    * @param buf Output buffer to render to.
957    * @param type The list value to render as an arrayvalue.
958    * @param key The key for the pair.
959    */
960    void renderArrayType(StringBuilder buf, List<String> type, String key) {
961      buf.append("        \"" + key + "\": [");
962      renderArrayValue(buf, type);
963      buf.append("],\n");
964    }
965
966    /**
967    * Render an array value to buf, with special handling of unicode characters.
968    * @param buf Output buffer to render to.
969    * @param type The list value to render as an arrayvalue.
970    */
971    void renderArrayValue(StringBuilder buf, List<String> type) {
972      List<String> list = type;
973      if (list != null) {
974        final int n = list.size();
975        for (int i = 0; i < n; i++) {
976          String tagval = list.get(i).toString();
977          renderStrWithUcs(buf,tagval);
978          if (i != n - 1) {
979            buf.append(",");
980          }
981        }
982      }
983    }
984
985    /**
986    * Render a string that can include ucs2 encoded characters.
987    * @param buf Output buffer to render to.
988    * @param chars String to append to buf with any necessary encoding
989    */
990    void renderStrWithUcs(StringBuilder buf, String chars) {
991      String strval = chars;
992      final int L = strval.length();
993      for (int t = 0; t < L; t++) {
994        char c = strval.charAt(t);
995        if (c >= Character.MIN_HIGH_SURROGATE && c <= Character.MAX_HIGH_SURROGATE ) {
996          // we have a UTF-16 multi-byte character
997          int codePoint = strval.codePointAt(t);
998          int charSize = Character.charCount(codePoint);
999          t += charSize - 1;
1000          buf.append(String.format("\\u%04x",codePoint));
1001        } else if (c >= ' ' && c <= '~' && c != '\\') {
1002          buf.append(c);
1003        } else {
1004          // we are encoding a two byte character
1005          buf.append(String.format("\\u%04x", (int) c));
1006        }
1007      }
1008    }
1009
1010    public String getLabel() {
1011      return mLabel;
1012    }
1013
1014    public void setLabel(String label) {
1015       mLabel = label;
1016    }
1017
1018    public String getCategory() {
1019      return mCategory;
1020    }
1021
1022    public void setCategory(String title) {
1023       mCategory = title;
1024    }
1025
1026    public String getSummary() {
1027      return mSummary;
1028    }
1029
1030    public void setSummary(String summary) {
1031       mSummary = summary;
1032    }
1033
1034    public String getLink() {
1035      return mLink;
1036    }
1037
1038    public void setLink(String ref) {
1039       mLink = ref;
1040    }
1041
1042    public String getGroup() {
1043      return mGroup;
1044    }
1045
1046    public void setGroup(String group) {
1047      mGroup = group;
1048    }
1049
1050    public List<String> getTags() {
1051        return mTags;
1052    }
1053
1054    public void setTags(String tags) {
1055      if ("".equals(tags)) {
1056        mTags = null;
1057      } else {
1058        List<String> tagList = new ArrayList();
1059        String[] tagParts = tags.split(",");
1060
1061        for (String t : tagParts) {
1062          tagList.add(t);
1063        }
1064        mTags = tagList;
1065      }
1066    }
1067
1068    public List<String> getKeywords() {
1069        return mKeywords;
1070    }
1071
1072    public void setKeywords(String keywords) {
1073      if ("".equals(keywords)) {
1074        mKeywords = null;
1075      } else {
1076        List<String> keywordList = new ArrayList();
1077        String[] keywordParts = keywords.split(",");
1078
1079        for (String k : keywordParts) {
1080          keywordList.add(k);
1081        }
1082        mKeywords = keywordList;
1083      }
1084    }
1085
1086    public String getImage() {
1087        return mImage;
1088    }
1089
1090    public void setImage(String ref) {
1091       mImage = ref;
1092    }
1093
1094    public List<Node> getChildren() {
1095        return mChildren;
1096    }
1097
1098    public void setChildren(List<Node> node) {
1099        mChildren = node;
1100    }
1101
1102    public String getLang() {
1103      return mLang;
1104    }
1105
1106    public void setLang(String lang) {
1107      mLang = lang;
1108    }
1109
1110    public String getType() {
1111      return mType;
1112    }
1113
1114    public String getTimestamp() {
1115      return mTimestamp;
1116    }
1117
1118    public void setType(String type) {
1119      mType = type;
1120    }
1121
1122    public void setTimestamp(String timestamp) {
1123      mTimestamp = timestamp;
1124    }
1125  }
1126}
1127