PageMetadata.java revision 2a9088f83fa9b12100b231944d3a9ad371e42c28
1/*
2 * Copyright (C) 2013 Google Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.google.doclava;
18
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.google.clearsilver.jsilver.data.Data;
28
29/**
30* Metadata associated with a specific documentation page. Extracts
31* metadata based on the page's declared hdf vars (meta.tags and others)
32* as well as implicit data relating to the page, such as url, type, etc.
33* Includes a Node class that represents the metadata and lets it attach
34* to parent/child elements in the tree metadata nodes for all pages.
35* Node also includes methods for rendering the node tree to a json file
36* in docs output, which is then used by JavaScript to load metadata
37* objects into html pages.
38*/
39
40public class PageMetadata {
41  File mSource;
42  String mDest;
43  String mTagList;
44  static boolean sLowercaseTags = true;
45  static boolean sLowercaseKeywords = true;
46
47  public PageMetadata(File source, String dest, List<Node> taglist) {
48    mSource = source;
49    mDest = dest;
50
51    if (dest != null) {
52      int len = dest.length();
53      if (len > 1 && dest.charAt(len - 1) != '/') {
54        mDest = dest + '/';
55      } else {
56        mDest = dest;
57      }
58    }
59  }
60
61  /**
62  * Given a list of metadata nodes organized by type, sort the
63  * root nodes by type name and render the types and their child
64  * metadata nodes to a json file in the out dir.
65  *
66  * @param rootTypeNodesList A list of root metadata nodes, each
67  *        representing a type and it's member child pages.
68  */
69  public static void WriteList(List<Node> rootTypeNodesList) {
70
71    Collections.sort(rootTypeNodesList, BY_TYPE_NAME);
72    Node pageMeta = new Node.Builder().setLabel("TOP").setChildren(rootTypeNodesList).build();
73
74    StringBuilder buf = new StringBuilder();
75    // write the taglist to string format
76    pageMeta.renderTypeResources(buf);
77    pageMeta.renderTypesByTag(buf);
78    // write the taglist to js file
79    Data data = Doclava.makeHDF();
80    data.setValue("reference_tree", buf.toString());
81    ClearPage.write(data, "jd_lists_unified.cs", "jd_lists_unified.js");
82  }
83
84  /**
85  * Extract supported metadata values from a page and add them as
86  * a child node of a root node based on type. Some metadata values
87  * are normalized. Unsupported metadata fields are ignored. See
88  * Node for supported metadata fields and methods for accessing values.
89  *
90  * @param file The file from which to extract metadata.
91  * @param dest The output path for the file, used to set link to page.
92  * @param filename The file from which to extract metadata.
93  * @param hdf Data object in which to store the metadata values.
94  * @param tagList The file from which to extract metadata.
95  * @return tagList with new node added.
96  */
97  public static List<Node> setPageMetadata(File file, String dest, String filename,
98      Data hdf, List<Node> tagList) {
99    //exclude this page if author does not want it included
100    boolean excludeNode = "true".equals(hdf.getValue("excludeFromSuggestions",""));
101    if (!excludeNode) {
102      Node pageMeta = new Node.Builder().build();
103      pageMeta.setLabel(getTitleNormalized(hdf, "page.title"));
104      pageMeta.setTitleFriendly(hdf.getValue("page.titleFriendly",""));
105      pageMeta.setSummary(hdf.getValue("page.summary",""));
106      pageMeta.setLink(filename);
107      pageMeta.setGroup(getStringValueNormalized(hdf,"sample.group"));
108      pageMeta.setKeywords(getPageTagsNormalized(hdf, "page.tags"));
109      pageMeta.setTags(getPageTagsNormalized(hdf, "meta.tags"));
110      pageMeta.setImage(getStringValueNormalized(hdf, "page.image"));
111      pageMeta.setLang(getLangStringNormalized(filename));
112      pageMeta.setType(getStringValueNormalized(hdf, "page.type"));
113      appendMetaNodeByType(pageMeta, tagList);
114    }
115    return tagList;
116  }
117
118  /**
119  * Normalize a comma-delimited, multi-string value. Split on commas, remove
120  * quotes, trim whitespace, optionally make keywords/tags lowercase for
121  * easier matching.
122  *
123  * @param hdf Data object in which the metadata values are stored.
124  * @param tag The hdf var from which the metadata was extracted.
125  * @return A normalized string value for the specified tag.
126  */
127  public static String getPageTagsNormalized(Data hdf, String tag) {
128
129    StringBuilder tags = new StringBuilder();
130    String tagList = hdf.getValue(tag, "");
131    if (!tagList.equals("")) {
132      tagList = tagList.replaceAll("\"", "");
133      String[] tagParts = tagList.split(",");
134      for (int iter = 0; iter < tagParts.length; iter++) {
135        tags.append("'");
136        if (tag.equals("meta.tags") && sLowercaseTags) {
137          tagParts[iter] = tagParts[iter].toLowerCase();
138        } else if (tag.equals("page.tags") && sLowercaseKeywords) {
139          tagParts[iter] = tagParts[iter].toLowerCase();
140        }
141        tags.append(tagParts[iter].trim());
142        tags.append("'");
143        if (iter < tagParts.length - 1) {
144          tags.append(",");
145        }
146      }
147    }
148    return tags.toString();
149  }
150
151  /**
152  * Normalize a string for which only a single value is supported.
153  * Extract the string up to the first comma, remove quotes, remove
154  * any forward-slash prefix, trim any whitespace, optionally make
155  * lowercase for easier matching.
156  *
157  * @param hdf Data object in which the metadata values are stored.
158  * @param tag The hdf var from which the metadata should be extracted.
159  * @return A normalized string value for the specified tag.
160  */
161  public static String getStringValueNormalized(Data hdf, String tag) {
162    StringBuilder outString =  new StringBuilder();
163    String tagList = hdf.getValue(tag, "");
164    if (!tagList.isEmpty()) {
165      tagList.replaceAll("\"", "");
166      int end = tagList.indexOf(",");
167      if (end != -1) {
168        tagList = tagList.substring(0,end);
169      }
170      tagList = tagList.startsWith("/") ? tagList.substring(1) : tagList;
171      if ("sample.group".equals(tag) && sLowercaseTags) {
172        tagList = tagList.toLowerCase();
173      }
174      outString.append(tagList.trim());
175    }
176    return outString.toString();
177  }
178
179  /**
180  * Normalize a page title. Extract the string, remove quotes, remove
181  * markup, and trim any whitespace.
182  *
183  * @param hdf Data object in which the metadata values are stored.
184  * @param tag The hdf var from which the metadata should be extracted.
185  * @return A normalized string value for the specified tag.
186  */
187  public static String getTitleNormalized(Data hdf, String tag) {
188    StringBuilder outTitle =  new StringBuilder();
189    String title = hdf.getValue(tag, "");
190    if (!title.isEmpty()) {
191      title = title.replaceAll("\"", "'");
192      if (title.indexOf("<span") != -1) {
193        String[] splitTitle = title.split("<span(.*?)</span>");
194        title = splitTitle[0];
195        for (int j = 1; j < splitTitle.length; j++) {
196          title.concat(splitTitle[j]);
197        }
198      }
199      outTitle.append(title.trim());
200    }
201    return outTitle.toString();
202  }
203
204  /**
205  * Extract and normalize a page's language string based on the
206  * lowercased dir path. Non-supported langs are ignored and assigned
207  * the default lang string of "en".
208  *
209  * @param filename A path string to the file relative to root.
210  * @return A normalized lang value.
211  */
212  public static String getLangStringNormalized(String filename) {
213    String[] stripStr = filename.toLowerCase().split("\\/");
214    String outFrag = "en";
215    if (stripStr.length > 0) {
216      for (String t : DocFile.DEVSITE_VALID_LANGS) {
217        if ("intl".equals(stripStr[0])) {
218          if (t.equals(stripStr[1])) {
219            outFrag = stripStr[1];
220            break;
221          }
222        }
223      }
224    }
225    return outFrag;
226  }
227
228  /**
229  * Given a metadata node, add it as a child of a root node based on its
230  * type. If there is no root node that matches the node's type, create one
231  * and add the metadata node as a child node.
232  *
233  * @param gNode The node to attach to a root node or add as a new root node.
234  * @param rootList The current list of root nodes.
235  * @return The updated list of root nodes.
236  */
237  public static List<Node> appendMetaNodeByType(Node gNode, List<Node> rootList) {
238
239    String nodeTags = gNode.getType();
240    boolean matched = false;
241    for (Node n : rootList) {
242      if (n.getType().equals(nodeTags)) {  //find any matching type node
243        n.getChildren().add(gNode);
244        matched = true;
245        break; // add to the first root node only
246      } // tag did not match
247    } // end rootnodes matching iterator
248    if (!matched) {
249      List<Node> mtaglist = new ArrayList<Node>(); // list of file objects that have a given type
250      mtaglist.add(gNode);
251      Node tnode = new Node.Builder().setChildren(mtaglist).setType(nodeTags).build();
252      rootList.add(tnode);
253    }
254    return rootList;
255  }
256
257  /**
258  * Given a metadata node, add it as a child of a root node based on its
259  * tag. If there is no root node matching the tag, create one for it
260  * and add the metadata node as a child node.
261  *
262  * @param gNode The node to attach to a root node or add as a new root node.
263  * @param rootTagNodesList The current list of root nodes.
264  * @return The updated list of root nodes.
265  */
266  public static List<Node> appendMetaNodeByTagIndex(Node gNode, List<Node> rootTagNodesList) {
267
268    for (int iter = 0; iter < gNode.getChildren().size(); iter++) {
269      if (gNode.getChildren().get(iter).getTags() != null) {
270        List<String> nodeTags = gNode.getChildren().get(iter).getTags();
271        boolean matched = false;
272        for (String t : nodeTags) { //process each of the meta.tags
273          for (Node n : rootTagNodesList) {
274            if (n.getLabel().equals(t.toString())) {
275              matched = true;
276              break; // add to the first root node only
277            } // tag did not match
278          } // end rootnodes matching iterator
279          if (!matched) {
280            List<String> mtaglist = new ArrayList<String>(); // list of objects with a given tag
281            mtaglist.add(String.valueOf(iter));
282            Node tnode = new Node.Builder().setLabel(t.toString()).setTags(mtaglist).build();
283            rootTagNodesList.add(tnode);
284          }
285        }
286      }
287    }
288    return rootTagNodesList;
289  }
290
291  public static final Comparator<Node> BY_TAG_NAME = new Comparator<Node>() {
292    public int compare (Node one, Node other) {
293      return one.getLabel().compareTo(other.getLabel());
294    }
295  };
296
297  public static final Comparator<Node> BY_TYPE_NAME = new Comparator<Node>() {
298    public int compare (Node one, Node other) {
299      return one.getType().compareTo(other.getType());
300    }
301  };
302
303  /**
304  * A node for storing page metadata. Use Builder.build() to instantiate.
305  */
306  public static class Node {
307
308    private String mLabel; // holds page.title or similar identifier
309    private String mTitleFriendly; // title for card or similar use
310    private String mSummary; // Summary for card or similar use
311    private String mLink; //link href for item click
312    private String mGroup; // from sample.group in _index.jd
313    private List<String> mKeywords; // from page.tags
314    private List<String> mTags; // from meta.tags
315    private String mImage; // holds an href, fully qualified or relative to root
316    private List<Node> mChildren;
317    private String mLang;
318    private String mType; // can be file, dir, video show, announcement, etc.
319
320    private Node(Builder builder) {
321      mLabel = builder.mLabel;
322      mTitleFriendly = builder.mTitleFriendly;
323      mSummary = builder.mSummary;
324      mLink = builder.mLink;
325      mGroup = builder.mGroup;
326      mKeywords = builder.mKeywords;
327      mTags = builder.mTags;
328      mImage = builder.mImage;
329      mChildren = builder.mChildren;
330      mLang = builder.mLang;
331      mType = builder.mType;
332    }
333
334    private static class Builder {
335      private String mLabel, mTitleFriendly, mSummary, mLink, mGroup, mImage, mLang, mType;
336      private List<String> mKeywords = null;
337      private List<String> mTags = null;
338      private List<Node> mChildren = null;
339      public Builder setLabel(String mLabel) { this.mLabel = mLabel; return this;}
340      public Builder setTitleFriendly(String mTitleFriendly) {
341        this.mTitleFriendly = mTitleFriendly; return this;
342      }
343      public Builder setSummary(String mSummary) {this.mSummary = mSummary; return this;}
344      public Builder setLink(String mLink) {this.mLink = mLink; return this;}
345      public Builder setGroup(String mGroup) {this.mGroup = mGroup; return this;}
346      public Builder setKeywords(List<String> mKeywords) {
347        this.mKeywords = mKeywords; return this;
348      }
349      public Builder setTags(List<String> mTags) {this.mTags = mTags; return this;}
350      public Builder setImage(String mImage) {this.mImage = mImage; return this;}
351      public Builder setChildren(List<Node> mChildren) {this.mChildren = mChildren; return this;}
352      public Builder setLang(String mLang) {this.mLang = mLang; return this;}
353      public Builder setType(String mType) {this.mType = mType; return this;}
354      public Node build() {return new Node(this);}
355    }
356
357    /**
358    * Render a tree of metadata nodes organized by type.
359    * @param buf Output buffer to render to.
360    */
361    void renderTypeResources(StringBuilder buf) {
362      List<Node> list = mChildren; //list of type rootnodes
363      if (list == null || list.size() == 0) {
364        buf.append("null");
365      } else {
366        final int n = list.size();
367        for (int i = 0; i < n; i++) {
368          buf.append("var " + list.get(i).mType.toUpperCase() + "_RESOURCES = [");
369          list.get(i).renderTypes(buf); //render this type's children
370          buf.append("\n];\n\n");
371        }
372      }
373    }
374    /**
375    * Render all metadata nodes for a specific type.
376    * @param buf Output buffer to render to.
377    */
378    void renderTypes(StringBuilder buf) {
379      List<Node> list = mChildren;
380      if (list == null || list.size() == 0) {
381        buf.append("nulltype");
382      } else {
383        final int n = list.size();
384        for (int i = 0; i < n; i++) {
385          buf.append("\n      {\n");
386          buf.append("        title:\"" + list.get(i).mLabel + "\",\n" );
387          buf.append("        titleFriendly:\"" + list.get(i).mTitleFriendly + "\",\n" );
388          buf.append("        summary:\"" + list.get(i).mSummary + "\",\n" );
389          buf.append("        url:\"" + list.get(i).mLink + "\",\n" );
390          buf.append("        group:\"" + list.get(i).mGroup + "\",\n" );
391          list.get(i).renderArrayType(buf, list.get(i).mKeywords, "keywords");
392          list.get(i).renderArrayType(buf, list.get(i).mTags, "tags");
393          buf.append("        image:\"" + list.get(i).mImage + "\",\n" );
394          buf.append("        lang:\"" + list.get(i).mLang + "\",\n" );
395          buf.append("        type:\"" + list.get(i).mType + "\"");
396          buf.append("\n      }");
397          if (i != n - 1) {
398            buf.append(", ");
399          }
400        }
401      }
402    }
403
404    /**
405    * Build and render a list of tags associated with each type.
406    * @param buf Output buffer to render to.
407    */
408    void renderTypesByTag(StringBuilder buf) {
409      List<Node> list = mChildren; //list of rootnodes
410      if (list == null || list.size() == 0) {
411        buf.append("null");
412      } else {
413        final int n = list.size();
414        for (int i = 0; i < n; i++) {
415        buf.append("var " + list.get(i).mType.toUpperCase() + "_BY_TAG = {");
416        List<Node> mTagList = new ArrayList(); //list of rootnodes
417        mTagList = appendMetaNodeByTagIndex(list.get(i), mTagList);
418        list.get(i).renderTagIndices(buf, mTagList);
419          buf.append("\n};\n\n");
420        }
421      }
422    }
423
424    /**
425    * Render a list of tags associated with a type, including the
426    * tag's indices in the type array.
427    * @param buf Output buffer to render to.
428    * @param tagList Node tree of types to render.
429    */
430    void renderTagIndices(StringBuilder buf, List<Node> tagList) {
431      List<Node> list = tagList;
432      if (list == null || list.size() == 0) {
433        buf.append("null");
434      } else {
435        final int n = list.size();
436        for (int i = 0; i < n; i++) {
437
438          buf.append("\n    " + list.get(i).mLabel + ":[");
439          renderArrayValue(buf, list.get(i).mTags);
440          buf.append("]");
441          if (i != n - 1) {
442            buf.append(", ");
443          }
444        }
445      }
446    }
447
448    /**
449    * Render key:arrayvalue pair.
450    * @param buf Output buffer to render to.
451    * @param type The list value to render as an arrayvalue.
452    * @param key The key for the pair.
453    */
454    void renderArrayType(StringBuilder buf, List<String> type, String key) {
455      buf.append("        " + key + ": [");
456      renderArrayValue(buf, type);
457      buf.append("],\n");
458    }
459
460    /**
461    * Render an array value to buf, with special handling of unicode characters.
462    * @param buf Output buffer to render to.
463    * @param type The list value to render as an arrayvalue.
464    */
465    void renderArrayValue(StringBuilder buf, List<String> type) {
466      List<String> list = type;
467      if (list != null) {
468        final int n = list.size();
469        for (int i = 0; i < n; i++) {
470          String tagval = list.get(i).toString();
471          final int L = tagval.length();
472          for (int t = 0; t < L; t++) {
473            char c = tagval.charAt(t);
474            if (c >= ' ' && c <= '~' && c != '\\') {
475              buf.append(c);
476            } else {
477              buf.append("\\u");
478              for (int m = 0; m < 4; m++) {
479                char x = (char) (c & 0x000f);
480                if (x > 10) {
481                  x = (char) (x - 10 + 'a');
482                } else {
483                  x = (char) (x + '0');
484                }
485                buf.append(x);
486                c >>= 4;
487              }
488            }
489          }
490          if (i != n - 1) {
491            buf.append(",");
492          }
493        }
494      }
495    }
496
497    public String getLabel() {
498      return mLabel;
499    }
500
501    public void setLabel(String label) {
502       mLabel = label;
503    }
504
505    public String getTitleFriendly() {
506      return mTitleFriendly;
507    }
508
509    public void setTitleFriendly(String title) {
510       mTitleFriendly = title;
511    }
512
513    public String getSummary() {
514      return mSummary;
515    }
516
517    public void setSummary(String summary) {
518       mSummary = summary;
519    }
520
521    public String getLink() {
522      return mLink;
523    }
524
525    public void setLink(String ref) {
526       mLink = ref;
527    }
528
529    public String getGroup() {
530      return mGroup;
531    }
532
533    public void setGroup(String group) {
534      mGroup = group;
535    }
536
537    public List<String> getTags() {
538        return mTags;
539    }
540
541    public List<String> getKeywords() {
542        return mKeywords;
543    }
544
545    public void setKeywords(String tags) {
546      if (tags.equals("")) {
547        mKeywords = null;
548      } else {
549        List<String> tagList = new ArrayList();
550        String[] tagParts = tags.split(",");
551
552        for (String t : tagParts) {
553          tagList.add(t);
554        }
555        mKeywords = tagList;
556      }
557    }
558
559    public void setTags(String tags) {
560      if (tags.equals("")) {
561        mTags = null;
562      } else {
563        List<String> tagList = new ArrayList();
564        String[] tagParts = tags.split(",");
565
566        for (String t : tagParts) {
567          tagList.add(t);
568        }
569        mTags = tagList;
570      }
571    }
572
573    public String getImage() {
574        return mImage;
575    }
576
577    public void setImage(String ref) {
578       mImage = ref;
579    }
580
581    public List<Node> getChildren() {
582        return mChildren;
583    }
584
585    public void setChildren(List<Node> node) {
586        mChildren = node;
587    }
588
589    public String getLang() {
590      return mLang;
591    }
592
593    public void setLang(String lang) {
594      mLang = lang;
595    }
596
597    public String getType() {
598      return mType;
599    }
600
601    public void setType(String type) {
602      mType = type;
603    }
604  }
605}
606