/* * Copyright (C) 2007 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.android.util; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.Set; import java.util.List; /** * * Logic for parsing a text message typed by the user looking for smileys, * urls, acronyms,formatting (e.g., '*'s for bold), me commands * (e.g., "/me is asleep"), and punctuation. * * It constructs an array, which breaks the text up into its * constituent pieces, which we return to the client. * */ public abstract class AbstractMessageParser { /** * Interface representing the set of resources needed by a message parser * * @author jessan (Jessan Hutchison-Quillian) */ public static interface Resources { /** Get the known set of URL schemes. */ public Set getSchemes(); /** Get the possible values for the last part of a domain name. * Values are expected to be reversed in the Trie. */ public TrieNode getDomainSuffixes(); /** Get the smileys accepted by the parser. */ public TrieNode getSmileys(); /** Get the acronyms accepted by the parser. */ public TrieNode getAcronyms(); } /** * Subclasses must define the schemes, domains, smileys and acronyms * that are necessary for parsing */ protected abstract Resources getResources(); /** Music note that indicates user is listening to a music track. 
*/ public static final String musicNote = "\u266B "; private String text; private int nextChar; private int nextClass; private ArrayList parts; private ArrayList tokens; private HashMap formatStart; private boolean parseSmilies; private boolean parseAcronyms; private boolean parseFormatting; private boolean parseUrls; private boolean parseMeText; private boolean parseMusic; /** * Create a message parser to parse urls, formatting, acronyms, smileys, * /me text and music * * @param text the text to parse */ public AbstractMessageParser(String text) { this(text, true, true, true, true, true, true); } /** * Create a message parser, specifying the kinds of text to parse * * @param text the text to parse * */ public AbstractMessageParser(String text, boolean parseSmilies, boolean parseAcronyms, boolean parseFormatting, boolean parseUrls, boolean parseMusic, boolean parseMeText) { this.text = text; this.nextChar = 0; this.nextClass = 10; this.parts = new ArrayList(); this.tokens = new ArrayList(); this.formatStart = new HashMap(); this.parseSmilies = parseSmilies; this.parseAcronyms = parseAcronyms; this.parseFormatting = parseFormatting; this.parseUrls = parseUrls; this.parseMusic = parseMusic; this.parseMeText = parseMeText; } /** Returns the raw text being parsed. */ public final String getRawText() { return text; } /** Return the number of parts. */ public final int getPartCount() { return parts.size(); } /** Return the part at the given index. */ public final Part getPart(int index) { return parts.get(index); } /** Return the list of parts from the parsed text */ public final List getParts() { return parts; } /** Parses the text string into an internal representation. */ public void parse() { // Look for music track (of which there would be only one and it'll be the // first token) if (parseMusicTrack()) { buildParts(null); return; } // Look for me commands. 
String meText = null; if (parseMeText && text.startsWith("/me") && (text.length() > 3) && Character.isWhitespace(text.charAt(3))) { meText = text.substring(0, 4); text = text.substring(4); } // Break the text into tokens. boolean wasSmiley = false; while (nextChar < text.length()) { if (!isWordBreak(nextChar)) { if (!wasSmiley || !isSmileyBreak(nextChar)) { throw new AssertionError("last chunk did not end at word break"); } } if (parseSmiley()) { wasSmiley = true; } else { wasSmiley = false; if (!parseAcronym() && !parseURL() && !parseFormatting()) { parseText(); } } } // Trim the whitespace before and after media components. for (int i = 0; i < tokens.size(); ++i) { if (tokens.get(i).isMedia()) { if ((i > 0) && (tokens.get(i - 1) instanceof Html)) { ((Html)tokens.get(i - 1)).trimLeadingWhitespace(); } if ((i + 1 < tokens.size()) && (tokens.get(i + 1) instanceof Html)) { ((Html)tokens.get(i + 1)).trimTrailingWhitespace(); } } } // Remove any empty html tokens. for (int i = 0; i < tokens.size(); ++i) { if (tokens.get(i).isHtml() && (tokens.get(i).toHtml(true).length() == 0)) { tokens.remove(i); --i; // visit this index again } } buildParts(meText); } /** * Get a the appropriate Token for a given URL * * @param text the anchor text * @param url the url * */ public static Token tokenForUrl(String url, String text) { if(url == null) { return null; } //Look for video links Video video = Video.matchURL(url, text); if (video != null) { return video; } // Look for video links. YouTubeVideo ytVideo = YouTubeVideo.matchURL(url, text); if (ytVideo != null) { return ytVideo; } // Look for photo links. Photo photo = Photo.matchURL(url, text); if (photo != null) { return photo; } // Look for photo links. FlickrPhoto flickrPhoto = FlickrPhoto.matchURL(url, text); if (flickrPhoto != null) { return flickrPhoto; } //Not media, so must be a regular URL return new Link(url, text); } /** * Builds the parts list. 
* * @param meText any meText parsed from the message */ private void buildParts(String meText) { for (int i = 0; i < tokens.size(); ++i) { Token token = tokens.get(i); if (token.isMedia() || (parts.size() == 0) || lastPart().isMedia()) { parts.add(new Part()); } lastPart().add(token); } // The first part inherits the meText of the line. if (parts.size() > 0) { parts.get(0).setMeText(meText); } } /** Returns the last part in the list. */ private Part lastPart() { return parts.get(parts.size() - 1); } /** * Looks for a music track (\u266B is first character, everything else is * track info). */ private boolean parseMusicTrack() { if (parseMusic && text.startsWith(musicNote)) { addToken(new MusicTrack(text.substring(musicNote.length()))); nextChar = text.length(); return true; } return false; } /** Consumes all of the text in the next word . */ private void parseText() { StringBuilder buf = new StringBuilder(); int start = nextChar; do { char ch = text.charAt(nextChar++); switch (ch) { case '<': buf.append("<"); break; case '>': buf.append(">"); break; case '&': buf.append("&"); break; case '"': buf.append("""); break; case '\'': buf.append("'"); break; case '\n': buf.append("
"); break; default: buf.append(ch); break; } } while (!isWordBreak(nextChar)); addToken(new Html(text.substring(start, nextChar), buf.toString())); } /** * Looks for smileys (e.g., ":)") in the text. The set of known smileys is * loaded from a file into a trie at server start. */ private boolean parseSmiley() { if(!parseSmilies) { return false; } TrieNode match = longestMatch(getResources().getSmileys(), this, nextChar, true); if (match == null) { return false; } else { int previousCharClass = getCharClass(nextChar - 1); int nextCharClass = getCharClass(nextChar + match.getText().length()); if ((previousCharClass == 2 || previousCharClass == 3) && (nextCharClass == 2 || nextCharClass == 3)) { return false; } addToken(new Smiley(match.getText())); nextChar += match.getText().length(); return true; } } /** Looks for acronyms (e.g., "lol") in the text. */ private boolean parseAcronym() { if(!parseAcronyms) { return false; } TrieNode match = longestMatch(getResources().getAcronyms(), this, nextChar); if (match == null) { return false; } else { addToken(new Acronym(match.getText(), match.getValue())); nextChar += match.getText().length(); return true; } } /** Determines if this is an allowable domain character. */ private boolean isDomainChar(char c) { return c == '-' || Character.isLetter(c) || Character.isDigit(c); } /** Determines if the given string is a valid domain. */ private boolean isValidDomain(String domain) { // For hostnames, check that it ends with a known domain suffix if (matches(getResources().getDomainSuffixes(), reverse(domain))) { return true; } return false; } /** * Looks for a URL in two possible forms: either a proper URL with a known * scheme or a domain name optionally followed by a path, query, or query. */ private boolean parseURL() { // Make sure this is a valid place to start a URL. if (!parseUrls || !isURLBreak(nextChar)) { return false; } int start = nextChar; // Search for the first block of letters. 
int index = start; while ((index < text.length()) && isDomainChar(text.charAt(index))) { index += 1; } String url = ""; boolean done = false; if (index == text.length()) { return false; } else if (text.charAt(index) == ':') { // Make sure this is a known scheme. String scheme = text.substring(nextChar, index); if (!getResources().getSchemes().contains(scheme)) { return false; } } else if (text.charAt(index) == '.') { // Search for the end of the domain name. while (index < text.length()) { char ch = text.charAt(index); if ((ch != '.') && !isDomainChar(ch)) { break; } else { index += 1; } } // Make sure the domain name has a valid suffix. Since tries look for // prefix matches, we reverse all the strings to get suffix comparisons. String domain = text.substring(nextChar, index); if (!isValidDomain(domain)) { return false; } // Search for a port. We deal with this specially because a colon can // also be a punctuation character. if ((index + 1 < text.length()) && (text.charAt(index) == ':')) { char ch = text.charAt(index + 1); if (Character.isDigit(ch)) { index += 1; while ((index < text.length()) && Character.isDigit(text.charAt(index))) { index += 1; } } } // The domain name should be followed by end of line, whitespace, // punctuation, or a colon, slash, question, or hash character. The // tricky part here is that some URL characters are also punctuation, so // we need to distinguish them. Since we looked for ports above, a colon // is always punctuation here. To distinguish '?' cases, we look at the // character that follows it. if (index == text.length()) { done = true; } else { char ch = text.charAt(index); if (ch == '?') { // If the next character is whitespace or punctuation (or missing), // then this question mark looks like punctuation. 
if (index + 1 == text.length()) { done = true; } else { char ch2 = text.charAt(index + 1); if (Character.isWhitespace(ch2) || isPunctuation(ch2)) { done = true; } } } else if (isPunctuation(ch)) { done = true; } else if (Character.isWhitespace(ch)) { done = true; } else if ((ch == '/') || (ch == '#')) { // In this case, the URL is not done. We will search for the end of // it below. } else { return false; } } // We will assume the user meant HTTP. (One weird case is where they // type a port of 443. That could mean HTTPS, but they might also want // HTTP. We'll let them specify if they don't want HTTP.) url = "http://"; } else { return false; } // If the URL is not done, search for the end, which is just before the // next whitespace character. if (!done) { while ((index < text.length()) && !Character.isWhitespace(text.charAt(index))) { index += 1; } } String urlText = text.substring(start, index); url += urlText; // Figure out the appropriate token type. addURLToken(url, urlText); nextChar = index; return true; } /** * Adds the appropriate token for the given URL. This might be a simple * link or it might be a recognized media type. */ private void addURLToken(String url, String text) { addToken(tokenForUrl(url, text)); } /** * Deal with formatting characters. * * Parsing is as follows: * - Treat all contiguous strings of formatting characters as one block. * (This method processes one block.) * - Only a single instance of a particular format character within a block * is used to determine whether to turn on/off that type of formatting; * other instances simply print the character itself. * - If the format is to be turned on, we use the _first_ instance; if it * is to be turned off, we use the _last_ instance (by appending the * format.) 
* * Example: * **string** turns into *string* */ private boolean parseFormatting() { if(!parseFormatting) { return false; } int endChar = nextChar; while ((endChar < text.length()) && isFormatChar(text.charAt(endChar))) { endChar += 1; } if ((endChar == nextChar) || !isWordBreak(endChar)) { return false; } // Keeps track of whether we've seen a character (in map if we've seen it) // and whether we should append a closing format token (if value in // map is TRUE). Linked hashmap for consistent ordering. LinkedHashMap seenCharacters = new LinkedHashMap(); for (int index = nextChar; index < endChar; ++index) { char ch = text.charAt(index); Character key = Character.valueOf(ch); if (seenCharacters.containsKey(key)) { // Already seen this character, just append an unmatched token, which // will print plaintext character addToken(new Format(ch, false)); } else { Format start = formatStart.get(key); if (start != null) { // Match the start token, and ask an end token to be appended start.setMatched(true); formatStart.remove(key); seenCharacters.put(key, Boolean.TRUE); } else { // Append start token start = new Format(ch, true); formatStart.put(key, start); addToken(start); seenCharacters.put(key, Boolean.FALSE); } } } // Append any necessary end tokens for (Character key : seenCharacters.keySet()) { if (seenCharacters.get(key) == Boolean.TRUE) { Format end = new Format(key.charValue(), false); end.setMatched(true); addToken(end); } } nextChar = endChar; return true; } /** Determines whether the given index could be a possible word break. */ private boolean isWordBreak(int index) { return getCharClass(index - 1) != getCharClass(index); } /** Determines whether the given index could be a possible smiley break. 
*/ private boolean isSmileyBreak(int index) { if (index > 0 && index < text.length()) { if (isSmileyBreak(text.charAt(index - 1), text.charAt(index))) { return true; } } return false; } /** * Verifies that the character before the given index is end of line, * whitespace, or punctuation. */ private boolean isURLBreak(int index) { switch (getCharClass(index - 1)) { case 2: case 3: case 4: return false; case 0: case 1: default: return true; } } /** Returns the class for the character at the given index. */ private int getCharClass(int index) { if ((index < 0) || (text.length() <= index)) { return 0; } char ch = text.charAt(index); if (Character.isWhitespace(ch)) { return 1; } else if (Character.isLetter(ch)) { return 2; } else if (Character.isDigit(ch)) { return 3; } else if (isPunctuation(ch)) { // For punctuation, we return a unique value every time so that they are // always different from any other character. Punctuation should always // be considered a possible word break. return ++nextClass; } else { return 4; } } /** * Returns true if c1 could be the last character of * a smiley and c2 could be the first character of * a different smiley, if {@link #isWordBreak} would not already * recognize that this is possible. */ private static boolean isSmileyBreak(char c1, char c2) { switch (c1) { /* * These characters can end smileys, but don't normally end words. */ case '$': case '&': case '*': case '+': case '-': case '/': case '<': case '=': case '>': case '@': case '[': case '\\': case ']': case '^': case '|': case '}': case '~': switch (c2) { /* * These characters can begin smileys, but don't normally * begin words. */ case '#': case '$': case '%': case '*': case '/': case '<': case '=': case '>': case '@': case '[': case '\\': case '^': case '~': return true; } } return false; } /** Determines whether the given character is punctuation. 
*/ private static boolean isPunctuation(char ch) { switch (ch) { case '.': case ',': case '"': case ':': case ';': case '?': case '!': case '(': case ')': return true; default: return false; } } /** * Determines whether the given character is the beginning or end of a * section with special formatting. */ private static boolean isFormatChar(char ch) { switch (ch) { case '*': case '_': case '^': return true; default: return false; } } /** Represents a unit of parsed output. */ public static abstract class Token { public enum Type { HTML ("html"), FORMAT ("format"), // subtype of HTML LINK ("l"), SMILEY ("e"), ACRONYM ("a"), MUSIC ("m"), GOOGLE_VIDEO ("v"), YOUTUBE_VIDEO ("yt"), PHOTO ("p"), FLICKR ("f"); //stringreps for HTML and FORMAT don't really matter //because they don't define getInfo(), which is where it is used //For the other types, code depends on their stringreps private String stringRep; Type(String stringRep) { this.stringRep = stringRep; } /** {@inheritDoc} */ public String toString() { return this.stringRep; } } protected Type type; protected String text; protected Token(Type type, String text) { this.type = type; this.text = text; } /** Returns the type of the token. */ public Type getType() { return type; } /** * Get the relevant information about a token * * @return a list of strings representing the token, not null * The first item is always a string representation of the type */ public List getInfo() { List info = new ArrayList(); info.add(getType().toString()); return info; } /** Returns the raw text of the token. */ public String getRawText() { return text; } public boolean isMedia() { return false; } public abstract boolean isHtml(); public boolean isArray() { return !isHtml(); } public String toHtml(boolean caps) { throw new AssertionError("not html"); } // The token can change the caps of the text after that point. 
public boolean controlCaps() { return false; } public boolean setCaps() { return false; } } /** Represents a simple string of html text. */ public static class Html extends Token { private String html; public Html(String text, String html) { super(Type.HTML, text); this.html = html; } public boolean isHtml() { return true; } public String toHtml(boolean caps) { return caps ? html.toUpperCase() : html; } /** * Not supported. Info should not be needed for this type */ public List getInfo() { throw new UnsupportedOperationException(); } public void trimLeadingWhitespace() { text = trimLeadingWhitespace(text); html = trimLeadingWhitespace(html); } public void trimTrailingWhitespace() { text = trimTrailingWhitespace(text); html = trimTrailingWhitespace(html); } private static String trimLeadingWhitespace(String text) { int index = 0; while ((index < text.length()) && Character.isWhitespace(text.charAt(index))) { ++index; } return text.substring(index); } public static String trimTrailingWhitespace(String text) { int index = text.length(); while ((index > 0) && Character.isWhitespace(text.charAt(index - 1))) { --index; } return text.substring(0, index); } } /** Represents a music track token at the beginning. */ public static class MusicTrack extends Token { private String track; public MusicTrack(String track) { super(Type.MUSIC, track); this.track = track; } public String getTrack() { return track; } public boolean isHtml() { return false; } public List getInfo() { List info = super.getInfo(); info.add(getTrack()); return info; } } /** Represents a link that was found in the input. */ public static class Link extends Token { private String url; public Link(String url, String text) { super(Type.LINK, text); this.url = url; } public String getURL() { return url; } public boolean isHtml() { return false; } public List getInfo() { List info = super.getInfo(); info.add(getURL()); info.add(getRawText()); return info; } } /** Represents a link to a Google Video. 
*/
  public static class Video extends Token {
    /** Pattern for a video URL; group 1 captures the docid. */
    private static final Pattern URL_PATTERN = Pattern.compile(
        "(?i)http://video\\.google\\.[a-z0-9]+(?:\\.[a-z0-9]+)?/videoplay\\?"
        + ".*?\\bdocid=(-?\\d+).*");

    private String docid;

    public Video(String docid, String text) {
      super(Type.GOOGLE_VIDEO, text);
      this.docid = docid;
    }

    public String getDocID() { return docid; }

    public boolean isHtml() { return false; }
    public boolean isMedia() { return true; }

    /** Returns a Video object if the given url is to a video, else null. */
    public static Video matchURL(String url, String text) {
      Matcher matcher = URL_PATTERN.matcher(url);
      return matcher.matches() ? new Video(matcher.group(1), text) : null;
    }

    /** Returns the type string followed by the RSS URL and the video URL. */
    public List getInfo() {
      List details = super.getInfo();
      details.add(getRssUrl(docid));
      details.add(getURL(docid));
      return details;
    }

    /** Returns the URL for the RSS description of the given video. */
    public static String getRssUrl(String docid) {
      return "http://video.google.com/videofeed"
             + "?type=docid&output=rss&sourceid=gtalk&docid=" + docid;
    }

    /** (For testing purposes:) Returns a video URL with the given parts.  */
    public static String getURL(String docid) {
      return getURL(docid, null);
    }

    /** (For testing purposes:) Returns a video URL with the given parts.  */
    public static String getURL(String docid, String extraParams) {
      // Extra parameters, when present, go in front and are joined with '&'.
      String leading = "";
      if (extraParams != null) {
        leading = (extraParams.length() > 0) ? extraParams + "&" : extraParams;
      }
      return "http://video.google.com/videoplay?" + leading
             + "docid=" + docid;
    }
  }

  /** Represents a link to a YouTube video. */
  public static class YouTubeVideo extends Token {
    /** Pattern for a video URL; group 1 captures the value of "v". */
    private static final Pattern URL_PATTERN = Pattern.compile(
        "(?i)http://(?:[a-z0-9]+\\.)?youtube\\.[a-z0-9]+(?:\\.[a-z0-9]+)?/watch\\?"
        + ".*\\bv=([-_a-zA-Z0-9=]+).*");

    private String docid;

    public YouTubeVideo(String docid, String text) {
      super(Type.YOUTUBE_VIDEO, text);
      this.docid = docid;
    }

    public String getDocID() { return docid; }

    public boolean isHtml() { return false; }
    public boolean isMedia() { return true; }

    /**
     * Returns a YouTubeVideo object if the given url is to a video,
     * else null.
     */
    public static YouTubeVideo matchURL(String url, String text) {
      Matcher matcher = URL_PATTERN.matcher(url);
      return matcher.matches()
          ? new YouTubeVideo(matcher.group(1), text)
          : null;
    }

    /** Returns the type string followed by the RSS URL and the video URL. */
    public List getInfo() {
      List details = super.getInfo();
      details.add(getRssUrl(docid));
      details.add(getURL(docid));
      return details;
    }

    /** Returns the URL for the RSS description of the given video. */
    public static String getRssUrl(String docid) {
      return "http://youtube.com/watch?v=" + docid;
    }

    /** (For testing purposes:) Returns a video URL with the given parts.  */
    public static String getURL(String docid) {
      return getURL(docid, null);
    }

    /** (For testing purposes:) Returns a video URL with the given parts.  */
    public static String getURL(String docid, String extraParams) {
      String leading = "";
      if (extraParams != null) {
        leading = (extraParams.length() > 0) ? extraParams + "&" : extraParams;
      }
      return "http://youtube.com/watch?" + leading + "v=" + docid;
    }

    /** (For testing purposes:) Returns a video URL with the given parts.
      * @param http If true, includes http://
      * @param prefix If non-null/non-blank, adds to URL before youtube.com.
      *   (e.g., prefix="br." --> "br.youtube.com")
      */
    public static String getPrefixedURL(boolean http, String prefix,
                                        String docid, String extraParams) {
      final String protocol = http ? "http://" : "";
      final String host = (prefix == null) ? "" : prefix;

      String leading = "";
      if (extraParams != null) {
        leading = (extraParams.length() > 0) ? extraParams + "&" : extraParams;
      }

      return protocol + host + "youtube.com/watch?" + leading + "v="
              + docid;
    }
  }

  /** Represents a link to a Picasa photo or album.
*/ public static class Photo extends Token { /** Pattern for an album or photo URL. */ // TODO (katyarogers) searchbrowse includes search lists and tags, // it follows a different pattern than albums - would be nice to add later private static final Pattern URL_PATTERN = Pattern.compile( "http://picasaweb.google.com/([^/?#&]+)/+((?!searchbrowse)[^/?#&]+)(?:/|/photo)?(?:\\?[^#]*)?(?:#(.*))?"); private String user; private String album; private String photo; // null for albums public Photo(String user, String album, String photo, String text) { super(Type.PHOTO, text); this.user = user; this.album = album; this.photo = photo; } public String getUser() { return user; } public String getAlbum() { return album; } public String getPhoto() { return photo; } public boolean isHtml() { return false; } public boolean isMedia() { return true; } /** Returns a Photo object if the given url is to a photo or album. */ public static Photo matchURL(String url, String text) { Matcher m = URL_PATTERN.matcher(url); if (m.matches()) { return new Photo(m.group(1), m.group(2), m.group(3), text); } else { return null; } } public List getInfo() { List info = super.getInfo(); info.add(getRssUrl(getUser())); info.add(getAlbumURL(getUser(), getAlbum())); if (getPhoto() != null) { info.add(getPhotoURL(getUser(), getAlbum(), getPhoto())); } else { info.add((String)null); } return info; } /** Returns the URL for the RSS description of the user's albums. */ public static String getRssUrl(String user) { return "http://picasaweb.google.com/data/feed/api/user/" + user + "?category=album&alt=rss"; } /** Returns the URL for an album. */ public static String getAlbumURL(String user, String album) { return "http://picasaweb.google.com/" + user + "/" + album; } /** Returns the URL for a particular photo. 
*/ public static String getPhotoURL(String user, String album, String photo) { return "http://picasaweb.google.com/" + user + "/" + album + "/photo#" + photo; } } /** Represents a link to a Flickr photo or album. */ public static class FlickrPhoto extends Token { /** Pattern for a user album or photo URL. */ private static final Pattern URL_PATTERN = Pattern.compile( "http://(?:www.)?flickr.com/photos/([^/?#&]+)/?([^/?#&]+)?/?.*"); private static final Pattern GROUPING_PATTERN = Pattern.compile( "http://(?:www.)?flickr.com/photos/([^/?#&]+)/(tags|sets)/" + "([^/?#&]+)/?"); private static final String SETS = "sets"; private static final String TAGS = "tags"; private String user; private String photo; // null for user album private String grouping; // either "tags" or "sets" private String groupingId; // sets or tags identifier public FlickrPhoto(String user, String photo, String grouping, String groupingId, String text) { super(Type.FLICKR, text); /* System wide tags look like the URL to a Flickr user. */ if (!TAGS.equals(user)) { this.user = user; // Don't consider slide show URL a photo this.photo = (!"show".equals(photo) ? photo : null); this.grouping = grouping; this.groupingId = groupingId; } else { this.user = null; this.photo = null; this.grouping = TAGS; this.groupingId = photo; } } public String getUser() { return user; } public String getPhoto() { return photo; } public String getGrouping() { return grouping; } public String getGroupingId() { return groupingId; } public boolean isHtml() { return false; } public boolean isMedia() { return true; } /** * Returns a FlickrPhoto object if the given url is to a photo or Flickr * user. 
*/ public static FlickrPhoto matchURL(String url, String text) { Matcher m = GROUPING_PATTERN.matcher(url); if (m.matches()) { return new FlickrPhoto(m.group(1), null, m.group(2), m.group(3), text); } m = URL_PATTERN.matcher(url); if (m.matches()) { return new FlickrPhoto(m.group(1), m.group(2), null, null, text); } else { return null; } } public List getInfo() { List info = super.getInfo(); info.add(getUrl()); info.add(getUser() != null ? getUser() : ""); info.add(getPhoto() != null ? getPhoto() : ""); info.add(getGrouping() != null ? getGrouping() : ""); info.add(getGroupingId() != null ? getGroupingId() : ""); return info; } public String getUrl() { if (SETS.equals(grouping)) { return getUserSetsURL(user, groupingId); } else if (TAGS.equals(grouping)) { if (user != null) { return getUserTagsURL(user, groupingId); } else { return getTagsURL(groupingId); } } else if (photo != null) { return getPhotoURL(user, photo); } else { return getUserURL(user); } } /** Returns the URL for the RSS description. */ public static String getRssUrl(String user) { return null; } /** Returns the URL for a particular tag. */ public static String getTagsURL(String tag) { return "http://flickr.com/photos/tags/" + tag; } /** Returns the URL to the user's Flickr homepage. */ public static String getUserURL(String user) { return "http://flickr.com/photos/" + user; } /** Returns the URL for a particular photo. */ public static String getPhotoURL(String user, String photo) { return "http://flickr.com/photos/" + user + "/" + photo; } /** Returns the URL for a user tag photo set. */ public static String getUserTagsURL(String user, String tagId) { return "http://flickr.com/photos/" + user + "/tags/" + tagId; } /** Returns the URL for user set. */ public static String getUserSetsURL(String user, String setId) { return "http://flickr.com/photos/" + user + "/sets/" + setId; } } /** Represents a smiley that was found in the input. 
*/ public static class Smiley extends Token { // TODO: Pass the SWF URL down to the client. public Smiley(String text) { super(Type.SMILEY, text); } public boolean isHtml() { return false; } public List getInfo() { List info = super.getInfo(); info.add(getRawText()); return info; } } /** Represents an acronym that was found in the input. */ public static class Acronym extends Token { private String value; // TODO: SWF public Acronym(String text, String value) { super(Type.ACRONYM, text); this.value = value; } public String getValue() { return value; } public boolean isHtml() { return false; } public List getInfo() { List info = super.getInfo(); info.add(getRawText()); info.add(getValue()); return info; } } /** Represents a character that changes formatting. */ public static class Format extends Token { private char ch; private boolean start; private boolean matched; public Format(char ch, boolean start) { super(Type.FORMAT, String.valueOf(ch)); this.ch = ch; this.start = start; } public void setMatched(boolean matched) { this.matched = matched; } public boolean isHtml() { return true; } public String toHtml(boolean caps) { // This character only implies special formatting if it was matched. // Otherwise, it was just a plain old character. if (matched) { return start ? getFormatStart(ch) : getFormatEnd(ch); } else { // We have to make sure we escape HTML characters as usual. return (ch == '"') ? """ : String.valueOf(ch); } } /** * Not supported. 
Info should not be needed for this type */ public List getInfo() { throw new UnsupportedOperationException(); } public boolean controlCaps() { return (ch == '^'); } public boolean setCaps() { return start; } private String getFormatStart(char ch) { switch (ch) { case '*': return ""; case '_': return ""; case '^': return ""; // TODO: all caps case '"': return "\u201c"; default: throw new AssertionError("unknown format '" + ch + "'"); } } private String getFormatEnd(char ch) { switch (ch) { case '*': return ""; case '_': return ""; case '^': return ""; // TODO: all caps case '"': return "\u201d"; default: throw new AssertionError("unknown format '" + ch + "'"); } } } /** Adds the given token to the parsed output. */ private void addToken(Token token) { tokens.add(token); } /** Converts the entire message into a single HTML display string. */ public String toHtml() { StringBuilder html = new StringBuilder(); for (Part part : parts) { boolean caps = false; html.append("

"); for (Token token : part.getTokens()) { if (token.isHtml()) { html.append(token.toHtml(caps)); } else { switch (token.getType()) { case LINK: html.append(""); html.append(token.getRawText()); html.append(""); break; case SMILEY: // TODO: link to an appropriate image html.append(token.getRawText()); break; case ACRONYM: html.append(token.getRawText()); break; case MUSIC: // TODO: include a music glyph html.append(((MusicTrack)token).getTrack()); break; case GOOGLE_VIDEO: // TODO: include a Google Video icon html.append(""); html.append(token.getRawText()); html.append(""); break; case YOUTUBE_VIDEO: // TODO: include a YouTube icon html.append(""); html.append(token.getRawText()); html.append(""); break; case PHOTO: { // TODO: include a Picasa Web icon html.append(""); html.append(token.getRawText()); html.append(""); break; } case FLICKR: // TODO: include a Flickr icon Photo p = (Photo) token; html.append(""); html.append(token.getRawText()); html.append(""); break; default: throw new AssertionError("unknown token type: " + token.getType()); } } if (token.controlCaps()) { caps = token.setCaps(); } } html.append("

\n"); } return html.toString(); } /** Returns the reverse of the given string. */ protected static String reverse(String str) { StringBuilder buf = new StringBuilder(); for (int i = str.length() - 1; i >= 0; --i) { buf.append(str.charAt(i)); } return buf.toString(); } public static class TrieNode { private final HashMap children = new HashMap(); private String text; private String value; public TrieNode() { this(""); } public TrieNode(String text) { this.text = text; } public final boolean exists() { return value != null; } public final String getText() { return text; } public final String getValue() { return value; } public void setValue(String value) { this.value = value; } public TrieNode getChild(char ch) { return children.get(Character.valueOf(ch)); } public TrieNode getOrCreateChild(char ch) { Character key = Character.valueOf(ch); TrieNode node = children.get(key); if (node == null) { node = new TrieNode(text + String.valueOf(ch)); children.put(key, node); } return node; } /** Adds the given string into the trie. */ public static void addToTrie(TrieNode root, String str, String value) { int index = 0; while (index < str.length()) { root = root.getOrCreateChild(str.charAt(index++)); } root.setValue(value); } } /** Determines whether the given string is in the given trie. */ private static boolean matches(TrieNode root, String str) { int index = 0; while (index < str.length()) { root = root.getChild(str.charAt(index++)); if (root == null) { break; } else if (root.exists()) { return true; } } return false; } /** * Returns the longest substring of the given string, starting at the given * index, that exists in the trie. */ private static TrieNode longestMatch( TrieNode root, AbstractMessageParser p, int start) { return longestMatch(root, p, start, false); } /** * Returns the longest substring of the given string, starting at the given * index, that exists in the trie, with a special tokenizing case for * smileys if specified. 
*/
  private static TrieNode longestMatch(
      TrieNode root, AbstractMessageParser p, int start, boolean smiley) {
    final String raw = p.getRawText();
    TrieNode node = root;
    TrieNode bestMatch = null;
    int index = start;
    while (index < raw.length()) {
      node = node.getChild(raw.charAt(index++));
      if (node == null) {
        break;
      }
      // index now points just past the candidate. A stored entry only counts
      // when it ends at a word break or, for smileys, at a smiley break.
      // Later (longer) candidates overwrite earlier ones.
      if (node.exists()
          && (p.isWordBreak(index) || (smiley && p.isSmileyBreak(index)))) {
        bestMatch = node;
      }
    }
    return bestMatch;
  }

  /** Represents set of tokens that are delivered as a single message. */
  public static class Part {
    private String meText;
    // Typed so that element access yields Token; a raw list would yield Object.
    private final ArrayList<Token> tokens;

    /** Creates a part with no tokens and no me-text. */
    public Part() {
      this.tokens = new ArrayList<Token>();
    }

    /**
     * Returns the type code of this part: "s" when {@code isSend} is true,
     * otherwise "r", followed by "d" for a media part, "m" for a part with
     * me-text, or nothing.
     */
    public String getType(boolean isSend) {
      return (isSend ? "s" : "r") + getPartType();
    }

    /** Returns "d" for media, "m" when me-text is set, otherwise "". */
    private String getPartType() {
      if (isMedia()) {
        return "d";
      } else if (meText != null) {
        return "m";
      } else {
        return "";
      }
    }

    /** Returns true if this part holds exactly one token and it is media. */
    public boolean isMedia() {
      return (tokens.size() == 1) && tokens.get(0).isMedia();
    }

    /**
     * Convenience method for getting the Token of a Part that represents
     * a media Token. Parts of this kind will always only have a single Token
     *
     * @return if this.isMedia(),
     *         returns the Token representing the media contained in this Part,
     *         otherwise returns null;
     */
    public Token getMediaToken() {
      if (isMedia()) {
        return tokens.get(0);
      }
      return null;
    }

    /**
     * Adds the given token to this part.
     *
     * @throws AssertionError if this part already is a media part
     */
    public void add(Token token) {
      if (isMedia()) {
        throw new AssertionError("media ");
      }
      tokens.add(token);
    }

    /** Sets the me-text of this part. */
    public void setMeText(String meText) {
      this.meText = meText;
    }

    /**
     * Returns the original text of this part: the me-text, if set, followed
     * by the raw text of every token in order.
     */
    public String getRawText() {
      StringBuilder buf = new StringBuilder();
      if (meText != null) {
        buf.append(meText);
      }
      for (Token token : tokens) {
        buf.append(token.getRawText());
      }
      return buf.toString();
    }

    /** Returns the tokens in this part (the live list, not a copy). */
    public ArrayList<Token> getTokens() {
      return tokens;
    }

    // Disabled: adds the tokens into the given builder as an array.
    // public void toArray(JSArrayBuilder array) {
    //   if (isMedia()) {
    //     // For media, we send its array (i.e., we don't wrap this in another
    //     // array as we do for non-media parts).
    //     tokens.get(0).toArray(array);
    //   } else {
    //     array.beginArray();
    //     addToArray(array);
    //     array.endArray();
    //   }
    // }
  }
}