SimpleWikiHelper.java revision 807dc15b095139d806eab5c9d4de4fbf692ed447
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.example.android.simplewiktionary;
18
19import org.apache.http.HttpEntity;
20import org.apache.http.HttpResponse;
21import org.apache.http.StatusLine;
22import org.apache.http.client.HttpClient;
23import org.apache.http.client.methods.HttpGet;
24import org.apache.http.impl.client.DefaultHttpClient;
25import org.json.JSONArray;
26import org.json.JSONException;
27import org.json.JSONObject;
28
29import android.content.Context;
30import android.content.pm.PackageInfo;
31import android.content.pm.PackageManager;
32import android.content.pm.PackageManager.NameNotFoundException;
33import android.net.Uri;
34import android.util.Log;
35
36import java.io.ByteArrayOutputStream;
37import java.io.IOException;
38import java.io.InputStream;
39
40/**
41 * Helper methods to simplify talking with and parsing responses from a
42 * lightweight Wiktionary API. Before making any requests, you should call
43 * {@link #prepareUserAgent(Context)} to generate a User-Agent string based on
44 * your application package name and version.
45 */
46public class SimpleWikiHelper {
47    private static final String TAG = "SimpleWikiHelper";
48
49    /**
50     * Regular expression that splits "Word of the day" entry into word
51     * name, word type, and the first description bullet point.
52     */
53    public static final String WORD_OF_DAY_REGEX =
54            "(?s)\\{\\{wotd\\|(.+?)\\|(.+?)\\|([^#\\|]+).*?\\}\\}";
55
56    /**
57     * Partial URL to use when requesting the detailed entry for a specific
58     * Wiktionary page. Use {@link String#format(String, Object...)} to insert
59     * the desired page title after escaping it as needed.
60     */
61    private static final String WIKTIONARY_PAGE =
62            "http://en.wiktionary.org/w/api.php?action=query&prop=revisions&titles=%s&" +
63            "rvprop=content&format=json%s";
64
65    /**
66     * Partial URL to append to {@link #WIKTIONARY_PAGE} when you want to expand
67     * any templates found on the requested page. This is useful when browsing
68     * full entries, but may use more network bandwidth.
69     */
70    private static final String WIKTIONARY_EXPAND_TEMPLATES =
71            "&rvexpandtemplates=true";
72
73    /**
74     * {@link StatusLine} HTTP status code when no server error has occurred.
75     */
76    private static final int HTTP_STATUS_OK = 200;
77
78    /**
79     * Shared buffer used by {@link #getUrlContent(String)} when reading results
80     * from an API request.
81     */
82    private static byte[] sBuffer = new byte[512];
83
84    /**
85     * User-agent string to use when making requests. Should be filled using
86     * {@link #prepareUserAgent(Context)} before making any other calls.
87     */
88    private static String sUserAgent = null;
89
90    /**
91     * Thrown when there were problems contacting the remote API server, either
92     * because of a network error, or the server returned a bad status code.
93     */
94    public static class ApiException extends Exception {
95        public ApiException(String detailMessage, Throwable throwable) {
96            super(detailMessage, throwable);
97        }
98
99        public ApiException(String detailMessage) {
100            super(detailMessage);
101        }
102    }
103
104    /**
105     * Thrown when there were problems parsing the response to an API call,
106     * either because the response was empty, or it was malformed.
107     */
108    public static class ParseException extends Exception {
109        public ParseException(String detailMessage, Throwable throwable) {
110            super(detailMessage, throwable);
111        }
112    }
113
114    /**
115     * Prepare the internal User-Agent string for use. This requires a
116     * {@link Context} to pull the package name and version number for this
117     * application.
118     */
119    public static void prepareUserAgent(Context context) {
120        try {
121            // Read package name and version number from manifest
122            PackageManager manager = context.getPackageManager();
123            PackageInfo info = manager.getPackageInfo(context.getPackageName(), 0);
124            sUserAgent = String.format(context.getString(R.string.template_user_agent),
125                    info.packageName, info.versionName);
126
127        } catch(NameNotFoundException e) {
128            Log.e(TAG, "Couldn't find package information in PackageManager", e);
129        }
130    }
131
132    /**
133     * Read and return the content for a specific Wiktionary page. This makes a
134     * lightweight API call, and trims out just the page content returned.
135     * Because this call blocks until results are available, it should not be
136     * run from a UI thread.
137     *
138     * @param title The exact title of the Wiktionary page requested.
139     * @param expandTemplates If true, expand any wiki templates found.
140     * @return Exact content of page.
141     * @throws ApiException If any connection or server error occurs.
142     * @throws ParseException If there are problems parsing the response.
143     */
144    public static String getPageContent(String title, boolean expandTemplates)
145            throws ApiException, ParseException {
146        // Encode page title and expand templates if requested
147        String encodedTitle = Uri.encode(title);
148        String expandClause = expandTemplates ? WIKTIONARY_EXPAND_TEMPLATES : "";
149
150        // Query the API for content
151        String content = getUrlContent(String.format(WIKTIONARY_PAGE,
152                encodedTitle, expandClause));
153        try {
154            // Drill into the JSON response to find the content body
155            JSONObject response = new JSONObject(content);
156            JSONObject query = response.getJSONObject("query");
157            JSONObject pages = query.getJSONObject("pages");
158            JSONObject page = pages.getJSONObject((String) pages.keys().next());
159            JSONArray revisions = page.getJSONArray("revisions");
160            JSONObject revision = revisions.getJSONObject(0);
161            return revision.getString("*");
162        } catch (JSONException e) {
163            throw new ParseException("Problem parsing API response", e);
164        }
165    }
166
167    /**
168     * Pull the raw text content of the given URL. This call blocks until the
169     * operation has completed, and is synchronized because it uses a shared
170     * buffer {@link #sBuffer}.
171     *
172     * @param url The exact URL to request.
173     * @return The raw content returned by the server.
174     * @throws ApiException If any connection or server error occurs.
175     */
176    protected static synchronized String getUrlContent(String url) throws ApiException {
177        if (sUserAgent == null) {
178            throw new ApiException("User-Agent string must be prepared");
179        }
180
181        // Create client and set our specific user-agent string
182        HttpClient client = new DefaultHttpClient();
183        HttpGet request = new HttpGet(url);
184        request.setHeader("User-Agent", sUserAgent);
185
186        try {
187            HttpResponse response = client.execute(request);
188
189            // Check if server response is valid
190            StatusLine status = response.getStatusLine();
191            if (status.getStatusCode() != HTTP_STATUS_OK) {
192                throw new ApiException("Invalid response from server: " +
193                        status.toString());
194            }
195
196            // Pull content stream from response
197            HttpEntity entity = response.getEntity();
198            InputStream inputStream = entity.getContent();
199
200            ByteArrayOutputStream content = new ByteArrayOutputStream();
201
202            // Read response into a buffered stream
203            int readBytes = 0;
204            while ((readBytes = inputStream.read(sBuffer)) != -1) {
205                content.write(sBuffer, 0, readBytes);
206            }
207
208            // Return result from buffered stream
209            return new String(content.toByteArray());
210        } catch (IOException e) {
211            throw new ApiException("Problem communicating with API", e);
212        }
213    }
214}
215