1/*
2 * Copyright (C) 2009 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 *     * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *     * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 *     * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include "config.h"
32#include "WebSearchableFormData.h"
33
34#include "Document.h"
35#include "DocumentLoader.h"
36#include "FormDataBuilder.h"
37#include "FormDataList.h"
38#include "Frame.h"
39#include "HTMLFormControlElement.h"
40#include "HTMLFormElement.h"
41#include "HTMLInputElement.h"
42#include "HTMLNames.h"
43#include "HTMLOptionElement.h"
44#include "HTMLOptionsCollection.h"
45#include "HTMLSelectElement.h"
46#include "TextEncoding.h"
47#include "WebFormElement.h"
48
49using namespace WebCore;
50using namespace HTMLNames;
51
52namespace {
53
54// Gets the encoding for the form.
55void GetFormEncoding(const HTMLFormElement* form, TextEncoding* encoding)
56{
57    String str(form->getAttribute(HTMLNames::accept_charsetAttr));
58    str.replace(',', ' ');
59    Vector<String> charsets;
60    str.split(' ', charsets);
61    for (Vector<String>::const_iterator i(charsets.begin()); i != charsets.end(); ++i) {
62        *encoding = TextEncoding(*i);
63        if (encoding->isValid())
64            return;
65    }
66    *encoding = TextEncoding(form->document()->loader()->writer()->encoding());
67}
68
69// Returns true if the submit request results in an HTTP URL.
70bool IsHTTPFormSubmit(const HTMLFormElement* form)
71{
72    String action(form->action());
73    return form->document()->frame()->loader()->completeURL(action.isNull() ? "" : action).protocol() == "http";
74}
75
76// If the form does not have an activated submit button, the first submit
77// button is returned.
78HTMLFormControlElement* GetButtonToActivate(HTMLFormElement* form)
79{
80    HTMLFormControlElement* firstSubmitButton = 0;
81    // FIXME: Consider refactoring this code so that we don't call form->associatedElements() twice.
82    for (Vector<FormAssociatedElement*>::const_iterator i(form->associatedElements().begin()); i != form->associatedElements().end(); ++i) {
83      if (!(*i)->isFormControlElement())
84          continue;
85      HTMLFormControlElement* formElement = static_cast<HTMLFormControlElement*>(*i);
86      if (formElement->isActivatedSubmit())
87          // There's a button that is already activated for submit, return 0.
88          return 0;
89      if (!firstSubmitButton && formElement->isSuccessfulSubmitButton())
90          firstSubmitButton = formElement;
91    }
92    return firstSubmitButton;
93}
94
95// Returns true if the selected state of all the options matches the default
96// selected state.
97bool IsSelectInDefaultState(const HTMLSelectElement* select)
98{
99    const Vector<Element*>& listItems = select->listItems();
100    if (select->multiple() || select->size() > 1) {
101        for (Vector<Element*>::const_iterator i(listItems.begin()); i != listItems.end(); ++i) {
102            if (!(*i)->hasLocalName(HTMLNames::optionTag))
103                continue;
104            const HTMLOptionElement* optionElement = static_cast<const HTMLOptionElement*>(*i);
105            if (optionElement->selected() != optionElement->defaultSelected())
106                return false;
107        }
108        return true;
109    }
110
111    // The select is rendered as a combobox (called menulist in WebKit). At
112    // least one item is selected, determine which one.
113    const HTMLOptionElement* initialSelected = 0;
114    for (Vector<Element*>::const_iterator i(listItems.begin()); i != listItems.end(); ++i) {
115        if (!(*i)->hasLocalName(HTMLNames::optionTag))
116            continue;
117        const HTMLOptionElement* optionElement = static_cast<const HTMLOptionElement*>(*i);
118        if (optionElement->defaultSelected()) {
119            // The page specified the option to select.
120            initialSelected = optionElement;
121            break;
122        }
123        if (!initialSelected)
124            initialSelected = optionElement;
125    }
126    return initialSelected ? initialSelected->selected() : true;
127}
128
129// Returns true if the form element is in its default state, false otherwise.
130// The default state is the state of the form element on initial load of the
131// page, and varies depending upon the form element. For example, a checkbox is
132// in its default state if the checked state matches the state of the checked attribute.
133bool IsInDefaultState(const HTMLFormControlElement* formElement)
134{
135    if (formElement->hasTagName(HTMLNames::inputTag)) {
136        const HTMLInputElement* inputElement = static_cast<const HTMLInputElement*>(formElement);
137        if (inputElement->isCheckbox() || inputElement->isRadioButton())
138            return inputElement->checked() == inputElement->hasAttribute(checkedAttr);
139    } else if (formElement->hasTagName(HTMLNames::selectTag))
140        return IsSelectInDefaultState(static_cast<const HTMLSelectElement*>(formElement));
141    return true;
142}
143
144// If form has only one text input element, return true. If a valid input
145// element is not found, return false. Additionally, the form data for all
146// elements is added to enc_string and the encoding used is set in
147// encoding_name.
148bool HasSuitableTextElement(const HTMLFormElement* form, Vector<char>* encodedString, String* encodingName)
149{
150    TextEncoding encoding;
151    GetFormEncoding(form, &encoding);
152    if (!encoding.isValid()) {
153        // Need a valid encoding to encode the form elements.
154        // If the encoding isn't found webkit ends up replacing the params with
155        // empty strings. So, we don't try to do anything here.
156        return 0;
157    }
158    *encodingName = encoding.name();
159
160    HTMLInputElement* textElement = 0;
161    // FIXME: Consider refactoring this code so that we don't call form->associatedElements() twice.
162    for (Vector<FormAssociatedElement*>::const_iterator i(form->associatedElements().begin()); i != form->associatedElements().end(); ++i) {
163        if (!(*i)->isFormControlElement())
164            continue;
165        HTMLFormControlElement* formElement = static_cast<HTMLFormControlElement*>(*i);
166        if (formElement->disabled() || formElement->name().isNull())
167            continue;
168
169        if (!IsInDefaultState(formElement) || formElement->hasTagName(HTMLNames::textareaTag))
170            return 0;
171
172        bool isTextElement = false;
173        if (formElement->hasTagName(HTMLNames::inputTag)) {
174            const HTMLInputElement* input = static_cast<const HTMLInputElement*>(formElement);
175            if (input->isFileUpload()) {
176                // Too big, don't try to index this.
177                return 0;
178            }
179
180            if (input->isPasswordField()) {
181                // Don't store passwords! This is most likely an https anyway.
182                return 0;
183            }
184
185            if (input->isTextField())
186                isTextElement = true;
187      }
188
189      FormDataList dataList(encoding);
190      if (!formElement->appendFormData(dataList, false))
191          continue;
192
193      const Vector<FormDataList::Item>& items = dataList.items();
194      if (isTextElement && !items.isEmpty()) {
195          if (textElement) {
196              // The auto-complete bar only knows how to fill in one value.
197              // This form has multiple fields; don't treat it as searchable.
198              return false;
199          }
200          textElement = static_cast<HTMLInputElement*>(formElement);
201      }
202      for (Vector<FormDataList::Item>::const_iterator j(items.begin()); j != items.end(); ++j) {
203          // Handle ISINDEX / <input name=isindex> specially, but only if it's
204          // the first entry.
205          if (!encodedString->isEmpty() || j->data() != "isindex") {
206              if (!encodedString->isEmpty())
207                  encodedString->append('&');
208              FormDataBuilder::encodeStringAsFormData(*encodedString, j->data());
209              encodedString->append('=');
210          }
211          ++j;
212          if (formElement == textElement)
213              encodedString->append("{searchTerms}", 13);
214          else
215              FormDataBuilder::encodeStringAsFormData(*encodedString, j->data());
216      }
217    }
218
219    return textElement;
220}
221
222} // namespace
223
224namespace WebKit {
225
226WebSearchableFormData::WebSearchableFormData(const WebFormElement& form)
227{
228    RefPtr<HTMLFormElement> formElement = form.operator PassRefPtr<HTMLFormElement>();
229    const Frame* frame = formElement->document()->frame();
230    if (!frame)
231        return;
232
233    // Only consider forms that GET data and the action targets an http page.
234    if (equalIgnoringCase(formElement->getAttribute(HTMLNames::methodAttr), "post") || !IsHTTPFormSubmit(formElement.get()))
235        return;
236
237    HTMLFormControlElement* firstSubmitButton = GetButtonToActivate(formElement.get());
238    if (firstSubmitButton) {
239        // The form does not have an active submit button, make the first button
240        // active. We need to do this, otherwise the URL will not contain the
241        // name of the submit button.
242        firstSubmitButton->setActivatedSubmit(true);
243    }
244    Vector<char> encodedString;
245    String encoding;
246    bool hasElement = HasSuitableTextElement(formElement.get(), &encodedString, &encoding);
247    if (firstSubmitButton)
248        firstSubmitButton->setActivatedSubmit(false);
249    if (!hasElement) {
250        // Not a searchable form.
251        return;
252    }
253
254    String action(formElement->action());
255    KURL url(frame->loader()->completeURL(action.isNull() ? "" : action));
256    RefPtr<FormData> formData = FormData::create(encodedString);
257    url.setQuery(formData->flattenToString());
258    m_url = url;
259    m_encoding = encoding;
260}
261
262} // namespace WebKit
263