1#!/usr/bin/env python
2
3# Copyright (C) 2013 Adobe Systems Incorporated. All rights reserved.
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions
7# are met:
8#
9# 1. Redistributions of source code must retain the above
10#    copyright notice, this list of conditions and the following
11#    disclaimer.
12# 2. Redistributions in binary form must reproduce the above
13#    copyright notice, this list of conditions and the following
14#    disclaimer in the documentation and/or other materials
15#    provided with the distribution.
16#
17# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER "AS IS" AND ANY
18# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE
21# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
22# OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
26# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
27# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28# SUCH DAMAGE.
29
30import logging
31import re
32
33from webkitpy.common.host import Host
34from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup as Parser
35
36
# Module-level logger, named after this module.
_log = logging.getLogger(__name__)
38
39
class TestParser(object):
    """Parses a test file and classifies it as a reftest, a jstest or a plain test.

    The file named by ``filename`` is parsed on construction. ``analyze_test``
    then inspects the parsed document (and, for reftests, the reference
    document it links to) and reports what kind of test it is.
    """

    def __init__(self, options, filename):
        """Create a parser for ``filename`` and load it immediately.

        Args:
            options: mapping of import options; this class only reads ``options['all']``.
            filename: path of the test file to analyze.
        """
        self.options = options
        self.filename = filename
        self.host = Host()
        self.filesystem = self.host.filesystem

        self.test_doc = None
        self.ref_doc = None
        self.load_file(filename)

    def load_file(self, filename, is_ref=False):
        """Parse ``filename`` and store the result on this instance.

        Args:
            filename: path of the file to parse.
            is_ref: when False (the default) the parsed document becomes
                ``self.test_doc`` and ``self.ref_doc`` is reset to None; when
                True the parsed document becomes ``self.ref_doc`` and
                ``self.test_doc`` is left untouched.

        The stored document is None if ``filename`` is not a regular file or
        cannot be parsed. Nothing is returned.
        """
        doc = None
        if self.filesystem.isfile(filename):
            try:
                doc = Parser(self.filesystem.read_binary_file(filename))
            except Exception:
                # FIXME: Figure out what to do if we can't parse the file.
                _log.error("Failed to parse %s", filename)
                doc = None
        elif self.filesystem.isdir(filename):
            # FIXME: Figure out what is triggering this and what to do about it.
            _log.error("Trying to load %s, which is a directory", filename)

        if is_ref:
            self.ref_doc = doc
        else:
            self.test_doc = doc
            # A reference loaded for a previous test document is stale now.
            self.ref_doc = None

    def analyze_test(self, test_contents=None, ref_contents=None):
        """Determine whether the file is a test, and of which kind.

        Args:
            test_contents: optional markup that replaces the loaded test document.
            ref_contents: optional markup that replaces the reference document.

        Returns:
            None if there is no test document, if a reference link lacks an
            ``href``, or if the file does not qualify as a test. Otherwise a
            dict with a ``'test'`` key (the test filename) plus:
              * ``'reference'`` (and ``'refsupport'`` when the reference href
                starts with ``'..'``) for reftests;
              * ``'jstest': True`` for testharness-based tests;
              * nothing else for other files, which are only reported when
                ``options['all']`` is True and the filename contains neither
                ``'-ref'`` nor ``'reference'``.
        """
        if test_contents is None and self.test_doc is None:
            return None

        if test_contents is not None:
            self.test_doc = Parser(test_contents)

        if ref_contents is not None:
            self.ref_doc = Parser(ref_contents)

        # First check if it's a reftest.
        matches = self.reference_links_of_type('match') + self.reference_links_of_type('mismatch')
        if matches:
            return self._analyze_reftest(matches)

        if self.is_jstest():
            return {'test': self.filename, 'jstest': True}

        if self.options['all'] is True and '-ref' not in self.filename and 'reference' not in self.filename:
            return {'test': self.filename}

        return None

    def _analyze_reftest(self, matches):
        """Build the test info for a reftest from its non-empty list of reference links."""
        if len(matches) > 1:
            # FIXME: Is this actually true? We should fix this.
            _log.warning('Multiple references are not supported. Importing the first ref defined in %s',
                         self.filesystem.basename(self.filename))

        try:
            href = matches[0]['href']
        except KeyError:
            # FIXME: Figure out what to do w/ invalid test files.
            _log.error('%s has a reference link but is missing the "href"', self.filename)
            return None

        ref_file = self.filesystem.join(self.filesystem.dirname(self.filename), href)

        if self.ref_doc is None:
            # Load into ref_doc only; the test document must stay intact.
            self.load_file(ref_file, is_ref=True)

        test_info = {'test': self.filename, 'reference': ref_file}

        # If the ref file path is relative, we need to check it for
        # relative paths also because when it lands in WebKit, it will be
        # moved down into the test dir.
        #
        # Note: The test files themselves are not checked for support files
        # outside their directories as the convention in the CSSWG is to
        # put all support files in the same dir or subdir as the test.
        #
        # All non-test files in the test's directory tree are normally
        # copied as part of the import as they are assumed to be required
        # support files.
        #
        # *But*, there is exactly one case in the entire css2.1 suite where
        # a test depends on a file that lives in a different directory,
        # which depends on another file that lives outside of its
        # directory. This code covers that case :)
        if href.startswith('..'):
            test_info['refsupport'] = self.support_files(self.ref_doc)

        return test_info

    def reference_links_of_type(self, reftest_type):
        """Return the elements of the test document whose ``rel`` equals ``reftest_type``."""
        return self.test_doc.findAll(rel=reftest_type)

    def is_jstest(self):
        """Returns whether the file appears to be a jstest, by searching for usage of W3C-style testharness paths."""
        return bool(self.test_doc.find(src=re.compile('[\'\"/]?/resources/testharness')))

    def support_files(self, doc):
        """Collect every path referenced by ``doc`` via ``src``, ``href`` or CSS ``url()``.

        Args:
            doc: a parsed document, or None.

        Returns:
            A list of paths in the order src attributes, href attributes,
            url() references. Paths starting with ``http:``, ``https:``,
            ``mailto:`` or ``data:`` are omitted because they do not name
            files that could be copied. An empty list is returned for None.
        """
        if doc is None:
            return []

        src_paths = [tag['src'] for tag in doc.findAll(src=re.compile('.*'))]
        href_paths = [tag['href'] for tag in doc.findAll(href=re.compile('.*'))]

        # Non-greedy so that several url() tokens in one text node are
        # reported separately instead of being merged into one bogus path.
        url_pattern = re.compile(r'''url\(\s*['"]?(.*?)['"]?\s*\)''')
        urls = []
        for text in doc.findAll(text=url_pattern):
            urls.extend(url_pattern.findall(text))

        non_file_prefixes = ('http:', 'https:', 'mailto:', 'data:')
        return [path for path in src_paths + href_paths + urls
                if not path.startswith(non_file_prefixes)]
163