15c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)# Copyright (C) 2011 Google Inc. All rights reserved.
25c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#
35c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)# Redistribution and use in source and binary forms, with or without
45c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)# modification, are permitted provided that the following conditions are
55c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)# met:
65c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#
75c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#     * Redistributions of source code must retain the above copyright
85c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)# notice, this list of conditions and the following disclaimer.
95c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#     * Redistributions in binary form must reproduce the above
105c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)# copyright notice, this list of conditions and the following disclaimer
115c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)# in the documentation and/or other materials provided with the
125c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)# distribution.
135c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
145c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
155c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
165c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
175c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
185c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
195c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
205c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
215c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
225c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
235c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
245c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
255c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
265c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)"""Utility module for reftests."""
275c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
285c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
295c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)from HTMLParser import HTMLParser
305c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
315c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
class ExtractReferenceLinkParser(HTMLParser):
    """HTML parser that collects reftest reference URLs from <link> tags.

    After a document has been fed to the parser:
        matches: href values of every <link rel="match">, in document order.
        mismatches: href values of every <link rel="mismatch">, in document
            order.
    """

    def __init__(self):
        # Explicit base-class call rather than super(): the HTMLParser class
        # from the Python 2 "HTMLParser" module is an old-style class.
        HTMLParser.__init__(self)
        self.matches = []
        self.mismatches = []

    def handle_starttag(self, tag, attrs):
        """Record the href of a <link> tag whose rel is "match" or "mismatch".

        Args:
            tag: name of the start tag being parsed.
            attrs: list of (name, value) pairs for the tag's attributes;
                HTMLParser reports a valueless attribute with value None.
        """
        if tag != "link":
            return
        attrs = dict(attrs)
        href = attrs.get("href")
        if href is None:
            # Either there is no href at all, or it was written without a
            # value (<link rel="match" href>). Neither names a reference
            # file, so never hand None to callers expecting URL strings.
            return
        rel = attrs.get("rel")
        if rel == "match":
            self.matches.append(href)
        elif rel == "mismatch":
            self.mismatches.append(href)
515c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
525c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
def get_reference_link(html_string):
    """Extract the reftest reference links declared in an HTML document.

    Args:
        html_string: the HTML markup to scan.

    Returns:
        A (matches, mismatches) tuple of two lists: the URLs collected by
        ExtractReferenceLinkParser from rel="match" links and from
        rel="mismatch" links respectively.
    """
    extractor = ExtractReferenceLinkParser()
    extractor.feed(html_string)
    # close() makes the parser process any input it is still buffering.
    extractor.close()
    return (extractor.matches, extractor.mismatches)
64