# Copyright (C) 2011 Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Utility module for reftests."""


try:
    from html.parser import HTMLParser  # Python 3 location.
except ImportError:
    from HTMLParser import HTMLParser  # Python 2 location.


class ExtractReferenceLinkParser(HTMLParser):
    """HTML parser that collects the reference links of a reftest.

    After feeding a document, ``matches`` holds the ``href`` of every
    ``<link rel="match">`` element and ``mismatches`` holds the ``href`` of
    every ``<link rel="mismatch">`` element, both in document order.
    """

    def __init__(self):
        # Call the base initializer explicitly rather than through super():
        # on Python 2 HTMLParser is an old-style class.
        HTMLParser.__init__(self)
        self.matches = []
        self.mismatches = []

    def handle_starttag(self, tag, attrs):
        """Records the href of a <link> whose rel is "match" or "mismatch".

        Args:
            tag: the tag name as reported by HTMLParser.
            attrs: a list of (name, value) pairs for the tag's attributes.
        """
        if tag != "link":
            return
        attrs = dict(attrs)
        if "rel" not in attrs or "href" not in attrs:
            return
        href = attrs["href"]
        if href is None:
            # A valueless attribute (<link rel=match href>) is reported as
            # None; it names no URL, so keep it out of the URL lists.
            return
        rel = attrs["rel"]
        if rel == "match":
            self.matches.append(href)
        elif rel == "mismatch":
            self.mismatches.append(href)


def get_reference_link(html_string):
    """Returns reference links in the given html_string.

    Args:
        html_string: the HTML source text to scan for <link> elements.

    Returns:
        a tuple of two URL lists, (matches, mismatches).
    """
    parser = ExtractReferenceLinkParser()
    parser.feed(html_string)
    parser.close()

    return parser.matches, parser.mismatches