"""
csv.py - read/write/investigate CSV files
"""

import re
from functools import reduce
from _csv import Error, __version__, writer, reader, register_dialect, \
                 unregister_dialect, get_dialect, list_dialects, \
                 field_size_limit, \
                 QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE, \
                 __doc__
from _csv import Dialect as _Dialect

try:
    from cStringIO import StringIO
except ImportError:
    try:
        from StringIO import StringIO
    except ImportError:
        # Neither of the Python 2 modules is importable; fall back to io.
        from io import StringIO

__all__ = [ "QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE",
            "Error", "Dialect", "__doc__", "excel", "excel_tab",
            "field_size_limit", "reader", "writer",
            "register_dialect", "get_dialect", "list_dialects", "Sniffer",
            "unregister_dialect", "__version__", "DictReader", "DictWriter" ]

class Dialect:
    """Describe an Excel dialect.

    This must be subclassed (see csv.excel).  Valid attributes are:
    delimiter, quotechar, escapechar, doublequote, skipinitialspace,
    lineterminator, quoting.

    Instantiating a subclass validates its attributes by passing the
    instance to the C-level ``_csv.Dialect``; a rejected dialect raises
    ``Error``.
    """
    _name = ""
    # Set to True by __init__ for every class other than Dialect itself.
    _valid = False
    # placeholders
    delimiter = None
    quotechar = None
    escapechar = None
    doublequote = None
    skipinitialspace = None
    lineterminator = None
    quoting = None

    def __init__(self):
        """Mark subclass instances as valid, then validate the attributes.

        Raises Error if ``_csv.Dialect`` rejects the attribute values.
        """
        if self.__class__ != Dialect:
            self._valid = True
        self._validate()

    def _validate(self):
        """Check the attributes with ``_csv.Dialect``.

        A TypeError from the C implementation is re-raised as ``Error``
        carrying the same message.
        """
        try:
            _Dialect(self)
        except TypeError as e:
            # We do this for compatibility with py2.3
            raise Error(str(e))
class excel(Dialect):
    """Describe the usual properties of Excel-generated CSV files."""
    delimiter = ','
    quotechar = '"'
    doublequote = True
    skipinitialspace = False
    lineterminator = '\r\n'
    quoting = QUOTE_MINIMAL
register_dialect("excel", excel)

class excel_tab(excel):
    """Describe the usual properties of Excel-generated TAB-delimited files."""
    delimiter = '\t'
register_dialect("excel-tab", excel_tab)


class DictReader:
    """Iterate over CSV rows as dicts keyed by field name.

    f, dialect, *args and **kwds are passed straight to ``reader``.
    If fieldnames is None, the first row read from f supplies the keys.
    Values beyond the last field name are collected in a list stored
    under restkey; missing trailing values are filled with restval.
    """
    def __init__(self, f, fieldnames=None, restkey=None, restval=None,
                 dialect="excel", *args, **kwds):
        self._fieldnames = fieldnames   # list of keys for the dict
        self.restkey = restkey          # key to catch long rows
        self.restval = restval          # default value for short rows
        self.reader = reader(f, dialect, *args, **kwds)
        self.dialect = dialect
        self.line_num = 0

    def __iter__(self):
        return self

    @property
    def fieldnames(self):
        """The field names, read lazily from the first row when not given.

        Stays None if the underlying reader is already exhausted.
        """
        if self._fieldnames is None:
            try:
                self._fieldnames = next(self.reader)
            except StopIteration:
                pass
        self.line_num = self.reader.line_num
        return self._fieldnames

    @fieldnames.setter
    def fieldnames(self, value):
        self._fieldnames = value

    def next(self):
        """Return the next non-blank row as a dict.

        Raises StopIteration when the underlying reader is exhausted.
        """
        if self.line_num == 0:
            # Used only for its side effect.
            self.fieldnames
        row = next(self.reader)
        self.line_num = self.reader.line_num

        # unlike the basic reader, we prefer not to return blanks,
        # because we will typically wind up with a dict full of None
        # values
        while row == []:
            row = next(self.reader)
            # keep line_num in step with the lines consumed while skipping
            self.line_num = self.reader.line_num

        fieldnames = self.fieldnames
        d = dict(zip(fieldnames, row))
        lf = len(fieldnames)
        lr = len(row)
        if lf < lr:
            d[self.restkey] = row[lf:]
        elif lf > lr:
            for key in fieldnames[lr:]:
                d[key] = self.restval
        return d

    # Iterator protocol name used by Python 3.
    __next__ = next


class DictWriter:
    """Write dicts as CSV rows, ordered by fieldnames.

    f, dialect, *args and **kwds are passed straight to ``writer``.
    restval is written for keys missing from a row dict.  extrasaction
    (case-insensitive) is "raise" to raise ValueError when a row dict
    has keys not in fieldnames, or "ignore" to drop them silently.
    """
    def __init__(self, f, fieldnames, restval="", extrasaction="raise",
                 dialect="excel", *args, **kwds):
        self.fieldnames = fieldnames    # list of keys for the dict
        self.restval = restval          # for writing short dicts
        action = extrasaction.lower()
        if action not in ("raise", "ignore"):
            raise ValueError("extrasaction (%s) must be 'raise' or 'ignore'"
                             % extrasaction)
        # Store the normalized form: _dict_to_list compares against the
        # lowercase literal, so "RAISE" must not degrade to "ignore".
        self.extrasaction = action
        self.writer = writer(f, dialect, *args, **kwds)

    def writeheader(self):
        """Write a row containing the field names themselves."""
        header = dict(zip(self.fieldnames, self.fieldnames))
        return self.writerow(header)

    def _dict_to_list(self, rowdict):
        """Return rowdict's values ordered by fieldnames.

        Raises ValueError for unknown keys when extrasaction is "raise".
        """
        if self.extrasaction == "raise":
            wrong_fields = [k for k in rowdict if k not in self.fieldnames]
            if wrong_fields:
                # repr() so that non-string keys cannot break the join
                raise ValueError("dict contains fields not in fieldnames: "
                                 + ", ".join([repr(k) for k in wrong_fields]))
        return [rowdict.get(key, self.restval) for key in self.fieldnames]

    def writerow(self, rowdict):
        """Write one dict as a row; returns what the writer returns."""
        return self.writer.writerow(self._dict_to_list(rowdict))

    def writerows(self, rowdicts):
        """Write several dicts; every row is converted before any is written."""
        rows = [self._dict_to_list(rowdict) for rowdict in rowdicts]
        return self.writer.writerows(rows)

# Guard Sniffer's type checking against builds that exclude complex()
try:
    complex
except NameError:
    complex = float

# Interpreters without a separate long type: int stands in for it.
try:
    long
except NameError:
    long = int

class Sniffer:
    '''
    "Sniffs" the format of a CSV file (i.e. delimiter, quotechar)
    Returns a Dialect object.
    '''
    def __init__(self):
        # in case there is more than one possible delimiter
        self.preferred = [',', '\t', ';', ' ', ':']


    def sniff(self, sample, delimiters=None):
        """
        Returns a Dialect subclass describing the sample.

        delimiters, when given, restricts the candidate delimiter
        characters.  Raises Error if no delimiter can be determined.
        """

        quotechar, doublequote, delimiter, skipinitialspace = \
                   self._guess_quote_and_delimiter(sample, delimiters)
        if not delimiter:
            delimiter, skipinitialspace = self._guess_delimiter(sample,
                                                                delimiters)

        if not delimiter:
            raise Error("Could not determine delimiter")

        class dialect(Dialect):
            _name = "sniffed"
            lineterminator = '\r\n'
            quoting = QUOTE_MINIMAL

        dialect.doublequote = doublequote
        dialect.delimiter = delimiter
        # _csv.reader won't accept a quotechar of ''
        dialect.quotechar = quotechar or '"'
        dialect.skipinitialspace = skipinitialspace

        return dialect


    def _guess_quote_and_delimiter(self, data, delimiters):
        """
        Looks for text enclosed between two identical quotes
        (the probable quotechar) which are preceded and followed
        by the same character (the probable delimiter).
        For example:
                         ,'some text',
        The quote with the most wins, same with the delimiter.
        If there is no quotechar the delimiter can't be determined
        this way.

        Returns (quotechar, doublequote, delimiter, skipinitialspace);
        ('', False, None, 0) when no quoted text is found.
        """

        matches = []
        for restr in (r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)', # ,".*?",
                      r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)',   #  ".*?",
                      r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)',   # ,".*?"
                      r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)'):                            #  ".*?" (no delim, no space)
            regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
            matches = regexp.findall(data)
            if matches:
                break

        if not matches:
            # (quotechar, doublequote, delimiter, skipinitialspace)
            return ('', False, None, 0)

        # findall() yields the groups in pattern order; groupindex is
        # 1-based.  The last pattern has only the quote group.
        groupindex = regexp.groupindex
        quote_pos = groupindex['quote'] - 1
        delim_group = groupindex.get('delim')
        space_group = groupindex.get('space')

        quotes = {}
        delims = {}
        spaces = 0
        for m in matches:
            key = m[quote_pos]
            if key:
                quotes[key] = quotes.get(key, 0) + 1
            if delim_group is None:
                continue
            key = m[delim_group - 1]
            if key and (delimiters is None or key in delimiters):
                delims[key] = delims.get(key, 0) + 1
            if space_group is None:
                continue
            if m[space_group - 1]:
                spaces += 1

        # most frequent quote; on a tie the later key wins
        quotechar = reduce(lambda a, b: a if quotes[a] > quotes[b] else b,
                           quotes)

        if delims:
            delim = reduce(lambda a, b: a if delims[a] > delims[b] else b,
                           delims)
            skipinitialspace = delims[delim] == spaces
            if delim == '\n': # most likely a file with a single column
                delim = ''
        else:
            # there is *no* delimiter, it's a single column of quoted data
            delim = ''
            skipinitialspace = 0

        # if we see an extra quote between delimiters, we've got a
        # double quoted format.  The delimiter is escaped because it may
        # be a regex metacharacter such as '*' or '|'.
        dq_regexp = re.compile(
            r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
            {'delim': re.escape(delim), 'quote': quotechar}, re.MULTILINE)

        if dq_regexp.search(data):
            doublequote = True
        else:
            doublequote = False

        return (quotechar, doublequote, delim, skipinitialspace)


    def _guess_delimiter(self, data, delimiters):
        """
        The delimiter /should/ occur the same number of times on
        each row. However, due to malformed data, it may not. We don't want
        an all or nothing approach, so we allow for small variations in this
        number.
          1) build a table of the frequency of each character on every line.
          2) build a table of frequencies of this frequency (meta-frequency?),
             e.g.  'x occurred 5 times in 10 rows, 6 times in 1000 rows,
             7 times in 2 rows'
          3) use the mode of the meta-frequency to determine the /expected/
             frequency for that character
          4) find out how often the character actually meets that goal
          5) the character that best meets its goal is the delimiter
        For performance reasons, the data is evaluated in chunks, so it can
        try and evaluate the smallest portion of the data possible, evaluating
        additional chunks as necessary.

        Returns (delimiter, skipinitialspace); ('', 0) when nothing
        qualifies.
        """

        data = [line for line in data.split('\n') if line]

        candidates = [chr(c) for c in range(127)] # 7-bit ASCII

        def skips_space(delim):
            # true when every delimiter on the first line is followed
            # by a space
            return data[0].count(delim) == data[0].count(delim + " ")

        # build frequency tables
        chunkLength = min(10, len(data))
        iteration = 0
        charFrequency = {}
        modes = {}
        delims = {}
        start, end = 0, chunkLength
        while start < len(data):
            iteration += 1
            for line in data[start:end]:
                for char in candidates:
                    metaFrequency = charFrequency.setdefault(char, {})
                    # must count even if frequency is 0
                    freq = line.count(char)
                    # value is the mode
                    metaFrequency[freq] = metaFrequency.get(freq, 0) + 1

            for char, metaFrequency in charFrequency.items():
                items = list(metaFrequency.items())
                if len(items) == 1 and items[0][0] == 0:
                    continue
                # get the mode of the frequencies
                if len(items) > 1:
                    mode = reduce(lambda a, b: a if a[1] > b[1] else b,
                                  items)
                    # adjust the mode - subtract the sum of all
                    # other frequencies
                    items.remove(mode)
                    modes[char] = (mode[0],
                                   mode[1] - sum(item[1] for item in items))
                else:
                    modes[char] = items[0]

            # build a list of possible delimiters.  The last chunk may be
            # partial, so never count more rows than actually exist.
            total = float(min(chunkLength * iteration, len(data)))
            # (rows of consistent data) / (number of rows) = 100%
            consistency = 1.0
            # minimum consistency threshold
            threshold = 0.9
            while len(delims) == 0 and consistency >= threshold:
                for k, v in modes.items():
                    if v[0] > 0 and v[1] > 0:
                        if ((v[1]/total) >= consistency and
                            (delimiters is None or k in delimiters)):
                            delims[k] = v
                consistency -= 0.01

            if len(delims) == 1:
                delim = list(delims)[0]
                return (delim, skips_space(delim))

            # analyze another chunkLength lines
            start = end
            end += chunkLength

        if not delims:
            return ('', 0)

        # if there's more than one, fall back to a 'preferred' list
        if len(delims) > 1:
            for d in self.preferred:
                if d in delims:
                    return (d, skips_space(d))

        # nothing else indicates a preference, pick the character that
        # dominates(?)
        delim = max((v, k) for (k, v) in delims.items())[1]
        return (delim, skips_space(delim))


    def has_header(self, sample):
        """Return True if the first row of sample looks like a header.

        Raises Error if the dialect of sample cannot be sniffed.
        """
        # Creates a dictionary of types of data in each column. If any
        # column is of a single type (say, integers), *except* for the first
        # row, then the first row is presumed to be labels. If the type
        # can't be determined, it is assumed to be a string in which case
        # the length of the string is the determining factor: if all of the
        # rows except for the first are the same length, it's a header.
        # Finally, a 'vote' is taken at the end for each column, adding or
        # subtracting from the likelihood of the first row being a header.

        rdr = reader(StringIO(sample), self.sniff(sample))

        header = next(rdr) # assume first row is header

        columns = len(header)
        columnTypes = dict.fromkeys(range(columns))

        checked = 0
        for row in rdr:
            # arbitrary number of rows to check, to keep it sane
            if checked > 20:
                break
            checked += 1

            if len(row) != columns:
                continue # skip rows that have irregular number of columns

            # iterate over a snapshot: columns are deleted inside the loop
            for col in list(columnTypes.keys()):

                for thisType in [int, long, float, complex]:
                    try:
                        thisType(row[col])
                        break
                    except (ValueError, OverflowError):
                        pass
                else:
                    # fallback to length of string
                    thisType = len(row[col])

                # treat longs as ints
                if thisType == long:
                    thisType = int

                if thisType != columnTypes[col]:
                    if columnTypes[col] is None: # add new column type
                        columnTypes[col] = thisType
                    else:
                        # type is inconsistent, remove column from
                        # consideration
                        del columnTypes[col]

        # finally, compare results against first row and "vote"
        # on whether it's a header
        hasHeader = 0
        for col, colType in columnTypes.items():
            if isinstance(colType, int): # it's a length
                if len(header[col]) != colType:
                    hasHeader += 1
                else:
                    hasHeader -= 1
            else: # attempt typecast
                try:
                    colType(header[col])
                except (ValueError, TypeError):
                    # TypeError covers a column whose type is still None
                    hasHeader += 1
                else:
                    hasHeader -= 1

        return hasHeader > 0