# -*- coding: iso-8859-1 -*-
"""A lexical analyzer class for simple shell-like syntaxes."""

# Module and documentation by Eric S. Raymond, 21 Dec 1998
# Input stacking and error message cleanup added by ESR, March 2000
# push_source() and pop_source() made explicit by ESR, January 2001.
# Posix compliance, split(), string arguments, and
# iterator interface by Gustavo Niemeyer, April 2003.

import os.path
import sys
from collections import deque

# Prefer the fast Python 2 implementation, then the pure-Python one, and
# finally fall back to the io module where neither legacy module exists.
try:
    from cStringIO import StringIO
except ImportError:
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO

# "basestring" only exists on Python 2; fall back to str elsewhere.
try:
    _string_types = basestring
except NameError:
    _string_types = str

__all__ = ["shlex", "split"]


class shlex:
    """A lexical analyzer class for simple shell-like syntaxes.

    The lexer is a small state machine driven by ``read_token``.  The
    ``state`` attribute takes one of these values:

    * ``' '``  -- between tokens (skipping whitespace)
    * ``'a'``  -- inside a word
    * a quote character -- inside a quoted string opened by that character
    * an escape character -- the previous character was an escape
    * ``None`` -- end of the current input stream has been reached

    Instances are iterable; iteration stops when ``get_token`` returns
    ``self.eof`` (``None`` in POSIX mode, ``''`` otherwise).
    """

    def __init__(self, instream=None, infile=None, posix=False):
        """Create a lexer.

        instream -- a file-like object with read()/readline(), or a string
                    (wrapped in a StringIO); defaults to sys.stdin.
        infile   -- name associated with instream, used by error_leader()
                    and for relative-path inclusion in sourcehook().  It is
                    ignored when instream is None.
        posix    -- when true, quotes and escapes are removed from tokens
                    and EOF is signalled with None instead of ''.
        """
        if isinstance(instream, _string_types):
            instream = StringIO(instream)
        if instream is not None:
            self.instream = instream
            self.infile = infile
        else:
            self.instream = sys.stdin
            self.infile = None
        self.posix = posix
        if posix:
            self.eof = None
        else:
            self.eof = ''
        self.commenters = '#'
        self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
                          'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
        if self.posix:
            # Latin-1 letters 0xDF-0xFF (minus 0xF7) and 0xC0-0xDE (minus
            # 0xD7).  Written as escapes so the file stays pure ASCII and
            # does not depend on how it is stored or transcoded.
            self.wordchars += ('\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6'
                               '\xe7\xe8\xe9\xea\xeb\xec\xed\xee'
                               '\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6'
                               '\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
                               '\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7'
                               '\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf'
                               '\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd8'
                               '\xd9\xda\xdb\xdc\xdd\xde')
        self.whitespace = ' \t\r\n'
        self.whitespace_split = False
        self.quotes = '\'"'
        self.escape = '\\'
        self.escapedquotes = '"'
        self.state = ' '
        # Tokens (and single punctuation characters) waiting to be
        # returned by get_token() before any new input is read.
        self.pushback = deque()
        self.lineno = 1
        self.debug = 0
        self.token = ''
        # Stack of (infile, instream, lineno) for suspended input sources.
        self.filestack = deque()
        # Token that triggers inclusion of another source; None disables it.
        self.source = None
        if self.debug:
            print('shlex: reading from %s, line %d'
                  % (self.instream, self.lineno))

    def push_token(self, tok):
        "Push a token onto the stack popped by the get_token method"
        if self.debug >= 1:
            print("shlex: pushing token " + repr(tok))
        self.pushback.appendleft(tok)

    def push_source(self, newstream, newfile=None):
        "Push an input source onto the lexer's input source stack."
        if isinstance(newstream, _string_types):
            newstream = StringIO(newstream)
        # Remember where we were so pop_source() can resume there.
        self.filestack.appendleft((self.infile, self.instream, self.lineno))
        self.infile = newfile
        self.instream = newstream
        self.lineno = 1
        if self.debug:
            if newfile is not None:
                print('shlex: pushing to file %s' % (self.infile,))
            else:
                print('shlex: pushing to stream %s' % (self.instream,))

    def pop_source(self):
        "Pop the input source stack."
        self.instream.close()
        (self.infile, self.instream, self.lineno) = self.filestack.popleft()
        if self.debug:
            print('shlex: popping to %s, line %d'
                  % (self.instream, self.lineno))
        # The popped stream left the state at None (EOF); start afresh.
        self.state = ' '

    def get_token(self):
        "Get a token from the input stream (or from stack if it's nonempty)"
        if self.pushback:
            tok = self.pushback.popleft()
            if self.debug >= 1:
                print("shlex: popping token " + repr(tok))
            return tok
        # No pushback.  Get a token.
        raw = self.read_token()
        # Handle inclusions
        if self.source is not None:
            while raw == self.source:
                spec = self.sourcehook(self.read_token())
                if spec:
                    (newfile, newstream) = spec
                    self.push_source(newstream, newfile)
                raw = self.get_token()
        # Maybe we got EOF instead?
        while raw == self.eof:
            if not self.filestack:
                return self.eof
            else:
                self.pop_source()
                raw = self.get_token()
        # Neither inclusion nor EOF
        if self.debug >= 1:
            if raw != self.eof:
                print("shlex: token=" + repr(raw))
            else:
                print("shlex: token=EOF")
        return raw

    def read_token(self):
        """Read one raw token from the current input stream.

        Neither the pushback stack, source inclusion nor the source stack
        is consulted; get_token() layers those on top.  Returns the token
        text, or at end of input '' (non-POSIX) or None (POSIX, when no
        quotes were seen).

        Raises ValueError if input ends inside a quoted string or right
        after an escape character.
        """
        quoted = False
        escapedstate = ' '
        while True:
            nextchar = self.instream.read(1)
            if nextchar == '\n':
                self.lineno = self.lineno + 1
            if self.debug >= 3:
                print("shlex: in state " + repr(self.state) +
                      " I see character: " + repr(nextchar))
            if self.state is None:
                self.token = ''        # past end of file
                break
            elif self.state == ' ':
                if not nextchar:
                    self.state = None  # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in whitespace state")
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    # Discard the rest of the comment line.
                    self.instream.readline()
                    self.lineno = self.lineno + 1
                elif self.posix and nextchar in self.escape:
                    escapedstate = 'a'
                    self.state = nextchar
                elif nextchar in self.wordchars:
                    self.token = nextchar
                    self.state = 'a'
                elif nextchar in self.quotes:
                    # Non-POSIX mode keeps the quotes as part of the token.
                    if not self.posix:
                        self.token = nextchar
                    self.state = nextchar
                elif self.whitespace_split:
                    self.token = nextchar
                    self.state = 'a'
                else:
                    # A punctuation character is a token by itself.
                    self.token = nextchar
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
            elif self.state in self.quotes:
                quoted = True
                if not nextchar:      # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in quotes state")
                    # XXX what error should be raised here?
                    raise ValueError("No closing quotation")
                if nextchar == self.state:
                    if not self.posix:
                        self.token = self.token + nextchar
                        self.state = ' '
                        break
                    else:
                        # POSIX: a closing quote does not end the word.
                        self.state = 'a'
                elif self.posix and nextchar in self.escape and \
                     self.state in self.escapedquotes:
                    escapedstate = self.state
                    self.state = nextchar
                else:
                    self.token = self.token + nextchar
            elif self.state in self.escape:
                if not nextchar:      # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in escape state")
                    # XXX what error should be raised here?
                    raise ValueError("No escaped character")
                # In posix shells, only the quote itself or the escape
                # character may be escaped within quotes.
                if escapedstate in self.quotes and \
                   nextchar != self.state and nextchar != escapedstate:
                    self.token = self.token + self.state
                self.token = self.token + nextchar
                self.state = escapedstate
            elif self.state == 'a':
                if not nextchar:
                    self.state = None   # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in word state")
                    self.state = ' '
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    self.instream.readline()
                    self.lineno = self.lineno + 1
                    if self.posix:
                        self.state = ' '
                        if self.token or (self.posix and quoted):
                            break   # emit current token
                        else:
                            continue
                elif self.posix and nextchar in self.quotes:
                    self.state = nextchar
                elif self.posix and nextchar in self.escape:
                    escapedstate = 'a'
                    self.state = nextchar
                elif nextchar in self.wordchars or nextchar in self.quotes \
                    or self.whitespace_split:
                    self.token = self.token + nextchar
                else:
                    # Punctuation ends the word; it is queued so the next
                    # get_token() call returns it as its own token.
                    self.pushback.appendleft(nextchar)
                    if self.debug >= 2:
                        print("shlex: I see punctuation in word state")
                    self.state = ' '
                    # An empty quoted string ('' or "") in POSIX mode is a
                    # real token and must be emitted here too, exactly as
                    # in the whitespace and comment branches above.
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
        result = self.token
        self.token = ''
        # POSIX mode distinguishes EOF (None) from an empty quoted token.
        if self.posix and not quoted and result == '':
            result = None
        if self.debug > 1:
            if result:
                print("shlex: raw token=" + repr(result))
            else:
                print("shlex: raw token=EOF")
        return result

    def sourcehook(self, newfile):
        """Hook called on a filename to be sourced.

        Returns a (filename, open file object) tuple suitable for
        push_source().  Surrounding double quotes are stripped from the
        name first.
        """
        if newfile[0] == '"':
            newfile = newfile[1:-1]
        # This implements cpp-like semantics for relative-path inclusion.
        if isinstance(self.infile, _string_types) \
           and not os.path.isabs(newfile):
            newfile = os.path.join(os.path.dirname(self.infile), newfile)
        return (newfile, open(newfile, "r"))

    def error_leader(self, infile=None, lineno=None):
        "Emit a C-compiler-like, Emacs-friendly error-message leader."
        if infile is None:
            infile = self.infile
        if lineno is None:
            lineno = self.lineno
        return "\"%s\", line %d: " % (infile, lineno)

    def __iter__(self):
        return self

    def next(self):
        "Return the next token, raising StopIteration at end of input."
        token = self.get_token()
        if token == self.eof:
            raise StopIteration
        return token

    # Iterator protocol name used by Python 3; next() is kept for Python 2.
    __next__ = next


def split(s, comments=False, posix=True):
    """Split the string s on whitespace using shell-like syntax.

    comments -- when false (the default), '#' is not treated as a comment
                character.
    posix    -- passed through to the shlex constructor.

    Returns the list of tokens.
    """
    lex = shlex(s, posix=posix)
    lex.whitespace_split = True
    if not comments:
        lex.commenters = ''
    return list(lex)


if __name__ == '__main__':
    # Tokenize stdin, or the file named by the first argument, and print
    # each token until the lexer returns an empty/EOF token.
    if len(sys.argv) == 1:
        lexer = shlex()
    else:
        fn = sys.argv[1]
        lexer = shlex(open(fn), fn)
    while True:
        tt = lexer.get_token()
        if tt:
            print("Token: " + repr(tt))
        else:
            break