1edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep# -*- coding: iso-8859-1 -*-
2edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep"""A lexical analyzer class for simple shell-like syntaxes."""
3edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
4edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep# Module and documentation by Eric S. Raymond, 21 Dec 1998
5edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep# Input stacking and error message cleanup added by ESR, March 2000
6edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep# push_source() and pop_source() made explicit by ESR, January 2001.
7edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep# Posix compliance, split(), string arguments, and
8edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep# iterator interface by Gustavo Niemeyer, April 2003.
9edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
import os.path
import sys
from collections import deque

try:
    from cStringIO import StringIO
except ImportError:
    try:
        from StringIO import StringIO
    except ImportError:
        # Neither Python 2 module is available; use the io implementation.
        from io import StringIO
18edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
19edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep__all__ = ["shlex", "split"]
20edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
class shlex:
    """A lexical analyzer class for simple shell-like syntaxes.

    Characters are read one at a time from ``instream`` and grouped into
    tokens by a small state machine (see ``read_token``).  Tokens are
    obtained with ``get_token()`` or by iterating over the instance.

    In POSIX mode (``posix=True``) quote characters are removed from the
    returned tokens, the escape character is honoured, and end of input is
    reported as ``None``.  In non-POSIX mode quotes are kept as part of the
    token, the escape character gets no special treatment, and end of
    input is reported as the empty string.
    """

    # Types that __init__ and push_source wrap in a StringIO instead of
    # treating them as an already-open stream.
    try:
        _string_types = basestring      # Python 2: str and unicode
    except NameError:
        _string_types = str             # no basestring: plain str only

    def __init__(self, instream=None, infile=None, posix=False):
        """Create a lexer.

        instream -- a string (wrapped in a StringIO) or an object providing
                    read(), readline() and close(); when None, sys.stdin
                    is used and ``infile`` is ignored.
        infile   -- name associated with the stream; used by error_leader()
                    and for relative-path resolution in sourcehook().
        posix    -- select POSIX parsing rules (see the class docstring).
        """
        if isinstance(instream, self._string_types):
            instream = StringIO(instream)
        if instream is not None:
            self.instream = instream
            self.infile = infile
        else:
            self.instream = sys.stdin
            self.infile = None
        self.posix = posix
        # Value returned by get_token() at end of input.
        if posix:
            self.eof = None
        else:
            self.eof = ''
        # Characters that start a comment running to the end of the line.
        self.commenters = '#'
        # Characters accumulated into multi-character tokens.  (The 'dfe'
        # order is harmless: this class only does membership tests on it.)
        self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
                          'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
        if self.posix:
            # Latin-1 letters: 0xDF-0xFF without the division sign (0xF7),
            # then 0xC0-0xDE without the multiplication sign (0xD7).  They
            # are built from code points so the set does not depend on how
            # the bytes of this source file happen to be decoded.
            lower = [chr(c) for c in range(0xdf, 0x100) if c != 0xf7]
            upper = [chr(c) for c in range(0xc0, 0xdf) if c != 0xd7]
            self.wordchars += ''.join(lower + upper)
        # Characters that separate tokens.
        self.whitespace = ' \t\r\n'
        # When true, characters that are neither word characters nor
        # quotes are kept inside words instead of being emitted alone.
        self.whitespace_split = False
        self.quotes = '\'"'
        self.escape = '\\'
        # Quote characters inside which the escape character is honoured
        # (POSIX mode only).
        self.escapedquotes = '"'
        # Parser state: ' ' between tokens, 'a' inside a word, a quote
        # character inside a quoted string, an escape character right
        # after an escape, None once end of input has been seen.
        self.state = ' '
        # Tokens pushed back by push_token(), plus single punctuation
        # characters deferred by read_token().
        self.pushback = deque()
        self.lineno = 1
        # Verbosity of diagnostic output: 0 (silent) up to 3.
        self.debug = 0
        # Accumulator for the token currently being read.
        self.token = ''
        # Stack of suspended (infile, instream, lineno) input sources.
        self.filestack = deque()
        # Token that triggers inclusion of another file; None disables it.
        self.source = None

    def push_token(self, tok):
        """Push a token onto the stack popped by the get_token method."""
        if self.debug >= 1:
            print("shlex: pushing token " + repr(tok))
        self.pushback.appendleft(tok)

    def push_source(self, newstream, newfile=None):
        """Push an input source onto the lexer's input source stack.

        ``newstream`` may be a string (wrapped in a StringIO) or a stream
        object.  The current source and its line number are saved and
        restored by pop_source(); line counting restarts at 1.
        """
        if isinstance(newstream, self._string_types):
            newstream = StringIO(newstream)
        self.filestack.appendleft((self.infile, self.instream, self.lineno))
        self.infile = newfile
        self.instream = newstream
        self.lineno = 1
        if self.debug:
            if newfile is not None:
                print('shlex: pushing to file %s' % (self.infile,))
            else:
                print('shlex: pushing to stream %s' % (self.instream,))

    def pop_source(self):
        """Close the current input source and resume the previous one."""
        self.instream.close()
        (self.infile, self.instream, self.lineno) = self.filestack.popleft()
        if self.debug:
            print('shlex: popping to %s, line %d'
                  % (self.instream, self.lineno))
        # The restored source has not hit end of input yet.
        self.state = ' '

    def get_token(self):
        """Get a token from the input stream (or from stack if it's nonempty).

        Pushed-back tokens are returned first.  Otherwise a raw token is
        read; if it equals ``self.source`` the following token is passed to
        sourcehook() and the stream it returns is pushed as the new input.
        When a pushed source is exhausted it is popped and reading resumes
        from the previous one.  Returns ``self.eof`` when all input is
        consumed.
        """
        if self.pushback:
            tok = self.pushback.popleft()
            if self.debug >= 1:
                print("shlex: popping token " + repr(tok))
            return tok
        # No pushback.  Get a token.
        raw = self.read_token()
        # Handle inclusions
        if self.source is not None:
            while raw == self.source:
                spec = self.sourcehook(self.read_token())
                if spec:
                    (newfile, newstream) = spec
                    self.push_source(newstream, newfile)
                raw = self.get_token()
        # Maybe we got EOF instead?
        while raw == self.eof:
            if not self.filestack:
                return self.eof
            else:
                self.pop_source()
                raw = self.get_token()
        # Neither inclusion nor EOF
        if self.debug >= 1:
            if raw != self.eof:
                print("shlex: token=" + repr(raw))
            else:
                print("shlex: token=EOF")
        return raw

    def read_token(self):
        """Read one raw token from the current input stream.

        Does not consult the pushback stack and does not handle source
        inclusion or the source stack; get_token() does that.  Returns the
        token text, or at end of input '' (non-POSIX) / None (POSIX, unless
        the token was an empty quoted string).

        Raises ValueError when input ends inside a quoted string or
        directly after an escape character.
        """
        quoted = False
        # State to return to once the character after an escape is read.
        escapedstate = ' '
        while True:
            nextchar = self.instream.read(1)
            if nextchar == '\n':
                self.lineno += 1
            if self.debug >= 3:
                print("shlex: in state %r I see character: %r"
                      % (self.state, nextchar))
            if self.state is None:
                self.token = ''        # past end of file
                break
            elif self.state == ' ':
                if not nextchar:
                    self.state = None  # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in whitespace state")
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    # Discard the rest of the line; readline() swallows the
                    # newline, so count the line here.
                    self.instream.readline()
                    self.lineno += 1
                elif self.posix and nextchar in self.escape:
                    escapedstate = 'a'
                    self.state = nextchar
                elif nextchar in self.wordchars:
                    self.token = nextchar
                    self.state = 'a'
                elif nextchar in self.quotes:
                    if not self.posix:
                        self.token = nextchar
                    self.state = nextchar
                elif self.whitespace_split:
                    self.token = nextchar
                    self.state = 'a'
                else:
                    # Punctuation is a token of its own.  nextchar is known
                    # to be non-empty here, so it is always emitted.
                    self.token = nextchar
                    break
            elif self.state in self.quotes:
                quoted = True
                if not nextchar:      # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in quotes state")
                    # XXX what error should be raised here?
                    raise ValueError("No closing quotation")
                if nextchar == self.state:
                    if not self.posix:
                        # Non-POSIX: the closing quote ends the token and
                        # is kept as part of it.
                        self.token += nextchar
                        self.state = ' '
                        break
                    else:
                        # POSIX: drop the quote and keep building the word.
                        self.state = 'a'
                elif self.posix and nextchar in self.escape and \
                     self.state in self.escapedquotes:
                    escapedstate = self.state
                    self.state = nextchar
                else:
                    self.token += nextchar
            elif self.state in self.escape:
                if not nextchar:      # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in escape state")
                    # XXX what error should be raised here?
                    raise ValueError("No escaped character")
                # In posix shells, only the quote itself or the escape
                # character may be escaped within quotes.
                if escapedstate in self.quotes and \
                   nextchar != self.state and nextchar != escapedstate:
                    self.token += self.state
                self.token += nextchar
                self.state = escapedstate
            elif self.state == 'a':
                if not nextchar:
                    self.state = None   # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in word state")
                    self.state = ' '
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    self.instream.readline()
                    self.lineno += 1
                    # Only POSIX mode ends the word here; otherwise the
                    # loop keeps accumulating into the same token.
                    if self.posix:
                        self.state = ' '
                        if self.token or (self.posix and quoted):
                            break   # emit current token
                        else:
                            continue
                elif self.posix and nextchar in self.quotes:
                    self.state = nextchar
                elif self.posix and nextchar in self.escape:
                    escapedstate = 'a'
                    self.state = nextchar
                elif nextchar in self.wordchars or nextchar in self.quotes \
                    or self.whitespace_split:
                    self.token += nextchar
                else:
                    # Punctuation ends the word; it is queued so that the
                    # next get_token() call returns it.
                    self.pushback.appendleft(nextchar)
                    if self.debug >= 2:
                        print("shlex: I see punctuation in word state")
                    self.state = ' '
                    if self.token:
                        break   # emit current token
                    else:
                        continue
        result = self.token
        self.token = ''
        # In POSIX mode an empty unquoted result means end of input; an
        # empty quoted string ('' or "") is a real token and stays ''.
        if self.posix and not quoted and result == '':
            result = None
        if self.debug > 1:
            if result:
                print("shlex: raw token=" + repr(result))
            else:
                print("shlex: raw token=EOF")
        return result

    def sourcehook(self, newfile):
        """Hook called on a filename to be sourced.

        Strips the surrounding characters when the name starts with a
        double quote and, when the current ``infile`` is a string and the
        name is not absolute, resolves it relative to the directory of
        ``infile``.  Returns a ``(filename, open file object)`` tuple.

        Raises ValueError when no file name is given, which happens when
        the source keyword is the last token of the input.
        """
        if not newfile:
            # read_token() reports end of input as '' or None; indexing
            # either would fail with an unrelated IndexError/TypeError.
            raise ValueError("No file name after source keyword")
        if newfile[0] == '"':
            newfile = newfile[1:-1]
        # This implements cpp-like semantics for relative-path inclusion.
        if isinstance(self.infile, self._string_types) \
           and not os.path.isabs(newfile):
            newfile = os.path.join(os.path.dirname(self.infile), newfile)
        return (newfile, open(newfile, "r"))

    def error_leader(self, infile=None, lineno=None):
        """Emit a C-compiler-like, Emacs-friendly error-message leader.

        ``infile`` and ``lineno`` default to the lexer's current values.
        """
        if infile is None:
            infile = self.infile
        if lineno is None:
            lineno = self.lineno
        return "\"%s\", line %d: " % (infile, lineno)

    def __iter__(self):
        """Return the lexer itself; it is its own iterator."""
        return self

    def next(self):
        """Return the next token, raising StopIteration at end of input."""
        token = self.get_token()
        if token == self.eof:
            raise StopIteration
        return token

    # Alias under the method name used by the Python 3 iterator protocol.
    __next__ = next
273edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
def split(s, comments=False, posix=True):
    """Split *s* into a list of tokens using shell-like syntax.

    A lexer is created over ``s`` with ``whitespace_split`` enabled.
    Unless ``comments`` is true, comment recognition is switched off by
    clearing the lexer's ``commenters`` string.  ``posix`` is passed
    through to the lexer unchanged.
    """
    lexer = shlex(s, posix=posix)
    lexer.whitespace_split = True
    if not comments:
        # With no commenter characters nothing is treated as a comment.
        lexer.commenters = ''
    return [token for token in lexer]
280edbb763a2b63074cd468a5d33a17908b2cc0654Jeff Vander Stoep
if __name__ == '__main__':
    # Command-line driver: tokenize the file named by the first argument
    # (or standard input when no argument is given) and print each token.
    if len(sys.argv) == 1:
        source_name = None
        stream = sys.stdin
    else:
        source_name = sys.argv[1]
        stream = open(source_name)
    try:
        lexer = shlex(stream, source_name)
        while True:
            tt = lexer.get_token()
            if not tt:
                # A false token ('' in the default non-POSIX mode) marks
                # the end of the input.
                break
            print("Token: " + repr(tt))
    finally:
        # Close the file opened above; leave standard input alone.
        if stream is not sys.stdin:
            stream.close()
293