1#!/usr/bin/python
2"""
3Cartesian configuration format file parser.
4
5 Filter syntax:
6 , means OR
7 .. means AND
8 . means IMMEDIATELY-FOLLOWED-BY
9
10 Example:
11 qcow2..Fedora.14, RHEL.6..raw..boot, smp2..qcow2..migrate..ide
12 means match all dicts whose names have:
13 (qcow2 AND (Fedora IMMEDIATELY-FOLLOWED-BY 14)) OR
14 ((RHEL IMMEDIATELY-FOLLOWED-BY 6) AND raw AND boot) OR
15 (smp2 AND qcow2 AND migrate AND ide)
16
17 Note:
18 'qcow2..Fedora.14' is equivalent to 'Fedora.14..qcow2'.
19 'qcow2..Fedora.14' is not equivalent to 'qcow2..14.Fedora'.
20 'ide, scsi' is equivalent to 'scsi, ide'.
21
22 Filters can be used in 3 ways:
23 only <filter>
24 no <filter>
25 <filter>:
26 The last one starts a conditional block.
27
28@copyright: Red Hat 2008-2011
29"""
30
31import re, os, sys, optparse, collections
32
class ParserError(Exception):
    """
    Error raised when a config file, string or filter cannot be parsed.

    The class derives from Exception so that it can be raised and caught on
    every Python version (a plain class cannot be raised on Python 3).

    @param msg: Short description of the problem.
    @param line: The offending line of text, if known.
    @param filename: Name of the input being parsed, if known.
    @param linenum: Number of the offending line, if known.
    """
    def __init__(self, msg, line=None, filename=None, linenum=None):
        Exception.__init__(self, msg, line, filename, linenum)
        self.msg = msg
        self.line = line
        self.filename = filename
        self.linenum = linenum

    def __str__(self):
        # The offending line is only included in the message when it is known
        if self.line:
            return "%s: %r (%s:%s)" % (self.msg, self.line,
                                       self.filename, self.linenum)
        else:
            return "%s (%s:%s)" % (self.msg, self.filename, self.linenum)
46
47
# Maximum number of failed cases kept in each Node's failed_cases deque.
# Parser.get_dicts() pushes new failed cases on the left and pops one from
# the right whenever the deque grows beyond this limit.
num_failed_cases = 5
49
50
class Node(object):
    """
    One node of the tree built by the parser.

    Every instance starts out empty: name, dep, content and children are
    separate empty lists, labels is an empty set, append_to_shortname is
    False and failed_cases is an empty deque.
    """
    def __init__(self):
        # Each list attribute gets its own fresh list object
        for attr in ("name", "dep", "content", "children"):
            setattr(self, attr, [])
        self.failed_cases = collections.deque()
        self.append_to_shortname = False
        self.labels = set()
60
61
def _match_adjacent(block, ctx, ctx_set):
    """
    Count how many leading elements of block can be found, in order and
    immediately following one another, in ctx.

    Callers treat a return value equal to len(block) as a full match of the
    block; a smaller value is a partial match.

    @param block: A non-empty list of names that must be adjacent.
    @param ctx: The list of names to search in.
    @param ctx_set: The same names as ctx, as a set (used for fast
            membership tests).
    @return: The number of elements of block that were matched.
    """
    # The first element is not in ctx at all: nothing matches
    if block[0] not in ctx_set:
        return 0
    # A single element block only needs to be present
    if len(block) == 1:
        return 1
    # The second element is not in ctx at all, so at most the first element
    # can be matched, and it only counts if it is the last element of ctx
    if block[1] not in ctx_set:
        return int(ctx[-1] == block[0])
    # k is the number of consecutive elements of block matched so far and
    # i is the position in ctx that is being compared
    k = 0
    i = ctx.index(block[0])
    while i < len(ctx):
        if k > 0 and ctx[i] != block[k]:
            # The current attempt failed: go back to the position right
            # after the one where the attempt started and start over
            i -= k - 1
            k = 0
        if ctx[i] == block[k]:
            k += 1
            # The whole block has been matched
            if k >= len(block):
                break
            # The next element is not in ctx at all, so this partial match
            # cannot be extended
            if block[k] not in ctx_set:
                break
        i += 1
    return k
84
85
def _might_match_adjacent(block, ctx, ctx_set, descendant_labels):
    """
    Tell whether block is matched by ctx or could still be completed.

    The elements of block that _match_adjacent() did not match in ctx must
    all be present in descendant_labels.

    @param block: A list of names that must be adjacent.
    @param ctx: The list of names to search in.
    @param ctx_set: The names of ctx as a set.
    @param descendant_labels: A container of names to look the unmatched
            elements up in.
    @return: True if every unmatched element is in descendant_labels.
    """
    already_matched = _match_adjacent(block, ctx, ctx_set)
    pending = block[already_matched:]
    unavailable = [label for label in pending
                   if label not in descendant_labels]
    return not unavailable
92
93
94# Filter must inherit from object (otherwise type() won't work)
class Filter(object):
    """
    A parsed filter expression.

    self.filter is a list of alternatives (separated by ',' or whitespace in
    the expression).  Each alternative is a list of blocks (separated by
    '..') and each block is a list of names (separated by '.').
    """
    def __init__(self, s):
        """
        Parse a filter expression.

        @param s: The filter expression, e.g. 'qcow2..Fedora.14, ide'.
        @raise ParserError: If s contains a character other than
                alphanumerics, whitespace and '.,_-', or an empty name.
        """
        self.filter = []
        punctuation = ".,_-"
        for c in s:
            if c.isalnum() or c.isspace() or c in punctuation:
                continue
            raise ParserError("Illegal characters in filter")
        for alternative in s.replace(",", " ").split():
            blocks = []
            for part in alternative.split(".."):
                names = part.split(".")
                blocks.append(names)
            # An empty name means something like a leading or doubled dot
            for names in blocks:
                for name in names:
                    if not name:
                        raise ParserError("Syntax error")
            self.filter.append(blocks)


    def match(self, ctx, ctx_set):
        """
        Tell whether at least one alternative is fully matched by ctx.

        @param ctx: The list of names to match against.
        @param ctx_set: The names of ctx as a set.
        @return: True if all the blocks of some alternative match entirely.
        """
        for alternative in self.filter:
            satisfied = True
            for block in alternative:
                if _match_adjacent(block, ctx, ctx_set) != len(block):
                    satisfied = False
                    break
            if satisfied:
                return True
        return False


    def might_match(self, ctx, ctx_set, descendant_labels):
        """
        Tell whether at least one alternative is matched or could still be
        completed using descendant_labels.

        @param ctx: The list of names to match against.
        @param ctx_set: The names of ctx as a set.
        @param descendant_labels: Names in which unmatched elements are
                looked up.
        @return: True if all the blocks of some alternative might match.
        """
        for alternative in self.filter:
            feasible = True
            for block in alternative:
                if not _might_match_adjacent(block, ctx, ctx_set,
                                             descendant_labels):
                    feasible = False
                    break
            if feasible:
                return True
        return False
129
130
class NoOnlyFilter(Filter):
    """
    Base class of the filters built from a whole statement line.

    The first whitespace-separated word of the line is skipped and the rest
    is parsed as the filter expression.  The full line is kept in self.line.
    """
    def __init__(self, line):
        keyword_and_expression = line.split(None, 1)
        expression = keyword_and_expression[1]
        Filter.__init__(self, expression)
        self.line = line
135
136
class OnlyFilter(NoOnlyFilter):
    """
    Filter whose expression must match ('only <filter>').
    """
    def is_irrelevant(self, ctx, ctx_set, descendant_labels):
        """
        Return True if the filter already matches ctx entirely.
        """
        fully_matched = self.match(ctx, ctx_set)
        return fully_matched


    def requires_action(self, ctx, ctx_set, descendant_labels):
        """
        Return True if the filter can no longer match, i.e. might_match()
        is false for ctx and descendant_labels.
        """
        still_possible = self.might_match(ctx, ctx_set, descendant_labels)
        return not still_possible


    def might_pass(self, failed_ctx, failed_ctx_set, ctx, ctx_set,
                   descendant_labels):
        """
        Compare ctx with a context in which this filter was recorded as
        failed.

        @return: The result of might_match() for ctx as soon as some block
                matches more elements in ctx than in failed_ctx; False if no
                block does.
        """
        for alternative in self.filter:
            for block in alternative:
                now = _match_adjacent(block, ctx, ctx_set)
                before = _match_adjacent(block, failed_ctx, failed_ctx_set)
                if now > before:
                    return self.might_match(ctx, ctx_set, descendant_labels)
        return False
154
155
class NoFilter(NoOnlyFilter):
    """
    Filter whose expression must not match ('no <filter>').
    """
    def is_irrelevant(self, ctx, ctx_set, descendant_labels):
        """
        Return True if the filter can no longer match, i.e. might_match()
        is false for ctx and descendant_labels.
        """
        still_possible = self.might_match(ctx, ctx_set, descendant_labels)
        return not still_possible


    def requires_action(self, ctx, ctx_set, descendant_labels):
        """
        Return True if the filter matches ctx entirely.
        """
        fully_matched = self.match(ctx, ctx_set)
        return fully_matched


    def might_pass(self, failed_ctx, failed_ctx_set, ctx, ctx_set,
                   descendant_labels):
        """
        Compare ctx with a context in which this filter was recorded as
        failed.

        @return: The negation of match() for ctx as soon as some block
                matches fewer elements in ctx than in failed_ctx; False if
                no block does.
        """
        for alternative in self.filter:
            for block in alternative:
                now = _match_adjacent(block, ctx, ctx_set)
                before = _match_adjacent(block, failed_ctx, failed_ctx_set)
                if now < before:
                    return not self.match(ctx, ctx_set)
        return False
173
174
class Condition(NoFilter):
    """
    Conditional block ('<filter>:').

    The filter expression is the line without its trailing colons.  The
    statements of the block are collected in self.content.  The matching
    logic is inherited from NoFilter, so requires_action() is true when the
    filter matches.
    """
    def __init__(self, line):
        expression = line.rstrip(":")
        Filter.__init__(self, expression)
        self.line = line
        self.content = []
180
181
class NegativeCondition(OnlyFilter):
    """
    Negated conditional block ('!<filter>:').

    The filter expression is the line without its leading exclamation marks
    and trailing colons.  The statements of the block are collected in
    self.content.  The matching logic is inherited from OnlyFilter, so
    requires_action() is true when the filter cannot match.
    """
    def __init__(self, line):
        without_bang = line.lstrip("!")
        expression = without_bang.rstrip(":")
        Filter.__init__(self, expression)
        self.line = line
        self.content = []
187
188
189class Parser(object):
190    """
191    Parse an input file or string that follows the Cartesian Config File format
192    and generate a list of dicts that will be later used as configuration
193    parameters by autotest tests that use that format.
194
195    @see: http://autotest.kernel.org/wiki/CartesianConfig
196    """
197
198    def __init__(self, filename=None, debug=False):
199        """
200        Initialize the parser and optionally parse a file.
201
202        @param filename: Path of the file to parse.
203        @param debug: Whether to turn on debugging output.
204        """
205        self.node = Node()
206        self.debug = debug
207        if filename:
208            self.parse_file(filename)
209
210
211    def parse_file(self, filename):
212        """
213        Parse a file.
214
215        @param filename: Path of the configuration file.
216        """
217        self.node = self._parse(FileReader(filename), self.node)
218
219
220    def parse_string(self, s):
221        """
222        Parse a string.
223
224        @param s: String to parse.
225        """
226        self.node = self._parse(StrReader(s), self.node)
227
228
    def get_dicts(self, node=None, ctx=[], content=[], shortname=[], dep=[]):
        """
        Generate dictionaries from the code parsed so far.  This should
        be called after parsing something.

        The method walks the node tree recursively.  All the parameters are
        filled in by the recursive calls; external callers are presumably
        expected to rely on the defaults.

        Note: the list defaults are shared between calls, but they are never
        modified in place here (new lists are built with '+').

        @param node: The node to start from (self.node if not given).
        @param ctx: The list of names collected from the ancestors of node.
        @param content: A list of (filename, linenum, object) tuples handed
                down by the ancestors of node, where object is an operator,
                a filter or a conditional block.
        @param shortname: The list of names that make up the short name so
                far.
        @param dep: The list of dependency names collected so far.
        @return: A dict generator.  Each dict has the keys "name", "dep"
                and "shortname", plus whatever the collected operators set.
        """
        def process_content(content, failed_filters):
            # 1. Check that the filters in content are OK with the current
            #    context (ctx).
            # 2. Move the parts of content that are still relevant into
            #    new_content and unpack conditional blocks if appropriate.
            #    For example, if an 'only' statement fully matches ctx, it
            #    becomes irrelevant and is not appended to new_content.
            #    If a conditional block fully matches, its contents are
            #    unpacked into new_content.
            # 3. Move failed filters into failed_filters, so that next time we
            #    reach this node or one of its ancestors, we'll check those
            #    filters first.
            # Returns False as soon as a filter fails, True otherwise.
            for t in content:
                filename, linenum, obj = t
                if type(obj) is Op:
                    new_content.append(t)
                    continue
                # obj is an OnlyFilter/NoFilter/Condition/NegativeCondition
                if obj.requires_action(ctx, ctx_set, labels):
                    # This filter requires action now
                    if type(obj) is OnlyFilter or type(obj) is NoFilter:
                        self._debug("    filter did not pass: %r (%s:%s)",
                                    obj.line, filename, linenum)
                        failed_filters.append(t)
                        return False
                    else:
                        self._debug("    conditional block matches: %r (%s:%s)",
                                    obj.line, filename, linenum)
                        # Check and unpack the content inside this Condition
                        # object (note: the failed filters should go into
                        # new_internal_filters because we don't expect them to
                        # come from outside this node, even if the Condition
                        # itself was external)
                        if not process_content(obj.content,
                                               new_internal_filters):
                            failed_filters.append(t)
                            return False
                        continue
                elif obj.is_irrelevant(ctx, ctx_set, labels):
                    # This filter is no longer relevant and can be removed
                    continue
                else:
                    # Keep the filter and check it again later
                    new_content.append(t)
            return True

        # Tell whether a case that failed before (with the given context and
        # failed filters) might pass with the current ctx and content.
        def might_pass(failed_ctx,
                       failed_ctx_set,
                       failed_external_filters,
                       failed_internal_filters):
            for t in failed_external_filters:
                # An external filter that is not in the current content
                # cannot fail again
                if t not in content:
                    return True
                filename, linenum, filter = t
                if filter.might_pass(failed_ctx, failed_ctx_set, ctx, ctx_set,
                                     labels):
                    return True
            for t in failed_internal_filters:
                filename, linenum, filter = t
                if filter.might_pass(failed_ctx, failed_ctx_set, ctx, ctx_set,
                                     labels):
                    return True
            return False

        # Remember the current context and failed filters at the left end of
        # node.failed_cases and drop the rightmost entry if there are more
        # than num_failed_cases of them.
        def add_failed_case():
            node.failed_cases.appendleft((ctx, ctx_set,
                                          new_external_filters,
                                          new_internal_filters))
            if len(node.failed_cases) > num_failed_cases:
                node.failed_cases.pop()

        node = node or self.node
        # Update dep
        for d in node.dep:
            dep = dep + [".".join(ctx + [d])]
        # Update ctx
        ctx = ctx + node.name
        ctx_set = set(ctx)
        labels = node.labels
        # Get the current name
        name = ".".join(ctx)
        if node.name:
            self._debug("checking out %r", name)
        # Check previously failed filters
        for i, failed_case in enumerate(node.failed_cases):
            if not might_pass(*failed_case):
                self._debug("    this subtree has failed before")
                # Move the matching failed case to the front of the deque
                del node.failed_cases[i]
                node.failed_cases.appendleft(failed_case)
                return
        # Check content and unpack it into new_content
        new_content = []
        new_external_filters = []
        new_internal_filters = []
        if (not process_content(node.content, new_internal_filters) or
            not process_content(content, new_external_filters)):
            add_failed_case()
            return
        # Update shortname
        if node.append_to_shortname:
            shortname = shortname + node.name
        # Recurse into children
        count = 0
        for n in node.children:
            for d in self.get_dicts(n, ctx, new_content, shortname, dep):
                count += 1
                yield d
        # Reached leaf?
        if not node.children:
            self._debug("    reached leaf, returning it")
            d = {"name": name, "dep": dep, "shortname": ".".join(shortname)}
            # Only operators are left in new_content at this point
            # (NOTE(review): this relies on every filter having been resolved
            # before a leaf is reached -- confirm)
            for filename, linenum, op in new_content:
                op.apply_to_dict(d)
            yield d
        # If this node did not produce any dicts, remember the failed filters
        # of its descendants
        elif not count:
            new_external_filters = []
            new_internal_filters = []
            for n in node.children:
                # Only the most recent failed case of each child is used
                (failed_ctx,
                 failed_ctx_set,
                 failed_external_filters,
                 failed_internal_filters) = n.failed_cases[0]
                for obj in failed_internal_filters:
                    if obj not in new_internal_filters:
                        new_internal_filters.append(obj)
                for obj in failed_external_filters:
                    # Filters that came from this node's own content are
                    # internal from this node's point of view
                    if obj in content:
                        if obj not in new_external_filters:
                            new_external_filters.append(obj)
                    else:
                        if obj not in new_internal_filters:
                            new_internal_filters.append(obj)
            add_failed_case()
371
372
373    def _debug(self, s, *args):
374        if self.debug:
375            s = "DEBUG: %s" % s
376            print s % args
377
378
379    def _warn(self, s, *args):
380        s = "WARNING: %s" % s
381        print s % args
382
383
384    def _parse_variants(self, cr, node, prev_indent=-1):
385        """
386        Read and parse lines from a FileReader object until a line with an
387        indent level lower than or equal to prev_indent is encountered.
388
389        @param cr: A FileReader/StrReader object.
390        @param node: A node to operate on.
391        @param prev_indent: The indent level of the "parent" block.
392        @return: A node object.
393        """
394        node4 = Node()
395
396        while True:
397            line, indent, linenum = cr.get_next_line(prev_indent)
398            if not line:
399                break
400
401            name, dep = map(str.strip, line.lstrip("- ").split(":", 1))
402            for char in name:
403                if not (char.isalnum() or char in "@._-"):
404                    raise ParserError("Illegal characters in variant name",
405                                      line, cr.filename, linenum)
406            for char in dep:
407                if not (char.isalnum() or char.isspace() or char in ".,_-"):
408                    raise ParserError("Illegal characters in dependencies",
409                                      line, cr.filename, linenum)
410
411            node2 = Node()
412            node2.children = [node]
413            node2.labels = node.labels
414
415            node3 = self._parse(cr, node2, prev_indent=indent)
416            node3.name = name.lstrip("@").split(".")
417            node3.dep = dep.replace(",", " ").split()
418            node3.append_to_shortname = not name.startswith("@")
419
420            node4.children += [node3]
421            node4.labels.update(node3.labels)
422            node4.labels.update(node3.name)
423
424        return node4
425
426
427    def _parse(self, cr, node, prev_indent=-1):
428        """
429        Read and parse lines from a StrReader object until a line with an
430        indent level lower than or equal to prev_indent is encountered.
431
432        @param cr: A FileReader/StrReader object.
433        @param node: A Node or a Condition object to operate on.
434        @param prev_indent: The indent level of the "parent" block.
435        @return: A node object.
436        """
437        while True:
438            line, indent, linenum = cr.get_next_line(prev_indent)
439            if not line:
440                break
441
442            words = line.split(None, 1)
443
444            # Parse 'variants'
445            if line == "variants:":
446                # 'variants' is not allowed inside a conditional block
447                if (isinstance(node, Condition) or
448                    isinstance(node, NegativeCondition)):
449                    raise ParserError("'variants' is not allowed inside a "
450                                      "conditional block",
451                                      None, cr.filename, linenum)
452                node = self._parse_variants(cr, node, prev_indent=indent)
453                continue
454
455            # Parse 'include' statements
456            if words[0] == "include":
457                if len(words) < 2:
458                    raise ParserError("Syntax error: missing parameter",
459                                      line, cr.filename, linenum)
460                filename = os.path.expanduser(words[1])
461                if isinstance(cr, FileReader) and not os.path.isabs(filename):
462                    filename = os.path.join(os.path.dirname(cr.filename),
463                                            filename)
464                if not os.path.isfile(filename):
465                    self._warn("%r (%s:%s): file doesn't exist or is not a "
466                               "regular file", line, cr.filename, linenum)
467                    continue
468                node = self._parse(FileReader(filename), node)
469                continue
470
471            # Parse 'only' and 'no' filters
472            if words[0] in ("only", "no"):
473                if len(words) < 2:
474                    raise ParserError("Syntax error: missing parameter",
475                                      line, cr.filename, linenum)
476                try:
477                    if words[0] == "only":
478                        f = OnlyFilter(line)
479                    elif words[0] == "no":
480                        f = NoFilter(line)
481                except ParserError, e:
482                    e.line = line
483                    e.filename = cr.filename
484                    e.linenum = linenum
485                    raise
486                node.content += [(cr.filename, linenum, f)]
487                continue
488
489            # Look for operators
490            op_match = _ops_exp.search(line)
491
492            # Parse conditional blocks
493            if ":" in line:
494                index = line.index(":")
495                if not op_match or index < op_match.start():
496                    index += 1
497                    cr.set_next_line(line[index:], indent, linenum)
498                    line = line[:index]
499                    try:
500                        if line.startswith("!"):
501                            cond = NegativeCondition(line)
502                        else:
503                            cond = Condition(line)
504                    except ParserError, e:
505                        e.line = line
506                        e.filename = cr.filename
507                        e.linenum = linenum
508                        raise
509                    self._parse(cr, cond, prev_indent=indent)
510                    node.content += [(cr.filename, linenum, cond)]
511                    continue
512
513            # Parse regular operators
514            if not op_match:
515                raise ParserError("Syntax error", line, cr.filename, linenum)
516            node.content += [(cr.filename, linenum, Op(line, op_match))]
517
518        return node
519
520
521# Assignment operators
522
# Keys that the operator functions below never set, modify or delete
_reserved_keys = set(("name", "shortname", "dep"))
524
525
def _op_set(d, key, value):
    """
    Set d[key] to value, unless key is a reserved key.
    """
    if key in _reserved_keys:
        return
    d[key] = value
529
530
def _op_append(d, key, value):
    """
    Append value to d[key] (a missing key counts as an empty string),
    unless key is a reserved key.
    """
    if key in _reserved_keys:
        return
    current = d.get(key, "")
    d[key] = current + value
534
535
def _op_prepend(d, key, value):
    """
    Prepend value to d[key] (a missing key counts as an empty string),
    unless key is a reserved key.
    """
    if key in _reserved_keys:
        return
    current = d.get(key, "")
    d[key] = value + current
539
540
def _op_regex_set(d, exp, value):
    """
    Set to value every non-reserved key of d that is matched by the regular
    expression exp followed by '$'.
    """
    pattern = re.compile("%s$" % exp)
    for key in d:
        if key in _reserved_keys:
            continue
        if pattern.match(key):
            d[key] = value
546
547
def _op_regex_append(d, exp, value):
    """
    Append value to every non-reserved key of d that is matched by the
    regular expression exp followed by '$'.
    """
    pattern = re.compile("%s$" % exp)
    for key in d:
        if key in _reserved_keys:
            continue
        if pattern.match(key):
            d[key] += value
553
554
def _op_regex_prepend(d, exp, value):
    """
    Prepend value to every non-reserved key of d that is matched by the
    regular expression exp followed by '$'.
    """
    pattern = re.compile("%s$" % exp)
    for key in d:
        if key in _reserved_keys:
            continue
        if pattern.match(key):
            d[key] = value + d[key]
560
561
def _op_regex_del(d, empty, exp):
    """
    Delete every non-reserved key of d that is matched by the regular
    expression exp followed by '$'.

    @param d: The dict to delete keys from.
    @param empty: Unused.  Op.apply_to_dict() passes the text found before
            the operator here, and the expression comes after 'del'.
    @param exp: The regular expression to match the keys against.
    """
    exp = re.compile("%s$" % exp)
    # Iterate over a copy of the keys: deleting from a dict while iterating
    # over its live key view raises RuntimeError on Python 3
    for key in list(d):
        if key not in _reserved_keys and exp.match(key):
            del d[key]
567
568
# Table of the supported operators.  Each key is an operator as it is
# written in a config line; each value is a pair made of the regular
# expression source that recognizes the operator and the function that
# applies it to a dict.  Op.__init__ looks the function up using the text
# matched by _ops_exp.
_ops = {"=": (r"\=", _op_set),
        "+=": (r"\+\=", _op_append),
        "<=": (r"\<\=", _op_prepend),
        "?=": (r"\?\=", _op_regex_set),
        "?+=": (r"\?\+\=", _op_regex_append),
        "?<=": (r"\?\<\=", _op_regex_prepend),
        "del": (r"^del\b", _op_regex_del)}

# A single pattern made of all the operator expressions above, joined as
# alternatives
_ops_exp = re.compile("|".join([op[0] for op in _ops.values()]))
578
579
class Op(object):
    """
    An operator statement, ready to be applied to a dict.
    """
    def __init__(self, line, m):
        """
        Split an operator line into function, key and value.

        @param line: The line that contains the operator.
        @param m: The match object of the operator within line.
        """
        operator = m.group()
        self.func = _ops[operator][1]
        # The key is on the left of the operator, the value on the right
        self.key = line[:m.start()].strip()
        text = line[m.end():].strip()
        if text:
            first, last = text[0], text[-1]
            # Drop one pair of matching quotes around the value
            if first == last and first in ('"', "'"):
                text = text[1:-1]
        self.value = text


    def apply_to_dict(self, d):
        """
        Apply the operator to the dict d.
        """
        handler = self.func
        handler(d, self.key, self.value)
593
594
595# StrReader and FileReader
596
class StrReader(object):
    """
    Line oriented reader over a string.

    Blank lines and lines starting with '#' or '//' are dropped up front;
    the remaining lines are stored stripped, along with their indentation
    and their line number (counted from 1).
    """
    def __init__(self, s):
        """
        Preprocess the input string.

        @param s: The string to parse.
        """
        self.filename = "<string>"
        self._stored_line = None
        self._line_index = 0
        self._lines = []
        linenum = 0
        for raw in s.splitlines():
            linenum += 1
            text = raw.rstrip().expandtabs()
            body = text.lstrip()
            # Skip blank lines and comments
            if not body:
                continue
            if body.startswith("#") or body.startswith("//"):
                continue
            self._lines.append((body, len(text) - len(body), linenum))


    def get_next_line(self, prev_indent):
        """
        Return the next line of the current block and move past it.

        A line stored with set_next_line() is returned first, whatever its
        indentation is.

        @param prev_indent: The indentation level of the previous block.
        @return: (line, indent, linenum), where indent is the line's
            indentation level.  (None, -1, -1) is returned at the end of the
            input or if the next line is not indented more than prev_indent
            (that line is not consumed).
        """
        pending = self._stored_line
        if pending:
            self._stored_line = None
            return pending
        nothing = (None, -1, -1)
        if self._line_index >= len(self._lines):
            return nothing
        entry = self._lines[self._line_index]
        if entry[1] <= prev_indent:
            return nothing
        self._line_index += 1
        return entry


    def set_next_line(self, line, indent, linenum):
        """
        Make the next call to get_next_line() return the given line instead
        of the real next line.  Nothing is stored if the line is empty once
        stripped.
        """
        stripped = line.strip()
        if not stripped:
            return
        self._stored_line = (stripped, indent, linenum)
652
653
class FileReader(StrReader):
    """
    Preprocess an input file for easy reading.
    """
    def __init__(self, filename):
        """
        Initialize the reader.

        @param filename: The name of the input file.
        """
        # Close the file explicitly instead of leaving it to the garbage
        # collector
        f = open(filename)
        try:
            contents = f.read()
        finally:
            f.close()
        StrReader.__init__(self, contents)
        self.filename = filename
666
667
# Command line interface: parse a file (plus optional extra code given as
# additional arguments) and list the resulting dicts
if __name__ == "__main__":
    parser = optparse.OptionParser('usage: %prog [options] filename '
                                   '[extra code] ...\n\nExample:\n\n    '
                                   '%prog tests.cfg "only my_set" "no qcow2"')
    parser.add_option("-v", "--verbose", dest="debug", action="store_true",
                      help="include debug messages in console output")
    parser.add_option("-f", "--fullname", dest="fullname", action="store_true",
                      help="show full dict names instead of short names")
    parser.add_option("-c", "--contents", dest="contents", action="store_true",
                      help="show dict contents")

    options, args = parser.parse_args()
    if not args:
        parser.error("filename required")

    c = Parser(args[0], debug=options.debug)
    # Every extra argument is parsed as additional config code
    for s in args[1:]:
        c.parse_string(s)

    # The print calls below take a single parenthesized argument, which is
    # valid on both Python 2 and Python 3
    for i, d in enumerate(c.get_dicts()):
        if options.fullname:
            print("dict %4d:  %s" % (i + 1, d["name"]))
        else:
            print("dict %4d:  %s" % (i + 1, d["shortname"]))
        if options.contents:
            # sorted() is used because dict.keys() has no sort() method on
            # Python 3
            for key in sorted(d.keys()):
                print("    %s = %s" % (key, d[key]))
697