1# -*- coding: utf-8 -*-
2#                     The LLVM Compiler Infrastructure
3#
4# This file is distributed under the University of Illinois Open Source
5# License. See LICENSE.TXT for details.
""" This module is responsible for parsing a compiler invocation. """
7
8import re
9import os
10import collections
11
12__all__ = ['split_command', 'classify_source', 'compiler_language']
13
# Ignored compiler options map for compilation database creation.
# The map is used in the `split_command` function (which both ignores and
# classifies parameters). Please note that these are not the only parameters
# which might be ignored.
#
# Keys are the option names; values are the number of following arguments
# to skip.
IGNORED_FLAGS = {
    # "compile only" is dropped because whoever creates the compilation
    # database sets it explicitly.
    '-c': 0,
    # dependency file generation options. keeping them would produce
    # entries which differ only in these flags; users reported longer
    # execution times caused by such duplicates.
    '-MD': 0,
    '-MMD': 0,
    '-MG': 0,
    '-MP': 0,
    # ... the same family, but these consume the next argument too.
    '-MF': 1,
    '-MT': 1,
    '-MQ': 1,
    # linker options. the compilation database holds compilation commands
    # only, so the compiler would ignore these anyway; dropping them just
    # makes the output more readable.
    '-static': 0,
    '-shared': 0,
    '-s': 0,
    '-rdynamic': 0,
    # ... linker options which consume the next argument too.
    '-l': 1,
    '-L': 1,
    '-u': 1,
    '-z': 1,
    '-T': 1,
    '-Xlinker': 1,
}
50
51# Known C/C++ compiler executable name patterns
COMPILER_PATTERNS = frozenset(re.compile(regex) for regex in (
    # 'cc' or 'c++', optionally behind an 'intercept-' or 'analyze-' prefix
    r'^(intercept-|analyze-|)c(c|\+\+)$',
    # 'gcc', 'g++', 'mcc' or 'm++', with any number of dash terminated
    # prefixes and an optional version suffix (like '-4', '-4.8', '-4.8.2')
    r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$',
    # 'clang' or 'clang++', same prefix and version suffix rules
    r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$',
    # 'llvm-gcc' or 'llvm-g++'
    r'^llvm-g(cc|\+\+)$',
))
58
59
def split_command(command):
    """ Returns a value when the command is a compilation, None otherwise.

    :param command: the invocation as a list of strings, the executable
                    being the first element.
    :return: None when the executable is not a known C/C++ compiler, when
             the command does not run the compilation pass, or when it
             names no source file. Otherwise a 'Compilation' named tuple
             with the following attributes:

        compiler: string value of 'c' or 'c++'
        flags:    list of compile options
        files:    list of source files

    A command line which ends with a flag that expects a value (eg.: a
    trailing '-MF' or '-D') is tolerated: the missing value is simply not
    consumed. """

    Compilation = collections.namedtuple('Compilation',
                                         ['compiler', 'flags', 'files'])
    compiler = compiler_language(command)
    # quit right now, if the program was not a C/C++ compiler
    if not compiler:
        return None

    flags = []
    files = []
    # an explicit iterator is used, because some options consume the
    # arguments which follow them.
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in {'-E', '-S', '-cc1', '-M', '-MM', '-###'}:
            return None
        # ignore some flags, together with the values they take
        elif arg in IGNORED_FLAGS:
            for _ in range(IGNORED_FLAGS[arg]):
                # the default prevents StopIteration from escaping when
                # the command line is truncated.
                next(args, None)
        # ignore linker flags which are glued to their value
        elif re.match(r'^-(l|L|Wl,).+', arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {'-D', '-I'}:
            flags.append(arg)
            value = next(args, None)
            if value is not None:
                flags.append(value)
        # parameter which looks source file is taken...
        elif re.match(r'^[^-].+', arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)
    # a command without source files is not a compilation
    if not files:
        return None
    return Compilation(compiler=compiler, flags=flags, files=files)
102
103
def classify_source(filename, c_compiler=True):
    """ Map the file name extension to a language name.

    :param filename:    name (or path) of the file to classify.
    :param c_compiler:  when false, '.c' and '.i' files are taken as C++
                        input instead of C input.
    :return: the language name, or None for an unknown extension. """

    # extensions which always mean C++ source
    languages = dict.fromkeys(
        ('.C', '.cc', '.CC', '.cp', '.cpp', '.cxx', '.c++', '.C++', '.txx'),
        'c++')
    # preprocessed C++ and the Objective-C family
    languages.update({
        '.ii': 'c++-cpp-output',
        '.m': 'objective-c',
        '.mi': 'objective-c-cpp-output',
        '.mm': 'objective-c++',
        '.mii': 'objective-c++-cpp-output',
    })
    # these two depend on which compiler driver was called
    if c_compiler:
        languages['.c'] = 'c'
        languages['.i'] = 'c-cpp-output'
    else:
        languages['.c'] = 'c++'
        languages['.i'] = 'c++-cpp-output'

    suffix = os.path.splitext(os.path.basename(filename))[1]
    return languages.get(suffix)
128
129
def compiler_language(command):
    """ Decide whether the command is a compiler call, and for which language.

    :param command: the invocation as a list of strings, the executable
                    being the first element.
    :return: 'c' or 'c++' when the executable name matches one of the
             COMPILER_PATTERNS, None otherwise (or on empty command). """

    if not command:
        return None

    # only the file name matters, the directory part is dropped
    program = os.path.basename(command[0])
    for known in COMPILER_PATTERNS:
        if known.match(program):
            break
    else:
        # none of the known compiler name patterns matched
        return None

    # a '++' at the end, or right before a dash led suffix, means C++
    if re.match(r'^(.+)(\+\+)(-.+|)$', program):
        return 'c++'
    return 'c'
142