1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_
18#define ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_
19
20#include <type_traits>
21#include <assert.h>
22#include <functional>
23#include <vector>
24#include <algorithm>
25#include <numeric>
26#include <memory>
27
28#include "cmdline/cmdline_parse_result.h"
29#include "cmdline/token_range.h"
30#include "cmdline/unit.h"
31#include "cmdline/cmdline_types.h"
32
33namespace art {
34  // Implementation details for the parser. Do not look inside if you hate templates.
35  namespace detail {
36    // A non-templated base class for argument parsers. Used by the general parser
37    // to parse arguments, without needing to know the argument type at compile time.
38    //
39    // This is an application of the type erasure idiom.
40    struct CmdlineParseArgumentAny {
41      virtual ~CmdlineParseArgumentAny() {}
42
43      // Attempt to parse this argument starting at arguments[position].
44      // If the parsing succeeds, the parsed value will be saved as a side-effect.
45      //
46      // In most situations, the parsing will not match by returning kUnknown. In this case,
47      // no tokens were consumed and the position variable will not be updated.
48      //
49      // At other times, parsing may fail due to validation but the initial token was still matched
50      // (for example an out of range value, or passing in a string where an int was expected).
51      // In this case the tokens are still consumed, and the position variable will get incremented
52      // by all the consumed tokens.
53      //
54      // The # of tokens consumed by the parse attempt will be set as an out-parameter into
55      // consumed_tokens. The parser should skip this many tokens before parsing the next
56      // argument.
57      virtual CmdlineResult ParseArgument(const TokenRange& arguments, size_t* consumed_tokens) = 0;
58      // How many tokens should be taken off argv for parsing this argument.
59      // For example "--help" is just 1, "-compiler-option _" would be 2 (since there's a space).
60      //
61      // A [min,max] range is returned to represent argument definitions with multiple
62      // value tokens. (e.g. {"-h", "-h " } would return [1,2]).
63      virtual std::pair<size_t, size_t> GetNumTokens() const = 0;
64      // Get the run-time typename of the argument type.
65      virtual const char* GetTypeName() const = 0;
66      // Try to do a close match, returning how many tokens were matched against this argument
67      // definition. More tokens is better.
68      //
69      // Do a quick match token-by-token, and see if they match.
70      // Any tokens with a wildcard in them are only matched up until the wildcard.
71      // If this is true, then the wildcard matching later on can still fail, so this is not
72      // a guarantee that the argument is correct, it's more of a strong hint that the
73      // user-provided input *probably* was trying to match this argument.
74      //
75      // Returns how many tokens were either matched (or ignored because there was a
76      // wildcard present). 0 means no match. If the Size() tokens are returned.
77      virtual size_t MaybeMatches(const TokenRange& tokens) = 0;
78    };
79
80    template <typename T>
81    using EnableIfNumeric = std::enable_if<std::is_arithmetic<T>::value>;
82
83    template <typename T>
84    using DisableIfNumeric = std::enable_if<!std::is_arithmetic<T>::value>;
85
86    // Argument definition information, created by an ArgumentBuilder and an UntypedArgumentBuilder.
87    template <typename TArg>
88    struct CmdlineParserArgumentInfo {
89      // This version will only be used if TArg is arithmetic and thus has the <= operators.
90      template <typename T = TArg>  // Necessary to get SFINAE to kick in.
91      bool CheckRange(const TArg& value, typename EnableIfNumeric<T>::type* = 0) {
92        if (has_range_) {
93          return min_ <= value && value <= max_;
94        }
95        return true;
96      }
97
98      // This version will be used at other times when TArg is not arithmetic.
99      template <typename T = TArg>
100      bool CheckRange(const TArg&, typename DisableIfNumeric<T>::type* = 0) {
101        assert(!has_range_);
102        return true;
103      }
104
105      // Do a quick match token-by-token, and see if they match.
106      // Any tokens with a wildcard in them only match the prefix up until the wildcard.
107      //
108      // If this is true, then the wildcard matching later on can still fail, so this is not
109      // a guarantee that the argument is correct, it's more of a strong hint that the
110      // user-provided input *probably* was trying to match this argument.
111      size_t MaybeMatches(TokenRange token_list) const {
112        auto best_match = FindClosestMatch(token_list);
113
114        return best_match.second;
115      }
116
117      // Attempt to find the closest match (see MaybeMatches).
118      //
119      // Returns the token range that was the closest match and the # of tokens that
120      // this range was matched up until.
121      std::pair<const TokenRange*, size_t> FindClosestMatch(TokenRange token_list) const {
122        const TokenRange* best_match_ptr = nullptr;
123
124        size_t best_match = 0;
125        for (auto&& token_range : tokenized_names_) {
126          size_t this_match = token_range.MaybeMatches(token_list, std::string("_"));
127
128          if (this_match > best_match) {
129            best_match_ptr = &token_range;
130            best_match = this_match;
131          }
132        }
133
134        return std::make_pair(best_match_ptr, best_match);
135      }
136
137      // Mark the argument definition as completed, do not mutate the object anymore after this
138      // call is done.
139      //
140      // Performs several sanity checks and token calculations.
141      void CompleteArgument() {
142        assert(names_.size() >= 1);
143        assert(!is_completed_);
144
145        is_completed_ = true;
146
147        size_t blank_count = 0;
148        size_t token_count = 0;
149
150        size_t global_blank_count = 0;
151        size_t global_token_count = 0;
152        for (auto&& name : names_) {
153          std::string s(name);
154
155          size_t local_blank_count = std::count(s.begin(), s.end(), '_');
156          size_t local_token_count = std::count(s.begin(), s.end(), ' ');
157
158          if (global_blank_count != 0) {
159            assert(local_blank_count == global_blank_count
160                   && "Every argument descriptor string must have same amount of blanks (_)");
161          }
162
163          if (local_blank_count != 0) {
164            global_blank_count = local_blank_count;
165            blank_count++;
166
167            assert(local_blank_count == 1 && "More than one blank is not supported");
168            assert(s.back() == '_' && "The blank character must only be at the end of the string");
169          }
170
171          if (global_token_count != 0) {
172            assert(local_token_count == global_token_count
173                   && "Every argument descriptor string must have same amount of tokens (spaces)");
174          }
175
176          if (local_token_count != 0) {
177            global_token_count = local_token_count;
178            token_count++;
179          }
180
181          // Tokenize every name, turning it from a string to a token list.
182          tokenized_names_.clear();
183          for (auto&& name1 : names_) {
184            // Split along ' ' only, removing any duplicated spaces.
185            tokenized_names_.push_back(
186                TokenRange::Split(name1, {' '}).RemoveToken(" "));
187          }
188
189          // remove the _ character from each of the token ranges
190          // we will often end up with an empty token (i.e. ["-XX", "_"] -> ["-XX", ""]
191          // and this is OK because we still need an empty token to simplify
192          // range comparisons
193          simple_names_.clear();
194
195          for (auto&& tokenized_name : tokenized_names_) {
196            simple_names_.push_back(tokenized_name.RemoveCharacter('_'));
197          }
198        }
199
200        if (token_count != 0) {
201          assert(("Every argument descriptor string must have equal amount of tokens (spaces)" &&
202              token_count == names_.size()));
203        }
204
205        if (blank_count != 0) {
206          assert(("Every argument descriptor string must have an equal amount of blanks (_)" &&
207              blank_count == names_.size()));
208        }
209
210        using_blanks_ = blank_count > 0;
211        {
212          size_t smallest_name_token_range_size =
213              std::accumulate(tokenized_names_.begin(), tokenized_names_.end(), ~(0u),
214                              [](size_t min, const TokenRange& cur) {
215                                return std::min(min, cur.Size());
216                              });
217          size_t largest_name_token_range_size =
218              std::accumulate(tokenized_names_.begin(), tokenized_names_.end(), 0u,
219                              [](size_t max, const TokenRange& cur) {
220                                return std::max(max, cur.Size());
221                              });
222
223          token_range_size_ = std::make_pair(smallest_name_token_range_size,
224                                             largest_name_token_range_size);
225        }
226
227        if (has_value_list_) {
228          assert(names_.size() == value_list_.size()
229                 && "Number of arg descriptors must match number of values");
230          assert(!has_value_map_);
231        }
232        if (has_value_map_) {
233          if (!using_blanks_) {
234            assert(names_.size() == value_map_.size() &&
235                   "Since no blanks were specified, each arg is mapped directly into a mapped "
236                   "value without parsing; sizes must match");
237          }
238
239          assert(!has_value_list_);
240        }
241
242        if (!using_blanks_ && !CmdlineType<TArg>::kCanParseBlankless) {
243          assert((has_value_map_ || has_value_list_) &&
244                 "Arguments without a blank (_) must provide either a value map or a value list");
245        }
246
247        TypedCheck();
248      }
249
250      // List of aliases for a single argument definition, e.g. {"-Xdex2oat", "-Xnodex2oat"}.
251      std::vector<const char*> names_;
252      // Is there at least 1 wildcard '_' in the argument definition?
253      bool using_blanks_ = false;
254      // [min, max] token counts in each arg def
255      std::pair<size_t, size_t> token_range_size_;
256
257      // contains all the names in a tokenized form, i.e. as a space-delimited list
258      std::vector<TokenRange> tokenized_names_;
259
260      // contains the tokenized names, but with the _ character stripped
261      std::vector<TokenRange> simple_names_;
262
263      // For argument definitions created with '.AppendValues()'
264      // Meaning that parsing should mutate the existing value in-place if possible.
265      bool appending_values_ = false;
266
267      // For argument definitions created with '.WithRange(min, max)'
268      bool has_range_ = false;
269      TArg min_;
270      TArg max_;
271
272      // For argument definitions created with '.WithValueMap'
273      bool has_value_map_ = false;
274      std::vector<std::pair<const char*, TArg>> value_map_;
275
276      // For argument definitions created with '.WithValues'
277      bool has_value_list_ = false;
278      std::vector<TArg> value_list_;
279
280      // Make sure there's a default constructor.
281      CmdlineParserArgumentInfo() = default;
282
283      // Ensure there's a default move constructor.
284      CmdlineParserArgumentInfo(CmdlineParserArgumentInfo&&) = default;
285
286     private:
287      // Perform type-specific checks at runtime.
288      template <typename T = TArg>
289      void TypedCheck(typename std::enable_if<std::is_same<Unit, T>::value>::type* = 0) {
290        assert(!using_blanks_ &&
291               "Blanks are not supported in Unit arguments; since a Unit has no parse-able value");
292      }
293
294      void TypedCheck() {}
295
296      bool is_completed_ = false;
297    };
298
299    // A virtual-implementation of the necessary argument information in order to
300    // be able to parse arguments.
301    template <typename TArg>
302    struct CmdlineParseArgument : CmdlineParseArgumentAny {
303      explicit CmdlineParseArgument(CmdlineParserArgumentInfo<TArg>&& argument_info,
304                                    std::function<void(TArg&)>&& save_argument,
305                                    std::function<TArg&(void)>&& load_argument)
306          : argument_info_(std::forward<decltype(argument_info)>(argument_info)),
307            save_argument_(std::forward<decltype(save_argument)>(save_argument)),
308            load_argument_(std::forward<decltype(load_argument)>(load_argument)) {
309      }
310
311      using UserTypeInfo = CmdlineType<TArg>;
312
313      virtual CmdlineResult ParseArgument(const TokenRange& arguments, size_t* consumed_tokens) {
314        assert(arguments.Size() > 0);
315        assert(consumed_tokens != nullptr);
316
317        auto closest_match_res = argument_info_.FindClosestMatch(arguments);
318        size_t best_match_size = closest_match_res.second;
319        const TokenRange* best_match_arg_def = closest_match_res.first;
320
321        if (best_match_size > arguments.Size()) {
322          // The best match has more tokens than were provided.
323          // Shouldn't happen in practice since the outer parser does this check.
324          return CmdlineResult(CmdlineResult::kUnknown, "Size mismatch");
325        }
326
327        assert(best_match_arg_def != nullptr);
328        *consumed_tokens = best_match_arg_def->Size();
329
330        if (!argument_info_.using_blanks_) {
331          return ParseArgumentSingle(arguments.Join(' '));
332        }
333
334        // Extract out the blank value from arguments
335        // e.g. for a def of "foo:_" and input "foo:bar", blank_value == "bar"
336        std::string blank_value = "";
337        size_t idx = 0;
338        for (auto&& def_token : *best_match_arg_def) {
339          auto&& arg_token = arguments[idx];
340
341          // Does this definition-token have a wildcard in it?
342          if (def_token.find('_') == std::string::npos) {
343            // No, regular token. Match 1:1 against the argument token.
344            bool token_match = def_token == arg_token;
345
346            if (!token_match) {
347              return CmdlineResult(CmdlineResult::kFailure,
348                                   std::string("Failed to parse ") + best_match_arg_def->GetToken(0)
349                                   + " at token " + std::to_string(idx));
350            }
351          } else {
352            // This is a wild-carded token.
353            TokenRange def_split_wildcards = TokenRange::Split(def_token, {'_'});
354
355            // Extract the wildcard contents out of the user-provided arg_token.
356            std::unique_ptr<TokenRange> arg_matches =
357                def_split_wildcards.MatchSubstrings(arg_token, "_");
358            if (arg_matches == nullptr) {
359              return CmdlineResult(CmdlineResult::kFailure,
360                                   std::string("Failed to parse ") + best_match_arg_def->GetToken(0)
361                                   + ", with a wildcard pattern " + def_token
362                                   + " at token " + std::to_string(idx));
363            }
364
365            // Get the corresponding wildcard tokens from arg_matches,
366            // and concatenate it to blank_value.
367            for (size_t sub_idx = 0;
368                sub_idx < def_split_wildcards.Size() && sub_idx < arg_matches->Size(); ++sub_idx) {
369              if (def_split_wildcards[sub_idx] == "_") {
370                blank_value += arg_matches->GetToken(sub_idx);
371              }
372            }
373          }
374
375          ++idx;
376        }
377
378        return ParseArgumentSingle(blank_value);
379      }
380
381     private:
382      virtual CmdlineResult ParseArgumentSingle(const std::string& argument) {
383        // TODO: refactor to use LookupValue for the value lists/maps
384
385        // Handle the 'WithValueMap(...)' argument definition
386        if (argument_info_.has_value_map_) {
387          for (auto&& value_pair : argument_info_.value_map_) {
388            const char* name = value_pair.first;
389
390            if (argument == name) {
391              return SaveArgument(value_pair.second);
392            }
393          }
394
395          // Error case: Fail, telling the user what the allowed values were.
396          std::vector<std::string> allowed_values;
397          for (auto&& value_pair : argument_info_.value_map_) {
398            const char* name = value_pair.first;
399            allowed_values.push_back(name);
400          }
401
402          std::string allowed_values_flat = Join(allowed_values, ',');
403          return CmdlineResult(CmdlineResult::kFailure,
404                               "Argument value '" + argument + "' does not match any of known valid"
405                                "values: {" + allowed_values_flat + "}");
406        }
407
408        // Handle the 'WithValues(...)' argument definition
409        if (argument_info_.has_value_list_) {
410          size_t arg_def_idx = 0;
411          for (auto&& value : argument_info_.value_list_) {
412            auto&& arg_def_token = argument_info_.names_[arg_def_idx];
413
414            if (arg_def_token == argument) {
415              return SaveArgument(value);
416            }
417            ++arg_def_idx;
418          }
419
420          assert(arg_def_idx + 1 == argument_info_.value_list_.size() &&
421                 "Number of named argument definitions must match number of values defined");
422
423          // Error case: Fail, telling the user what the allowed values were.
424          std::vector<std::string> allowed_values;
425          for (auto&& arg_name : argument_info_.names_) {
426            allowed_values.push_back(arg_name);
427          }
428
429          std::string allowed_values_flat = Join(allowed_values, ',');
430          return CmdlineResult(CmdlineResult::kFailure,
431                               "Argument value '" + argument + "' does not match any of known valid"
432                                "values: {" + allowed_values_flat + "}");
433        }
434
435        // Handle the regular case where we parsed an unknown value from a blank.
436        UserTypeInfo type_parser;
437
438        if (argument_info_.appending_values_) {
439          TArg& existing = load_argument_();
440          CmdlineParseResult<TArg> result = type_parser.ParseAndAppend(argument, existing);
441
442          assert(!argument_info_.has_range_);
443
444          return result;
445        }
446
447        CmdlineParseResult<TArg> result = type_parser.Parse(argument);
448
449        if (result.IsSuccess()) {
450          TArg& value = result.GetValue();
451
452          // Do a range check for 'WithRange(min,max)' argument definition.
453          if (!argument_info_.CheckRange(value)) {
454            return CmdlineParseResult<TArg>::OutOfRange(
455                value, argument_info_.min_, argument_info_.max_);
456          }
457
458          return SaveArgument(value);
459        }
460
461        // Some kind of type-specific parse error. Pass the result as-is.
462        CmdlineResult raw_result = std::move(result);
463        return raw_result;
464      }
465
466     public:
467      virtual const char* GetTypeName() const {
468        // TODO: Obviate the need for each type specialization to hardcode the type name
469        return UserTypeInfo::Name();
470      }
471
472      // How many tokens should be taken off argv for parsing this argument.
473      // For example "--help" is just 1, "-compiler-option _" would be 2 (since there's a space).
474      //
475      // A [min,max] range is returned to represent argument definitions with multiple
476      // value tokens. (e.g. {"-h", "-h " } would return [1,2]).
477      virtual std::pair<size_t, size_t> GetNumTokens() const {
478        return argument_info_.token_range_size_;
479      }
480
481      // See if this token range might begin the same as the argument definition.
482      virtual size_t MaybeMatches(const TokenRange& tokens) {
483        return argument_info_.MaybeMatches(tokens);
484      }
485
486     private:
487      CmdlineResult SaveArgument(const TArg& value) {
488        assert(!argument_info_.appending_values_
489               && "If the values are being appended, then the updated parse value is "
490                   "updated by-ref as a side effect and shouldn't be stored directly");
491        TArg val = value;
492        save_argument_(val);
493        return CmdlineResult(CmdlineResult::kSuccess);
494      }
495
496      CmdlineParserArgumentInfo<TArg> argument_info_;
497      std::function<void(TArg&)> save_argument_;
498      std::function<TArg&(void)> load_argument_;
499    };
500  } // namespace detail // NOLINT [readability/namespace] [5] [whitespace/comments] [2]
501}  // namespace art
502
503#endif  // ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_
504