1/* expr.c - evaluate expression
2 *
3 * Copyright 2016 Google Inc.
4 * Copyright 2013 Daniel Verkamp <daniel@drv.nu>
5 *
6 * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/expr.html
7 *
8 * The web standard is incomplete (precedence grouping missing), see:
9 * http://permalink.gmane.org/gmane.comp.standards.posix.austin.general/10141
10 *
11 * eval_expr() uses the recursive "Precedence Climbing" algorithm:
12 *
13 * Clarke, Keith. "The top-down parsing of expressions." University of London.
14 * Queen Mary College. Department of Computer Science and Statistics, 1986.
15 *
16 * http://www.antlr.org/papers/Clarke-expr-parsing-1986.pdf
17 *
18 * Nice explanation and Python implementation:
19 * http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing
20
21USE_EXPR(NEWTOY(expr, NULL, TOYFLAG_USR|TOYFLAG_BIN))
22
23config EXPR
24  bool "expr"
25  default n
26  help
27    usage: expr ARG1 OPERATOR ARG2...
28
29    Evaluate expression and print result. For example, "expr 1 + 2".
30
31    The supported operators are (grouped from highest to lowest priority):
32
33      ( )    :    * / %    + -    != <= < >= > =    &    |
34
35    Each constant and operator must be a separate command line argument.
36    All operators are infix, meaning they expect a constant (or expression
37    that resolves to a constant) on each side of the operator. Operators of
38    the same priority (within each group above) are evaluated left to right.
39    Parentheses may be used (as separate arguments) to elevate the priority
40    of expressions.
41
42    Calling expr from a command shell requires a lot of \( or '*' escaping
43    to avoid interpreting shell control characters.
44
45    The & and | operators are logical (not bitwise) and may operate on
46    strings (a blank string is "false"). Comparison operators may also
47    operate on strings (alphabetical sort).
48
49    Constants may be strings or integers. Comparison, logical, and regex
50    operators may operate on strings (a blank string is "false"), other
51    operators require integers.
52*/
53
54// TODO: int overflow checking
55
56#define FOR_expr
57#include "toys.h"
58
59GLOBALS(
60  char **tok; // current token, not on the stack since recursive calls mutate it
61
62  char *refree;
63)
64
65// Scalar value.  If s != NULL, it's a string, otherwise it's an int.
66struct value {
67  char *s;
68  long long i;
69};
70
71// Get the value as a string.
72char *get_str(struct value *v)
73{
74  if (v->s) return v->s;
75  else return xmprintf("%lld", v->i);
76}
77
78// Get the value as an integer and return 1, or return 0 on error.
79int get_int(struct value *v, long long *ret)
80{
81  if (v->s) {
82    char *endp;
83
84    *ret = strtoll(v->s, &endp, 10);
85
86    if (*endp) return 0; // If endp points to NUL, all chars were converted
87  } else *ret = v->i;
88
89  return 1;
90}
91
92// Preserve the invariant that v.s is NULL when the value is an integer.
93void assign_int(struct value *v, long long i)
94{
95  v->i = i;
96  v->s = NULL;
97}
98
99// Check if v is 0 or the empty string.
100static int is_false(struct value *v)
101{
102  return get_int(v, &v->i) && !v->i;
103}
104
105// 'ret' is filled with a string capture or int match position.
106static void re(char *target, char *pattern, struct value *ret)
107{
108  regex_t pat;
109  regmatch_t m[2];
110
111  xregcomp(&pat, pattern, 0);
112  // must match at pos 0
113  if (!regexec(&pat, target, 2, m, 0) && !m[0].rm_so) {
114    // Return first parenthesized subexpression as string, or length of match
115    if (pat.re_nsub>0) {
116      ret->s = xmprintf("%.*s", (int)(m[1].rm_eo-m[1].rm_so),
117          target+m[1].rm_so);
118      if (TT.refree) free(TT.refree);
119      TT.refree = ret->s;
120    } else assign_int(ret, m[0].rm_eo);
121  } else {
122    if (pat.re_nsub>0) ret->s = "";
123    else assign_int(ret, 0);
124  }
125  regfree(&pat);
126}
127
128// 4 different signatures of operators.  S = string, I = int, SI = string or
129// int.
130enum { SI_TO_SI = 1, SI_TO_I, I_TO_I, S_TO_SI };
131
132enum { OR = 1, AND, EQ, NE, GT, GTE, LT, LTE, ADD, SUB, MUL, DIVI, MOD, RE };
133
134// operators grouped by precedence
135static struct op_def {
136  char *tok;
137  char prec, sig, op; // precedence, signature for type coercion, operator ID
138} OPS[] = {
139  // logical ops, precedence 1 and 2, signature SI_TO_SI
140  {"|", 1, SI_TO_SI, OR  },
141  {"&", 2, SI_TO_SI, AND },
142  // comparison ops, precedence 3, signature SI_TO_I
143  {"=", 3, SI_TO_I, EQ }, {"==", 3, SI_TO_I, EQ  }, {"!=", 3, SI_TO_I, NE },
144  {">", 3, SI_TO_I, GT }, {">=", 3, SI_TO_I, GTE },
145  {"<", 3, SI_TO_I, LT }, {"<=", 3, SI_TO_I, LTE },
146  // arithmetic ops, precedence 4 and 5, signature I_TO_I
147  {"+", 4, I_TO_I, ADD }, {"-",  4, I_TO_I, SUB },
148  {"*", 5, I_TO_I, MUL }, {"/",  5, I_TO_I, DIVI }, {"%", 5, I_TO_I, MOD },
149  // regex match, precedence 6, signature S_TO_SI
150  {":", 6, S_TO_SI, RE },
151  {NULL, 0, 0, 0}, // sentinel
152};
153
154void eval_op(struct op_def *o, struct value *ret, struct value *rhs)
155{
156  long long a, b, x = 0; // x = a OP b for ints.
157  char *s, *t; // string operands
158  int cmp;
159
160  switch (o->sig) {
161
162  case SI_TO_SI:
163    switch (o->op) {
164    case OR:  if (is_false(ret)) *ret = *rhs; break;
165    case AND: if (is_false(ret) || is_false(rhs)) assign_int(ret, 0); break;
166    }
167    break;
168
169  case SI_TO_I:
170    if (get_int(ret, &a) && get_int(rhs, &b)) { // both are ints
171      cmp = a - b;
172    } else { // otherwise compare both as strings
173      cmp = strcmp(s = get_str(ret), t = get_str(rhs));
174      if (ret->s != s) free(s);
175      if (rhs->s != t) free(t);
176    }
177    switch (o->op) {
178    case EQ:  x = cmp == 0; break;
179    case NE:  x = cmp != 0; break;
180    case GT:  x = cmp >  0; break;
181    case GTE: x = cmp >= 0; break;
182    case LT:  x = cmp <  0; break;
183    case LTE: x = cmp <= 0; break;
184    }
185    assign_int(ret, x);
186    break;
187
188  case I_TO_I:
189    if (!get_int(ret, &a) || !get_int(rhs, &b))
190      error_exit("non-integer argument");
191    switch (o->op) {
192    case ADD: x = a + b; break;
193    case SUB: x = a - b; break;
194    case MUL: x = a * b; break;
195    case DIVI: if (b == 0) error_exit("division by zero"); x = a / b; break;
196    case MOD:  if (b == 0) error_exit("division by zero"); x = a % b; break;
197    }
198    assign_int(ret, x);
199    break;
200
201  case S_TO_SI: // op == RE
202    s = get_str(ret);
203    cmp = ret->s!=s; // ret overwritten by re so check now
204    re(s, t = get_str(rhs), ret);
205    if (cmp) free(s);
206    if (rhs->s!=t) free(t);
207    break;
208  }
209}
210
211// Evalute a compound expression using recursive "Precedence Climbing"
212// algorithm, setting 'ret'.
213static void eval_expr(struct value *ret, int min_prec)
214{
215  if (!*TT.tok) error_exit("Unexpected end of input");
216
217  // Evaluate LHS atom, setting 'ret'.
218  if (!strcmp(*TT.tok, "(")) { // parenthesized expression
219    TT.tok++;  // consume (
220
221    eval_expr(ret, 1);        // We're inside ( ), so min_prec = 1
222    if (ret->s && !strcmp(ret->s, ")")) error_exit("empty ( )");
223    if (!*TT.tok) error_exit("Expected )");
224    if (strcmp(*TT.tok, ")")) error_exit("Expected ) but got %s", *TT.tok);
225  } else ret->s = *TT.tok;  // simple literal, all values start as strings
226  TT.tok++;
227
228  // Evaluate RHS and apply operator until precedence is too low.
229  struct value rhs;
230  while (*TT.tok) {
231    struct op_def *o = OPS;
232
233    while (o->tok) { // Look up operator
234      if (!strcmp(*TT.tok, o->tok)) break;
235      o++;
236    }
237    if (!o->tok) break; // Not an operator (extra input will fail later)
238    if (o->prec < min_prec) break; // Precedence too low, pop a stack frame
239    TT.tok++;
240
241    eval_expr(&rhs, o->prec + 1); // Evaluate RHS, with higher min precedence
242    eval_op(o, ret, &rhs); // Apply operator, setting 'ret'
243  }
244}
245
246void expr_main(void)
247{
248  struct value ret = {0};
249
250  toys.exitval = 2; // if exiting early, indicate error
251  TT.tok = toys.optargs; // initialize global token
252  eval_expr(&ret, 1);
253  if (*TT.tok) error_exit("Unexpected extra input '%s'\n", *TT.tok);
254
255  if (ret.s) printf("%s\n", ret.s);
256  else printf("%lld\n", ret.i);
257
258  toys.exitval = is_false(&ret);
259
260  if (TT.refree) free(TT.refree);
261}
262