Twine.h revision 050578fb4a006d7a183662f83fc22f7c78475605
1//===-- Twine.h - Fast Temporary String Concatenation -----------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef LLVM_ADT_TWINE_H
11#define LLVM_ADT_TWINE_H
12
13#include "llvm/ADT/StringRef.h"
14#include <cassert>
15#include <string>
16
17namespace llvm {
18  template <typename T>
19  class SmallVectorImpl;
20  class StringRef;
21  class raw_ostream;
22
23  /// Twine - A lightweight data structure for efficiently representing the
24  /// concatenation of temporary values as strings.
25  ///
26  /// A Twine is a kind of rope, it represents a concatenated string using a
27  /// binary-tree, where the string is the preorder of the nodes. Since the
28  /// Twine can be efficiently rendered into a buffer when its result is used,
29  /// it avoids the cost of generating temporary values for intermediate string
30  /// results -- particularly in cases when the Twine result is never
31  /// required. By explicitly tracking the type of leaf nodes, we can also avoid
32  /// the creation of temporary strings for conversions operations (such as
33  /// appending an integer to a string).
34  ///
35  /// A Twine is not intended for use directly and should not be stored, its
36  /// implementation relies on the ability to store pointers to temporary stack
37  /// objects which may be deallocated at the end of a statement. Twines should
38  /// only be used accepted as const references in arguments, when an API wishes
39  /// to accept possibly-concatenated strings.
40  ///
41  /// Twines support a special 'null' value, which always concatenates to form
42  /// itself, and renders as an empty string. This can be returned from APIs to
43  /// effectively nullify any concatenations performed on the result.
44  ///
45  /// \b Implementation \n
46  ///
47  /// Given the nature of a Twine, it is not possible for the Twine's
48  /// concatenation method to construct interior nodes; the result must be
49  /// represented inside the returned value. For this reason a Twine object
50  /// actually holds two values, the left- and right-hand sides of a
51  /// concatenation. We also have nullary Twine objects, which are effectively
52  /// sentinel values that represent empty strings.
53  ///
54  /// Thus, a Twine can effectively have zero, one, or two children. The \see
55  /// isNullary(), \see isUnary(), and \see isBinary() predicates exist for
56  /// testing the number of children.
57  ///
58  /// We maintain a number of invariants on Twine objects (FIXME: Why):
59  ///  - Nullary twines are always represented with their Kind on the left-hand
60  ///    side, and the Empty kind on the right-hand side.
61  ///  - Unary twines are always represented with the value on the left-hand
62  ///    side, and the Empty kind on the right-hand side.
63  ///  - If a Twine has another Twine as a child, that child should always be
64  ///    binary (otherwise it could have been folded into the parent).
65  ///
66  /// These invariants are check by \see isValid().
67  ///
68  /// \b Efficiency Considerations \n
69  ///
70  /// The Twine is designed to yield efficient and small code for common
71  /// situations. For this reason, the concat() method is inlined so that
72  /// concatenations of leaf nodes can be optimized into stores directly into a
73  /// single stack allocated object.
74  ///
75  /// In practice, not all compilers can be trusted to optimize concat() fully,
76  /// so we provide two additional methods (and accompanying operator+
77  /// overloads) to guarantee that particularly important cases (cstring plus
78  /// StringRef) codegen as desired.
79  class Twine {
80    /// NodeKind - Represent the type of an argument.
81    enum NodeKind {
82      /// An empty string; the result of concatenating anything with it is also
83      /// empty.
84      NullKind,
85
86      /// The empty string.
87      EmptyKind,
88
89      /// A pointer to a C string instance.
90      CStringKind,
91
92      /// A pointer to an std::string instance.
93      StdStringKind,
94
95      /// A pointer to a StringRef instance.
96      StringRefKind,
97
98      /// A pointer to a Twine instance.
99      TwineKind
100    };
101
102  private:
103    /// LHS - The prefix in the concatenation, which may be uninitialized for
104    /// Null or Empty kinds.
105    const void *LHS;
106    /// RHS - The suffix in the concatenation, which may be uninitialized for
107    /// Null or Empty kinds.
108    const void *RHS;
109    /// LHSKind - The NodeKind of the left hand side, \see getLHSKind().
110    NodeKind LHSKind : 8;
111    /// RHSKind - The NodeKind of the left hand side, \see getLHSKind().
112    NodeKind RHSKind : 8;
113
114  private:
115    /// Construct a nullary twine; the kind must be NullKind or EmptyKind.
116    explicit Twine(NodeKind Kind)
117      : LHSKind(Kind), RHSKind(EmptyKind) {
118      assert(isNullary() && "Invalid kind!");
119    }
120
121    /// Construct a binary twine.
122    explicit Twine(const Twine &_LHS, const Twine &_RHS)
123      : LHS(&_LHS), RHS(&_RHS), LHSKind(TwineKind), RHSKind(TwineKind) {
124      assert(isValid() && "Invalid twine!");
125    }
126
127    /// Construct a twine from explicit values.
128    explicit Twine(const void *_LHS, NodeKind _LHSKind,
129                   const void *_RHS, NodeKind _RHSKind)
130      : LHS(_LHS), RHS(_RHS), LHSKind(_LHSKind), RHSKind(_RHSKind) {
131      assert(isValid() && "Invalid twine!");
132    }
133
134    /// isNull - Check for the null twine.
135    bool isNull() const {
136      return getLHSKind() == NullKind;
137    }
138
139    /// isEmpty - Check for the empty twine.
140    bool isEmpty() const {
141      return getLHSKind() == EmptyKind;
142    }
143
144    /// isNullary - Check if this is a nullary twine (null or empty).
145    bool isNullary() const {
146      return isNull() || isEmpty();
147    }
148
149    /// isUnary - Check if this is a unary twine.
150    bool isUnary() const {
151      return getRHSKind() == EmptyKind && !isNullary();
152    }
153
154    /// isBinary - Check if this is a binary twine.
155    bool isBinary() const {
156      return getLHSKind() != NullKind && getRHSKind() != EmptyKind;
157    }
158
159    /// isValid - Check if this is a valid twine (satisfying the invariants on
160    /// order and number of arguments).
161    bool isValid() const {
162      // Nullary twines always have Empty on the RHS.
163      if (isNullary() && getRHSKind() != EmptyKind)
164        return false;
165
166      // Null should never appear on the RHS.
167      if (getRHSKind() == NullKind)
168        return false;
169
170      // The RHS cannot be non-empty if the LHS is empty.
171      if (getRHSKind() != EmptyKind && getLHSKind() == EmptyKind)
172        return false;
173
174      // A twine child should always be binary.
175      if (getLHSKind() == TwineKind &&
176          !static_cast<const Twine*>(LHS)->isBinary())
177        return false;
178      if (getRHSKind() == TwineKind &&
179          !static_cast<const Twine*>(RHS)->isBinary())
180        return false;
181
182      return true;
183    }
184
185    /// getLHSKind - Get the NodeKind of the left-hand side.
186    NodeKind getLHSKind() const { return LHSKind; }
187
188    /// getRHSKind - Get the NodeKind of the left-hand side.
189    NodeKind getRHSKind() const { return RHSKind; }
190
191    /// printOneChild - Print one child from a twine.
192    void printOneChild(raw_ostream &OS, const void *Ptr, NodeKind Kind) const;
193
194    /// printOneChildRepr - Print the representation of one child from a twine.
195    void printOneChildRepr(raw_ostream &OS, const void *Ptr,
196                           NodeKind Kind) const;
197
198  public:
199    /// @name Constructors
200    /// @{
201
202    /// Construct from an empty string.
203    /*implicit*/ Twine() : LHSKind(EmptyKind), RHSKind(EmptyKind) {
204      assert(isValid() && "Invalid twine!");
205    }
206
207    /// Construct from a C string.
208    ///
209    /// We take care here to optimize "" into the empty twine -- this will be
210    /// optimized out for string constants. This allows Twine arguments have
211    /// default "" values, without introducing unnecessary string constants.
212    /*implicit*/ Twine(const char *Str)
213      : RHSKind(EmptyKind) {
214      if (Str[0] != '\0') {
215        LHS = Str;
216        LHSKind = CStringKind;
217      } else
218        LHSKind = EmptyKind;
219
220      assert(isValid() && "Invalid twine!");
221    }
222
223    /// Construct from an std::string.
224    /*implicit*/ Twine(const std::string &Str)
225      : LHS(&Str), LHSKind(StdStringKind), RHSKind(EmptyKind) {
226      assert(isValid() && "Invalid twine!");
227    }
228
229    /// Construct from a StringRef.
230    /*implicit*/ Twine(const StringRef &Str)
231      : LHS(&Str), LHSKind(StringRefKind), RHSKind(EmptyKind) {
232      assert(isValid() && "Invalid twine!");
233    }
234
235    /// Create a 'null' string, which is an empty string that always
236    /// concatenates to form another empty string.
237    static Twine createNull() {
238      return Twine(NullKind);
239    }
240
241    // FIXME: Unfortunately, to make sure this is as efficient as possible we
242    // need extra binary constructors from particular types. We can't rely on
243    // the compiler to be smart enough to fold operator+()/concat() down to the
244    // right thing. Yet.
245
246    /// Construct as the concatenation of a C string and a StringRef.
247    /*implicit*/ Twine(const char *_LHS, const StringRef &_RHS)
248      : LHS(_LHS), RHS(&_RHS), LHSKind(CStringKind), RHSKind(StringRefKind) {
249      assert(isValid() && "Invalid twine!");
250    }
251
252    /// Construct as the concatenation of a StringRef and a C string.
253    /*implicit*/ Twine(const StringRef &_LHS, const char *_RHS)
254      : LHS(&_LHS), RHS(_RHS), LHSKind(StringRefKind), RHSKind(CStringKind) {
255      assert(isValid() && "Invalid twine!");
256    }
257
258    /// @}
259    /// @name String Operations
260    /// @{
261
262    Twine concat(const Twine &Suffix) const;
263
264    /// @}
265    /// @name Output & Conversion.
266    /// @{
267
268    /// str - Return the twine contents as a std::string.
269    std::string str() const;
270
271    /// toVector - Write the concatenated string into the given SmallString or
272    /// SmallVector.
273    void toVector(SmallVectorImpl<char> &Out) const;
274
275    /// print - Write the concatenated string represented by this twine to the
276    /// stream \arg OS.
277    void print(raw_ostream &OS) const;
278
279    /// dump - Dump the concatenated string represented by this twine to stderr.
280    void dump() const;
281
282    /// print - Write the representation of this twine to the stream \arg OS.
283    void printRepr(raw_ostream &OS) const;
284
285    /// dumpRepr - Dump the representation of this twine to stderr.
286    void dumpRepr() const;
287
288    /// @}
289  };
290
291  /// @name Twine Inline Implementations
292  /// @{
293
294  inline Twine Twine::concat(const Twine &Suffix) const {
295    // Concatenation with null is null.
296    if (isNull() || Suffix.isNull())
297      return Twine(NullKind);
298
299    // Concatenation with empty yields the other side.
300    if (isEmpty())
301      return Suffix;
302    if (Suffix.isEmpty())
303      return *this;
304
305    // Otherwise we need to create a new node, taking care to fold in unary
306    // twines.
307    const void *NewLHS = this, *NewRHS = &Suffix;
308    NodeKind NewLHSKind = TwineKind, NewRHSKind = TwineKind;
309    if (isUnary()) {
310      NewLHS = LHS;
311      NewLHSKind = getLHSKind();
312    }
313    if (Suffix.isUnary()) {
314      NewRHS = Suffix.LHS;
315      NewRHSKind = Suffix.getLHSKind();
316    }
317
318    return Twine(NewLHS, NewLHSKind, NewRHS, NewRHSKind);
319  }
320
321  inline Twine operator+(const Twine &LHS, const Twine &RHS) {
322    return LHS.concat(RHS);
323  }
324
325  /// Additional overload to guarantee simplified codegen; this is equivalent to
326  /// concat().
327
328  inline Twine operator+(const char *LHS, const StringRef &RHS) {
329    return Twine(LHS, RHS);
330  }
331
332  /// Additional overload to guarantee simplified codegen; this is equivalent to
333  /// concat().
334
335  inline Twine operator+(const StringRef &LHS, const char *RHS) {
336    return Twine(LHS, RHS);
337  }
338
339  inline raw_ostream &operator<<(raw_ostream &OS, const Twine &RHS) {
340    RHS.print(OS);
341    return OS;
342  }
343
344  /// @}
345}
346
347#endif
348