// Copyright 2012 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.


// Declares a Simulator for ARM instructions if we are not generating a native
// ARM binary. This Simulator allows us to run and debug ARM code generation on
// regular desktop machines.
// V8 calls into generated code by "calling" the CALL_GENERATED_CODE macro,
// which will start execution in the Simulator or forward to the real entry
// on an ARM HW platform.

#ifndef V8_ARM_SIMULATOR_ARM_H_
#define V8_ARM_SIMULATOR_ARM_H_

#include "src/allocation.h"
#include "src/base/lazy-instance.h"
#include "src/base/platform/mutex.h"

#if !defined(USE_SIMULATOR)
// Running without a simulator on a native arm platform.

namespace v8 {
namespace internal {

// When running without a simulator we call the entry directly.
#define CALL_GENERATED_CODE(isolate, entry, p0, p1, p2, p3, p4) \
  (entry(p0, p1, p2, p3, p4))

typedef int (*arm_regexp_matcher)(String*, int, const byte*, const byte*,
                                  void*, int*, int, Address, int, Isolate*);


// Call the generated regexp code directly. The code at the entry address
// should act as a function matching the type arm_regexp_matcher.
// The fifth argument is a dummy that reserves the space used for
// the return address added by the ExitFrame in native calls.
#define CALL_GENERATED_REGEXP_CODE(isolate, entry, p0, p1, p2, p3, p4, p5, p6, \
                                   p7, p8)                                     \
  (FUNCTION_CAST<arm_regexp_matcher>(entry)(p0, p1, p2, p3, NULL, p4, p5, p6,  \
                                            p7, p8))
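
// Rough usage sketch (argument names are illustrative, not part of this
// interface): a caller matching arm_regexp_matcher would invoke the macro
// roughly like
//
//   int result = CALL_GENERATED_REGEXP_CODE(
//       isolate, code_entry, subject, start_offset, input_start, input_end,
//       output_registers, output_register_count, stack_base, direct_call,
//       isolate);
//
// The macro itself supplies the NULL return-address slot described above, so
// callers pass only the nine "real" regexp arguments.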

// The stack limit beyond which we will throw stack overflow errors in
// generated code. Because generated code on arm uses the C stack, we
// just use the C stack limit.
class SimulatorStack : public v8::internal::AllStatic {
 public:
  static inline uintptr_t JsLimitFromCLimit(v8::internal::Isolate* isolate,
                                            uintptr_t c_limit) {
    USE(isolate);
    return c_limit;
  }

  static inline uintptr_t RegisterCTryCatch(v8::internal::Isolate* isolate,
                                            uintptr_t try_catch_address) {
    USE(isolate);
    return try_catch_address;
  }

  static inline void UnregisterCTryCatch(v8::internal::Isolate* isolate) {
    USE(isolate);
  }
};

}  // namespace internal
}  // namespace v8

#else  // !defined(USE_SIMULATOR)
// Running with a simulator.

#include "src/arm/constants-arm.h"
#include "src/assembler.h"
#include "src/base/hashmap.h"

namespace v8 {
namespace internal {

class CachePage {
 public:
  static const int LINE_VALID = 0;
  static const int LINE_INVALID = 1;

  static const int kPageShift = 12;
  static const int kPageSize = 1 << kPageShift;
  static const int kPageMask = kPageSize - 1;
  static const int kLineShift = 2;  // The cache line is only 4 bytes right now.
  static const int kLineLength = 1 << kLineShift;
  static const int kLineMask = kLineLength - 1;

  CachePage() {
    memset(&validity_map_, LINE_INVALID, sizeof(validity_map_));
  }

  char* ValidityByte(int offset) {
    return &validity_map_[offset >> kLineShift];
  }

  char* CachedData(int offset) {
    return &data_[offset];
  }

 private:
  char data_[kPageSize];   // The cached data.
  static const int kValidityMapSize = kPageSize >> kLineShift;
  char validity_map_[kValidityMapSize];  // One byte per line.
};
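
// Illustrative sketch only (not part of the interface): a page-relative
// offset selects one 4-byte line, so a cache-check routine could validate a
// line roughly as follows, with `page` and `addr` as hypothetical locals:
//
//   int offset = addr & CachePage::kPageMask;
//   char* valid = page->ValidityByte(offset);
//   if (*valid != CachePage::LINE_VALID) {
//     memcpy(page->CachedData(offset & ~CachePage::kLineMask),
//            reinterpret_cast<void*>(addr & ~CachePage::kLineMask),
//            CachePage::kLineLength);
//     *valid = CachePage::LINE_VALID;
//   }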


class Simulator {
 public:
  friend class ArmDebugger;
  enum Register {
    no_reg = -1,
    r0 = 0, r1, r2, r3, r4, r5, r6, r7,
    r8, r9, r10, r11, r12, r13, r14, r15,
    num_registers,
    sp = 13,
    lr = 14,
    pc = 15,
    s0 = 0, s1, s2, s3, s4, s5, s6, s7,
    s8, s9, s10, s11, s12, s13, s14, s15,
    s16, s17, s18, s19, s20, s21, s22, s23,
    s24, s25, s26, s27, s28, s29, s30, s31,
    num_s_registers = 32,
    d0 = 0, d1, d2, d3, d4, d5, d6, d7,
    d8, d9, d10, d11, d12, d13, d14, d15,
    d16, d17, d18, d19, d20, d21, d22, d23,
    d24, d25, d26, d27, d28, d29, d30, d31,
    num_d_registers = 32,
    q0 = 0, q1, q2, q3, q4, q5, q6, q7,
    q8, q9, q10, q11, q12, q13, q14, q15,
    num_q_registers = 16
  };

  explicit Simulator(Isolate* isolate);
  ~Simulator();

  // The currently executing Simulator instance. Potentially there can be one
  // for each native thread.
  static Simulator* current(v8::internal::Isolate* isolate);

  // Accessors for register state. Reading the pc value adheres to the ARM
  // architecture specification and is off by 8 from the currently executing
  // instruction.
  void set_register(int reg, int32_t value);
  int32_t get_register(int reg) const;
  double get_double_from_register_pair(int reg);
  void set_register_pair_from_double(int reg, double* value);
  void set_dw_register(int dreg, const int* dbl);

  // Support for VFP.
  void get_d_register(int dreg, uint64_t* value);
  void set_d_register(int dreg, const uint64_t* value);
  void get_d_register(int dreg, uint32_t* value);
  void set_d_register(int dreg, const uint32_t* value);
  // Support for NEON.
  template <typename T>
  void get_q_register(int qreg, T* value);
  template <typename T>
  void set_q_register(int qreg, const T* value);

  void set_s_register(int reg, unsigned int value);
  unsigned int get_s_register(int reg) const;

  void set_d_register_from_double(int dreg, const double& dbl) {
    SetVFPRegister<double, 2>(dreg, dbl);
  }

  double get_double_from_d_register(int dreg) {
    return GetFromVFPRegister<double, 2>(dreg);
  }

  void set_s_register_from_float(int sreg, const float flt) {
    SetVFPRegister<float, 1>(sreg, flt);
  }

  float get_float_from_s_register(int sreg) {
    return GetFromVFPRegister<float, 1>(sreg);
  }

  void set_s_register_from_sinteger(int sreg, const int sint) {
    SetVFPRegister<int, 1>(sreg, sint);
  }

  int get_sinteger_from_s_register(int sreg) {
    return GetFromVFPRegister<int, 1>(sreg);
  }

  // Special case of set_register and get_register to access the raw PC value.
  void set_pc(int32_t value);
  int32_t get_pc() const;

  Address get_sp() const {
    return reinterpret_cast<Address>(static_cast<intptr_t>(get_register(sp)));
  }

  // Accessor to the internal simulator stack area.
  uintptr_t StackLimit(uintptr_t c_limit) const;

  // Executes ARM instructions until the PC reaches end_sim_pc.
  void Execute();

  // Call on program start.
  static void Initialize(Isolate* isolate);

  static void TearDown(base::CustomMatcherHashMap* i_cache, Redirection* first);

  // V8 generally calls into generated JS code with 5 parameters and into
  // generated RegExp code with 7 parameters. This is a convenience function
  // that sets up the simulator state and grabs the result on return.
  int32_t Call(byte* entry, int argument_count, ...);
  // Alternative: call a 2-argument double function.
  void CallFP(byte* entry, double d0, double d1);
  int32_t CallFPReturnsInt(byte* entry, double d0, double d1);
  double CallFPReturnsDouble(byte* entry, double d0, double d1);
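
  // Rough usage sketch (argument names are illustrative): a JS entry stub
  // taking five parameters would be driven through the variadic Call, e.g.
  //
  //   int32_t result = sim->Call(entry, 5, p0, p1, p2, p3, p4);
  //
  // Call roughly places the first four integer arguments in r0-r3 and the
  // rest on the simulator stack per the ARM calling convention, runs
  // Execute(), and returns the value left in r0 by the simulated code.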

  // Push an address onto the JS stack.
  uintptr_t PushAddress(uintptr_t address);

  // Pop an address from the JS stack.
  uintptr_t PopAddress();

  // Debugger input.
  void set_last_debugger_input(char* input);
  char* last_debugger_input() { return last_debugger_input_; }

  // ICache checking.
  static void FlushICache(base::CustomMatcherHashMap* i_cache, void* start,
                          size_t size);

  // Returns true if pc register contains one of the 'special_values' defined
  // below (bad_lr, end_sim_pc).
  bool has_bad_pc() const;

  // EABI variant for double arguments in use.
  bool use_eabi_hardfloat() {
#if USE_EABI_HARDFLOAT
    return true;
#else
    return false;
#endif
  }

 private:
  enum special_values {
    // Known bad pc value to ensure that the simulator does not execute
    // without being properly set up.
    bad_lr = -1,
    // A pc value used to signal the simulator to stop execution.  Generally
    // the lr is set to this value on transition from native C code to
    // simulated execution, so that the simulator can "return" to the native
    // C code.
    end_sim_pc = -2
  };
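
  // A minimal sketch of how end_sim_pc is used (assuming the usual
  // Call/Execute flow; not a verbatim copy of the implementation):
  //
  //   set_register(lr, end_sim_pc);   // "return address" for simulated code
  //   set_pc(reinterpret_cast<int32_t>(entry));
  //   Execute();                      // runs until pc == end_sim_pc
  //
  // When the generated code executes its final "bx lr", the pc becomes
  // end_sim_pc and the simulation loop hands control back to C++.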

  // Unsupported instructions use Format to print an error and stop execution.
  void Format(Instruction* instr, const char* format);

  // Checks if the current instruction should be executed based on its
  // condition bits.
  inline bool ConditionallyExecute(Instruction* instr);

  // Helper functions to set the conditional flags in the architecture state.
  void SetNZFlags(int32_t val);
  void SetCFlag(bool val);
  void SetVFlag(bool val);
  bool CarryFrom(int32_t left, int32_t right, int32_t carry = 0);
  bool BorrowFrom(int32_t left, int32_t right, int32_t carry = 1);
  bool OverflowFrom(int32_t alu_out,
                    int32_t left,
                    int32_t right,
                    bool addition);

  inline int GetCarry() {
    return c_flag_ ? 1 : 0;
  }
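
  // Worked examples of the flag helpers above (values chosen for
  // illustration): CarryFrom(0xFFFFFFFF, 1) is true because the unsigned sum
  // wraps past 2^32; OverflowFrom(0x80000000, 0x7FFFFFFF, 1, true) is true
  // because two positive operands produced a negative result; and
  // BorrowFrom(0, 1) is true because the unsigned subtraction underflows.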

  // Support for VFP.
  void Compute_FPSCR_Flags(float val1, float val2);
  void Compute_FPSCR_Flags(double val1, double val2);
  void Copy_FPSCR_to_APSR();
  inline float canonicalizeNaN(float value);
  inline double canonicalizeNaN(double value);

  // Helper functions to decode common "addressing" modes
  int32_t GetShiftRm(Instruction* instr, bool* carry_out);
  int32_t GetImm(Instruction* instr, bool* carry_out);
  int32_t ProcessPU(Instruction* instr,
                    int num_regs,
                    int operand_size,
                    intptr_t* start_address,
                    intptr_t* end_address);
  void HandleRList(Instruction* instr, bool load);
  void HandleVList(Instruction* inst);
  void SoftwareInterrupt(Instruction* instr);

  // Stop helper functions.
  inline bool isStopInstruction(Instruction* instr);
  inline bool isWatchedStop(uint32_t bkpt_code);
  inline bool isEnabledStop(uint32_t bkpt_code);
  inline void EnableStop(uint32_t bkpt_code);
  inline void DisableStop(uint32_t bkpt_code);
  inline void IncreaseStopCounter(uint32_t bkpt_code);
  void PrintStopInfo(uint32_t code);

  // Read and write memory.
  // The *Ex functions are exclusive access. The writes return the strex status:
  // 0 if the write succeeds, and 1 if the write fails.
  inline uint8_t ReadBU(int32_t addr);
  inline int8_t ReadB(int32_t addr);
  uint8_t ReadExBU(int32_t addr);
  inline void WriteB(int32_t addr, uint8_t value);
  inline void WriteB(int32_t addr, int8_t value);
  int WriteExB(int32_t addr, uint8_t value);

  inline uint16_t ReadHU(int32_t addr, Instruction* instr);
  inline int16_t ReadH(int32_t addr, Instruction* instr);
  uint16_t ReadExHU(int32_t addr, Instruction* instr);
  // Note: Overloaded on the sign of the value.
  inline void WriteH(int32_t addr, uint16_t value, Instruction* instr);
  inline void WriteH(int32_t addr, int16_t value, Instruction* instr);
  int WriteExH(int32_t addr, uint16_t value, Instruction* instr);

  inline int ReadW(int32_t addr, Instruction* instr);
  int ReadExW(int32_t addr, Instruction* instr);
  inline void WriteW(int32_t addr, int value, Instruction* instr);
  int WriteExW(int32_t addr, int value, Instruction* instr);

  int32_t* ReadDW(int32_t addr);
  void WriteDW(int32_t addr, int32_t value1, int32_t value2);
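
  // Illustrative sketch only: a simulated LDREX/STREX retry loop, as emitted
  // by generated code for an atomic word increment, maps onto these helpers
  // roughly as follows, retrying while the exclusive store reports failure
  // (status 1):
  //
  //   int status;
  //   do {
  //     int old_value = ReadExW(addr, instr);          // ldrex
  //     status = WriteExW(addr, old_value + 1, instr); // strex
  //   } while (status != 0);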

  // Execution is handled based on the instruction type.
  // Both type 0 and type 1 rolled into one.
  void DecodeType01(Instruction* instr);
  void DecodeType2(Instruction* instr);
  void DecodeType3(Instruction* instr);
  void DecodeType4(Instruction* instr);
  void DecodeType5(Instruction* instr);
  void DecodeType6(Instruction* instr);
  void DecodeType7(Instruction* instr);

  // CP15 coprocessor instructions.
  void DecodeTypeCP15(Instruction* instr);

  // Support for VFP.
  void DecodeTypeVFP(Instruction* instr);
  void DecodeType6CoprocessorIns(Instruction* instr);
  void DecodeSpecialCondition(Instruction* instr);

  void DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(Instruction* instr);
  void DecodeVCMP(Instruction* instr);
  void DecodeVCVTBetweenDoubleAndSingle(Instruction* instr);
  int32_t ConvertDoubleToInt(double val, bool unsigned_integer,
                             VFPRoundingMode mode);
  void DecodeVCVTBetweenFloatingPointAndInteger(Instruction* instr);

  // Executes one instruction.
  void InstructionDecode(Instruction* instr);

  // ICache.
  static void CheckICache(base::CustomMatcherHashMap* i_cache,
                          Instruction* instr);
  static void FlushOnePage(base::CustomMatcherHashMap* i_cache, intptr_t start,
                           int size);
  static CachePage* GetCachePage(base::CustomMatcherHashMap* i_cache,
                                 void* page);

  // Runtime call support.
  static void* RedirectExternalReference(
      Isolate* isolate, void* external_function,
      v8::internal::ExternalReference::Type type);

  // Handle arguments and return value for runtime FP functions.
  void GetFpArgs(double* x, double* y, int32_t* z);
  void SetFpResult(const double& result);
  void TrashCallerSaveRegisters();

  template<class ReturnType, int register_size>
      ReturnType GetFromVFPRegister(int reg_index);

  template<class InputType, int register_size>
      void SetVFPRegister(int reg_index, const InputType& value);

  void SetSpecialRegister(SRegisterFieldMask reg_and_mask, uint32_t value);
  uint32_t GetFromSpecialRegister(SRegister reg);

  void CallInternal(byte* entry);

  // Architecture state.
  // Saturating instructions require a Q flag to indicate saturation.
  // There is currently no way to read the CPSR directly, and thus read the Q
  // flag, so this is left unimplemented.
  int32_t registers_[16];
  bool n_flag_;
  bool z_flag_;
  bool c_flag_;
  bool v_flag_;

  // VFP architecture state.
  unsigned int vfp_registers_[num_d_registers * 2];
  bool n_flag_FPSCR_;
  bool z_flag_FPSCR_;
  bool c_flag_FPSCR_;
  bool v_flag_FPSCR_;

  // VFP rounding mode. See ARM DDI 0406B Page A2-29.
  VFPRoundingMode FPSCR_rounding_mode_;
  bool FPSCR_default_NaN_mode_;

  // VFP FP exception flags architecture state.
  bool inv_op_vfp_flag_;
  bool div_zero_vfp_flag_;
  bool overflow_vfp_flag_;
  bool underflow_vfp_flag_;
  bool inexact_vfp_flag_;

  // Simulator support.
  char* stack_;
  bool pc_modified_;
  int icount_;

  // Debugger input.
  char* last_debugger_input_;

  // ICache simulation.
  base::CustomMatcherHashMap* i_cache_;

  // Registered breakpoints.
  Instruction* break_pc_;
  Instr break_instr_;

  v8::internal::Isolate* isolate_;

  // A stop is watched if its code is less than kNumOfWatchedStops.
  // Only watched stops support enabling/disabling and the counter feature.
  static const uint32_t kNumOfWatchedStops = 256;

  // Breakpoint is disabled if bit 31 is set.
  static const uint32_t kStopDisabledBit = 1 << 31;

  // A stop is enabled, meaning the simulator will stop when it reaches the
  // instruction, if bit 31 of watched_stops_[code].count is unset.
  // The value watched_stops_[code].count & ~(1 << 31) indicates how many times
  // the breakpoint has been hit or gone through.
  struct StopCountAndDesc {
    uint32_t count;
    char* desc;
  };
  StopCountAndDesc watched_stops_[kNumOfWatchedStops];
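
  // Encoding example (illustrative): watched_stops_[code].count ==
  // (kStopDisabledBit | 3) means the stop is disabled and has been reached
  // 3 times so far; clearing bit 31 re-enables it without losing the count.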

  // Synchronization primitives. See ARM DDI 0406C.b, A2.9.
  enum class MonitorAccess {
    Open,
    Exclusive,
  };

  enum class TransactionSize {
    None = 0,
    Byte = 1,
    HalfWord = 2,
    Word = 4,
  };

  // The least-significant bits of the address are ignored. The number of bits
  // is implementation-defined, between 3 and 11. See ARM DDI 0406C.b, A3.4.3.
  static const int32_t kExclusiveTaggedAddrMask = ~((1 << 11) - 1);
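
  // Example (illustrative): with this mask, exclusive reservations are
  // tracked at 2 KB (2^11 byte) granularity, so 0x10000004 and 0x100007FC
  // tag to the same reservation granule, while 0x10000800 falls in the next.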

  class LocalMonitor {
   public:
    LocalMonitor();

    // These functions manage the state machine for the local monitor, but do
    // not actually perform loads and stores. NotifyStoreExcl only returns
    // true if the exclusive store is allowed; the global monitor will still
    // have to be checked to see whether the memory should be updated.
    void NotifyLoad(int32_t addr);
    void NotifyLoadExcl(int32_t addr, TransactionSize size);
    void NotifyStore(int32_t addr);
    bool NotifyStoreExcl(int32_t addr, TransactionSize size);

   private:
    void Clear();

    MonitorAccess access_state_;
    int32_t tagged_addr_;
    TransactionSize size_;
  };

  class GlobalMonitor {
   public:
    GlobalMonitor();

    class Processor {
     public:
      Processor();

     private:
      friend class GlobalMonitor;
      // These functions manage the state machine for the global monitor, but do
      // not actually perform loads and stores.
      void Clear_Locked();
      void NotifyLoadExcl_Locked(int32_t addr);
      void NotifyStore_Locked(int32_t addr, bool is_requesting_processor);
      bool NotifyStoreExcl_Locked(int32_t addr, bool is_requesting_processor);

      MonitorAccess access_state_;
      int32_t tagged_addr_;
      Processor* next_;
      Processor* prev_;
      // A strex can fail due to background cache evictions. Rather than
      // simulating this, we'll just occasionally introduce cases where an
      // exclusive store fails. This will happen once after every
      // kMaxFailureCounter exclusive stores.
      static const int kMaxFailureCounter = 5;
      int failure_counter_;
    };

    // Exposed so it can be accessed by Simulator::{Read,Write}Ex*.
    base::Mutex mutex;

    void NotifyLoadExcl_Locked(int32_t addr, Processor* processor);
    void NotifyStore_Locked(int32_t addr, Processor* processor);
    bool NotifyStoreExcl_Locked(int32_t addr, Processor* processor);

    // Called when the simulator is destroyed.
    void RemoveProcessor(Processor* processor);

   private:
    bool IsProcessorInLinkedList_Locked(Processor* processor) const;
    void PrependProcessor_Locked(Processor* processor);

    Processor* head_;
  };

  LocalMonitor local_monitor_;
  GlobalMonitor::Processor global_monitor_processor_;
  static base::LazyInstance<GlobalMonitor>::type global_monitor_;
};


// When running with the simulator, transition into simulated execution at this
// point.
#define CALL_GENERATED_CODE(isolate, entry, p0, p1, p2, p3, p4) \
  reinterpret_cast<Object*>(Simulator::current(isolate)->Call(  \
      FUNCTION_ADDR(entry), 5, p0, p1, p2, p3, p4))

#define CALL_GENERATED_FP_INT(isolate, entry, p0, p1) \
  Simulator::current(isolate)->CallFPReturnsInt(FUNCTION_ADDR(entry), p0, p1)

#define CALL_GENERATED_REGEXP_CODE(isolate, entry, p0, p1, p2, p3, p4, p5, p6, \
                                   p7, p8)                                     \
  Simulator::current(isolate)                                                  \
      ->Call(entry, 10, p0, p1, p2, p3, NULL, p4, p5, p6, p7, p8)


// The simulator has its own stack. Thus it has a different stack limit from
// the C-based native code.  The JS-based limit normally points near the end of
// the simulator stack.  When the C-based limit is exhausted we reflect that by
// lowering the JS-based limit as well, to make stack checks trigger.
class SimulatorStack : public v8::internal::AllStatic {
 public:
  static inline uintptr_t JsLimitFromCLimit(v8::internal::Isolate* isolate,
                                            uintptr_t c_limit) {
    return Simulator::current(isolate)->StackLimit(c_limit);
  }

  static inline uintptr_t RegisterCTryCatch(v8::internal::Isolate* isolate,
                                            uintptr_t try_catch_address) {
    Simulator* sim = Simulator::current(isolate);
    return sim->PushAddress(try_catch_address);
  }

  static inline void UnregisterCTryCatch(v8::internal::Isolate* isolate) {
    Simulator::current(isolate)->PopAddress();
  }
};

}  // namespace internal
}  // namespace v8

#endif  // !defined(USE_SIMULATOR)
#endif  // V8_ARM_SIMULATOR_ARM_H_