/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "parallel_move_resolver.h"

#include "base/stl_util.h"
#include "nodes.h"

namespace art {

void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
  // Perform a linear sweep of the moves to add them to the initial list of
  // moves to perform, ignoring any move that is redundant (the source is
  // the same as the destination, the destination is ignored and
  // unallocated, or the move was already eliminated).
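  // For example, given the parallel move { R0 -> R0, R1 -> R2 } (illustrative
  // registers), only R1 -> R2 survives the sweep; R0 -> R0 is redundant.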
  for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
    MoveOperands* move = parallel_move->MoveOperandsAt(i);
    if (!move->IsRedundant()) {
      moves_.push_back(move);
    }
  }
}

void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) {
  DCHECK(moves_.empty());
  // Build up a worklist of moves.
  BuildInitialMoveList(parallel_move);

  // Resolve stack-to-stack moves first, to take advantage of a free register
  // on constrained machines where such moves need a scratch register.
  for (size_t i = 0; i < moves_.size(); ++i) {
    const MoveOperands& move = *moves_[i];
    // Ignore constants and moves already eliminated.
    if (move.IsEliminated() || move.GetSource().IsConstant()) {
      continue;
    }

    if ((move.GetSource().IsStackSlot() || move.GetSource().IsDoubleStackSlot()) &&
        (move.GetDestination().IsStackSlot() || move.GetDestination().IsDoubleStackSlot())) {
      PerformMove(i);
    }
  }

  for (size_t i = 0; i < moves_.size(); ++i) {
    const MoveOperands& move = *moves_[i];
    // Skip constants to perform them last.  They don't block other moves
    // and skipping such moves with register destinations keeps those
    // registers free for the whole algorithm.
    if (!move.IsEliminated() && !move.GetSource().IsConstant()) {
      PerformMove(i);
    }
  }

  // Perform the moves with constant sources.
  for (size_t i = 0; i < moves_.size(); ++i) {
    MoveOperands* move = moves_[i];
    if (!move->IsEliminated()) {
      DCHECK(move->GetSource().IsConstant());
      EmitMove(i);
      // Eliminate the move, in case following moves need a scratch register.
      move->Eliminate();
    }
  }

  moves_.clear();
}
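
// A sketch of the emission order above, with illustrative operands: for the
// parallel move { S0 -> S1, R0 -> R1, #42 -> R2 } (S = stack slot, R =
// register), the stack-to-stack move S0 -> S1 is emitted first, while a
// register may still be free to serve as scratch; then R0 -> R1; and the
// constant load #42 -> R2 last.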

Location LowOf(Location location) {
  if (location.IsRegisterPair()) {
    return Location::RegisterLocation(location.low());
  } else if (location.IsFpuRegisterPair()) {
    return Location::FpuRegisterLocation(location.low());
  } else if (location.IsDoubleStackSlot()) {
    return Location::StackSlot(location.GetStackIndex());
  } else {
    return Location::NoLocation();
  }
}

Location HighOf(Location location) {
  if (location.IsRegisterPair()) {
    return Location::RegisterLocation(location.high());
  } else if (location.IsFpuRegisterPair()) {
    return Location::FpuRegisterLocation(location.high());
  } else if (location.IsDoubleStackSlot()) {
    return Location::StackSlot(location.GetHighStackIndex(4));
  } else {
    return Location::NoLocation();
  }
}
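
// For example (a sketch): LowOf/HighOf split the register pair (R0, R1) into
// R0 and R1, and split a double stack slot at index 16 into the 32-bit slots
// at indices 16 and 20 (GetHighStackIndex(4) accounting for the 4-byte word).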

// Update the source of `move`, knowing that `updated_location` has been swapped
// with `new_source`. Note that `updated_location` can be a pair, therefore if
// `move` is non-pair, we need to extract which register to use.
static void UpdateSourceOf(MoveOperands* move, Location updated_location, Location new_source) {
  Location source = move->GetSource();
  if (LowOf(updated_location).Equals(source)) {
    move->SetSource(LowOf(new_source));
  } else if (HighOf(updated_location).Equals(source)) {
    move->SetSource(HighOf(new_source));
  } else {
    DCHECK(updated_location.Equals(source)) << updated_location << " " << source;
    move->SetSource(new_source);
  }
}
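
// For example, if the pair (R0, R1) was swapped with a double stack slot S
// (illustrative operands), a remaining move whose source is R0 alone is
// redirected to LowOf(S), i.e. the low 32-bit half of S.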

MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) {
  // Each call to this function performs a move and deletes it from the move
  // graph.  We first recursively perform any move blocking this one.  We
  // mark a move as "pending" on entry to PerformMove in order to detect
  // cycles in the move graph.  We use operand swaps to resolve cycles,
  // which means that a call to PerformMove could change any source operand
  // in the move graph.
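  //
  // For example, resolving the cycle { R0 -> R1, R1 -> R0 } (a sketch):
  // PerformMove(0) marks R0 -> R1 pending and recurses into R1 -> R0, whose
  // destination is blocked by the pending move; the recursive call emits a
  // single swap and rewrites the outer move's source, so the outer move
  // becomes redundant (R1 -> R1) and is simply eliminated.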

  MoveOperands* move = moves_[index];
  DCHECK(!move->IsPending());
  if (move->IsRedundant()) {
    // Because we swap register pairs first, a move that is not yet pending
    // may have become redundant by the time we get to it.
    move->Eliminate();
    return nullptr;
  }

  // Clear this move's destination to indicate a pending move.  The actual
  // destination is saved in a stack-allocated local.  Recursion may allow
  // multiple moves to be pending.
  DCHECK(!move->GetSource().IsInvalid());
  Location destination = move->MarkPending();

  // Perform a depth-first traversal of the move graph to resolve
  // dependencies.  Any unperformed, unpending move with a source the same
  // as this one's destination blocks this one so recursively perform all
  // such moves.
  MoveOperands* required_swap = nullptr;
  for (size_t i = 0; i < moves_.size(); ++i) {
    const MoveOperands& other_move = *moves_[i];
    if (other_move.Blocks(destination) && !other_move.IsPending()) {
      // Though PerformMove can change any source operand in the move graph,
      // calling `PerformMove` cannot create a blocking move via a swap
      // (this loop does not miss any).
      // For example, assume there is a non-blocking move with source A
      // and this move is blocked on source B and there is a swap of A and
      // B.  Then A and B must be involved in the same cycle (or they would
      // not be swapped).  Since this move's destination is B and there is
      // only a single incoming edge to an operand, this move must also be
      // involved in the same cycle.  In that case, the blocking move will
      // be created but will be "pending" when we return from PerformMove.
      required_swap = PerformMove(i);

      if (required_swap == move) {
        // If this move is required to swap, we do so without looking
        // at the next moves. Swapping is not blocked by anything, it just
        // updates other moves' sources.
        break;
      } else if (required_swap == moves_[i]) {
        // If `other_move` was swapped, we iterate again to find a new
        // potential cycle. Setting `i` to -1 relies on unsigned wraparound:
        // the loop increment brings it back to 0, restarting the scan.
        required_swap = nullptr;
        i = -1;
      } else if (required_swap != nullptr) {
        // A move is required to swap. We walk back the cycle to find the
        // move by just returning from this `PerformMove`.
        moves_[index]->ClearPending(destination);
        return required_swap;
      }
    }
  }

  // We are about to resolve this move and don't need it marked as
  // pending, so restore its destination.
  move->ClearPending(destination);

  // This move's source may have changed due to swaps to resolve cycles and
  // so it may now be the last move in the cycle.  If so remove it.
  if (move->GetSource().Equals(destination)) {
    move->Eliminate();
    DCHECK(required_swap == nullptr);
    return nullptr;
  }

  // The move may be blocked on a (at most one) pending move, in which case
  // we have a cycle.  Search for such a blocking move and perform a swap to
  // resolve it.
  bool do_swap = false;
  if (required_swap != nullptr) {
    DCHECK_EQ(required_swap, move);
    do_swap = true;
  } else {
    for (MoveOperands* other_move : moves_) {
      if (other_move->Blocks(destination)) {
        DCHECK(other_move->IsPending()) << "move=" << *move << " other_move=" << *other_move;
        if (!move->Is64BitMove() && other_move->Is64BitMove()) {
          // We swap 64-bit moves before swapping 32-bit moves. Walk back the
          // cycle by returning the move that must be swapped.
          return other_move;
        }
        do_swap = true;
        break;
      }
    }
  }

  if (do_swap) {
    EmitSwap(index);
    // Any unperformed (including pending) move with a source of either
    // this move's source or destination needs to have its source
    // changed to reflect the state of affairs after the swap.
    Location source = move->GetSource();
    Location swap_destination = move->GetDestination();
    move->Eliminate();
    for (MoveOperands* other_move : moves_) {
      if (other_move->Blocks(source)) {
        UpdateSourceOf(other_move, source, swap_destination);
      } else if (other_move->Blocks(swap_destination)) {
        UpdateSourceOf(other_move, swap_destination, source);
      }
    }
    // If the swap was required because of a 64-bit move in the middle of a
    // cycle, we return the swapped move, so that the caller knows it needs
    // to re-iterate its dependency loop.
    return required_swap;
  } else {
    // This move is not blocked.
    EmitMove(index);
    move->Eliminate();
    DCHECK(required_swap == nullptr);
    return nullptr;
  }
}

bool ParallelMoveResolverWithSwap::IsScratchLocation(Location loc) {
  for (MoveOperands* move : moves_) {
    if (move->Blocks(loc)) {
      return false;
    }
  }

  for (MoveOperands* move : moves_) {
    if (move->GetDestination().Equals(loc)) {
      return true;
    }
  }

  return false;
}
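
// In other words, `loc` may serve as scratch when no remaining move reads it
// and some remaining move is going to overwrite it anyway, so clobbering it
// cannot lose a live value.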

int ParallelMoveResolverWithSwap::AllocateScratchRegister(int blocked,
                                                          int register_count,
                                                          int if_scratch,
                                                          bool* spilled) {
  DCHECK_NE(blocked, if_scratch);
  int scratch = -1;
  for (int reg = 0; reg < register_count; ++reg) {
    if ((blocked != reg) && IsScratchLocation(Location::RegisterLocation(reg))) {
      scratch = reg;
      break;
    }
  }

  if (scratch == -1) {
    *spilled = true;
    scratch = if_scratch;
  } else {
    *spilled = false;
  }

  return scratch;
}
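
// For example (a sketch), with blocked == R0 and if_scratch == R4: the other
// registers are probed for a free scratch location first; if none qualifies,
// R4 is chosen and `*spilled` is set so the caller knows to save and restore
// it around the scratch use.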


ParallelMoveResolverWithSwap::ScratchRegisterScope::ScratchRegisterScope(
    ParallelMoveResolverWithSwap* resolver, int blocked, int if_scratch, int number_of_registers)
    : resolver_(resolver),
      reg_(kNoRegister),
      spilled_(false) {
  reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers, if_scratch, &spilled_);

  if (spilled_) {
    resolver->SpillScratch(reg_);
  }
}


ParallelMoveResolverWithSwap::ScratchRegisterScope::~ScratchRegisterScope() {
  if (spilled_) {
    resolver_->RestoreScratch(reg_);
  }
}
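
// A backend typically instantiates this RAII scope inside its EmitMove or
// EmitSwap when it needs a temporary core register, e.g. (a sketch with
// assumed register names and parameters):
//
//   ScratchRegisterScope ensure_scratch(
//       this, /*blocked=*/ kNoRegister, /*if_scratch=*/ EAX, number_of_core_registers);
//   // ... use the scratch register held by the scope ...
//
// The destructor restores the register if it had to be spilled.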

void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) {
  DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
  DCHECK(moves_.empty());
  DCHECK(scratches_.empty());

  // Backend dependent initialization.
  PrepareForEmitNativeCode();

  // Build up a worklist of moves.
  BuildInitialMoveList(parallel_move);

  for (size_t i = 0; i < moves_.size(); ++i) {
    const MoveOperands& move = *moves_[i];
    // Skip constants to perform them last. They don't block other moves and
    // skipping such moves with register destinations keeps those registers
    // free for the whole algorithm.
    if (!move.IsEliminated() && !move.GetSource().IsConstant()) {
      PerformMove(i);
    }
  }

  // Perform the moves with constant sources and register destinations with UpdateMoveSource()
  // to reduce the number of literal loads. Stack destinations are skipped since we won't benefit
  // from changing the constant sources to stack locations.
  for (size_t i = 0; i < moves_.size(); ++i) {
    MoveOperands* move = moves_[i];
    Location destination = move->GetDestination();
    if (!move->IsEliminated() && !destination.IsStackSlot() && !destination.IsDoubleStackSlot()) {
      Location source = move->GetSource();
      EmitMove(i);
      move->Eliminate();
      // This may introduce an additional instruction dependency, but it reduces
      // the number of moves and possible literal loads. For example,
      // Original moves:
      //   1234.5678 -> D0
      //   1234.5678 -> D1
      // Updated moves:
      //   1234.5678 -> D0
      //   D0 -> D1
      UpdateMoveSource(source, destination);
    }
  }

  // Perform the rest of the moves.
  for (size_t i = 0; i < moves_.size(); ++i) {
    MoveOperands* move = moves_[i];
    if (!move->IsEliminated()) {
      EmitMove(i);
      move->Eliminate();
    }
  }

  // All pending moves that were added to resolve cycles should have been performed.
  DCHECK_EQ(GetNumberOfPendingMoves(), 0u);

  // Backend dependent cleanup.
  FinishEmitNativeCode();

  moves_.clear();
  scratches_.clear();
}

Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) {
  for (Location loc : scratches_) {
    if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
      return loc;
    }
  }
  for (MoveOperands* move : moves_) {
    Location loc = move->GetDestination();
    if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
      return loc;
    }
  }
  return Location::NoLocation();
}
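
// The second loop above reuses the destination of an unperformed move as
// scratch: such a location is safe to clobber because it is about to be
// overwritten anyway. If nothing qualifies, NoLocation() is returned and the
// backend typically has to provide a scratch location by other means.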

void ParallelMoveResolverNoSwap::AddScratchLocation(Location loc) {
  if (kIsDebugBuild) {
    for (Location scratch : scratches_) {
      CHECK(!loc.Equals(scratch));
    }
  }
  scratches_.push_back(loc);
}

void ParallelMoveResolverNoSwap::RemoveScratchLocation(Location loc) {
  DCHECK(!IsBlockedByMoves(loc));
  for (auto it = scratches_.begin(), end = scratches_.end(); it != end; ++it) {
    if (loc.Equals(*it)) {
      scratches_.erase(it);
      break;
    }
  }
}

void ParallelMoveResolverNoSwap::PerformMove(size_t index) {
  // Each call to this function performs a move and deletes it from the move
  // graph. We first recursively perform any move blocking this one. We mark
  // a move as "pending" on entry to PerformMove in order to detect cycles
  // in the move graph. We use scratch locations to resolve cycles, which may
  // add additional pending moves. After a move has been performed, we update
  // its source operand in the move graph to reduce dependencies in the graph.

  MoveOperands* move = moves_[index];
  DCHECK(!move->IsPending());
  DCHECK(!move->IsEliminated());
  if (move->IsRedundant()) {
    // Previous operations on the list of moves have caused this particular move
    // to become a no-op, so we can safely eliminate it. Consider for example
    // (0 -> 1) (1 -> 0) (1 -> 2). There is a cycle (0 -> 1) (1 -> 0), that we will
    // resolve as (1 -> scratch) (0 -> 1) (scratch -> 0). If, by chance, '2' is
    // used as the scratch location, the move (1 -> 2) will occur while resolving
    // the cycle. When that move is emitted, the code will update moves with a '1'
    // as their source to use '2' instead (see `UpdateMoveSource()`). In our example
    // the initial move (1 -> 2) would then become the no-op (2 -> 2) that can be
    // eliminated here.
    move->Eliminate();
    return;
  }

  // Clear this move's destination to indicate a pending move. The actual
  // destination is saved in a stack-allocated local. Recursion may allow
  // multiple moves to be pending.
  DCHECK(!move->GetSource().IsInvalid());
  Location destination = move->MarkPending();

  // Perform a depth-first traversal of the move graph to resolve
  // dependencies. Any unperformed, unpending move with a source the same
  // as this one's destination blocks this one so recursively perform all
  // such moves.
  for (size_t i = 0; i < moves_.size(); ++i) {
    const MoveOperands& other_move = *moves_[i];
    if (other_move.Blocks(destination) && !other_move.IsPending()) {
      PerformMove(i);
    }
  }

  // We are about to resolve this move and don't need it marked as
  // pending, so restore its destination.
  move->ClearPending(destination);

  // No one else should write to the move destination while it is pending.
  DCHECK(!move->IsRedundant());

  Location source = move->GetSource();
  // The move may be blocked on several pending moves, in which case we have a cycle.
  if (IsBlockedByMoves(destination)) {
    // For a cycle like: (A -> B) (B -> C) (C -> A), we change it to the following
    // sequence:
    // (C -> scratch)     # Emit right now.
    // (A -> B) (B -> C)  # Unblocked.
    // (scratch -> A)     # Add to pending_moves_, blocked by (A -> B).
    Location::Kind kind = source.GetKind();
    DCHECK_NE(kind, Location::kConstant);
    Location scratch = AllocateScratchLocationFor(kind);
    // We only care about the move size.
    Primitive::Type type = move->Is64BitMove() ? Primitive::kPrimLong : Primitive::kPrimInt;
    // Perform (C -> scratch).
    move->SetDestination(scratch);
    EmitMove(index);
    move->Eliminate();
    UpdateMoveSource(source, scratch);
    // Add (scratch -> A).
    AddPendingMove(scratch, destination, type);
  } else {
    // This move is not blocked.
    EmitMove(index);
    move->Eliminate();
    UpdateMoveSource(source, destination);
  }

  // Moves in the pending list should not block any other moves. But performing
  // unblocked moves in the pending list can free scratch registers, so we do this
  // as early as possible.
  MoveOperands* pending_move;
  while ((pending_move = GetUnblockedPendingMove(source)) != nullptr) {
    Location pending_source = pending_move->GetSource();
    Location pending_destination = pending_move->GetDestination();
    // We do not depend on the pending move index, so just delete the move instead
    // of eliminating it to keep the pending list clean.
    DeletePendingMove(pending_move);
    move->SetSource(pending_source);
    move->SetDestination(pending_destination);
    EmitMove(index);
    move->Eliminate();
    UpdateMoveSource(pending_source, pending_destination);
    // Free any unblocked locations in the scratch location list.
    // Note: Fetch size() on each iteration because scratches_ can be modified inside the loop.
    // FIXME: If FreeScratchLocation() removes the location from scratches_,
    // we skip the next location. This happens for arm64.
    for (size_t i = 0; i < scratches_.size(); ++i) {
      Location scratch = scratches_[i];
      // Only a scratch location overlapping with the performed move's source can be unblocked.
      if (scratch.OverlapsWith(pending_source) && !IsBlockedByMoves(scratch)) {
        FreeScratchLocation(pending_source);
      }
    }
  }
}
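
// A sketch of how the pending list drains for the cycle example above,
// { A -> B, B -> C, C -> A } with scratch location T (illustrative names):
// the recursion bottoms out at C -> A, which is blocked, so C -> T is emitted
// and T -> A is added to pending_moves_; B -> C and A -> B are then emitted
// on the way back up; finally T -> A becomes unblocked and is emitted, and T
// can be freed.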

void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) {
  // This function is used to reduce the dependencies in the graph after
  // (from -> to) has been performed. Since we ensure there is no move with the same
  // destination, (to -> X) cannot be blocked while (from -> X) might still be
  // blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After
  // (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is
  // a dependency between the two. If we update the source location from 1 to 2, we
  // will get (0 -> 1) and (2 -> 3). There is no dependency between the two.
  //
  // This is not something we must do, but we can use fewer scratch locations with
  // this trick. For example, we can avoid using additional scratch locations for
  // moves (0 -> 1), (1 -> 2), (1 -> 0).
  for (MoveOperands* move : moves_) {
    if (move->GetSource().Equals(from)) {
      move->SetSource(to);
    }
  }
}

void ParallelMoveResolverNoSwap::AddPendingMove(Location source,
    Location destination, Primitive::Type type) {
  pending_moves_.push_back(new (allocator_) MoveOperands(source, destination, type, nullptr));
}

void ParallelMoveResolverNoSwap::DeletePendingMove(MoveOperands* move) {
  RemoveElement(pending_moves_, move);
}

MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) {
  for (MoveOperands* move : pending_moves_) {
    Location destination = move->GetDestination();
    // Only moves with a destination overlapping the input loc can be unblocked.
    if (destination.OverlapsWith(loc) && !IsBlockedByMoves(destination)) {
      return move;
    }
  }
  return nullptr;
}

bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) {
  for (MoveOperands* move : pending_moves_) {
    if (move->Blocks(loc)) {
      return true;
    }
  }
  for (MoveOperands* move : moves_) {
    if (move->Blocks(loc)) {
      return true;
    }
  }
  return false;
}

// So far this is only used for debugging purposes, to make sure all pending
// moves have been performed.
size_t ParallelMoveResolverNoSwap::GetNumberOfPendingMoves() {
  return pending_moves_.size();
}

}  // namespace art