/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include <limits>

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "art_method-inl.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
    : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}

X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  const LocationSummary* res = invoke->GetLocations();
  return res != nullptr && res->Intrinsified();
}

#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->

// TODO: trg as memory.
static void MoveFromReturnRegister(Location trg,
                                   Primitive::Type type,
                                   CodeGeneratorX86_64* codegen) {
  if (!trg.IsValid()) {
    DCHECK(type == Primitive::kPrimVoid);
    return;
  }

  switch (type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
    case Primitive::kPrimInt:
    case Primitive::kPrimNot: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movl(trg_reg, CpuRegister(RAX));
      }
      break;
    }
    case Primitive::kPrimLong: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movq(trg_reg, CpuRegister(RAX));
      }
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unexpected void type for valid location " << trg;
      UNREACHABLE();

    case Primitive::kPrimDouble: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movsd(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
    case Primitive::kPrimFloat: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movss(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
  }
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
// call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
 public:
  explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    MoveArguments(invoke_, codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI));
      RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
    } else {
      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
      UNREACHABLE();
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ jmp(GetExitLabel());
  }

 private:
  // The instruction for which this slow path is generated.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64);
};

#undef __
#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    case Primitive::kPrimLong:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
//       need is 64b.

static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  // TODO: Allow x86 to work with memory. This requires assembler support, see below.
  // locations->SetInAt(0, Location::Any());  // X86 can work on memory directly.
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
}

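// Float/double abs is implemented by clearing the sign bit: the value is ANDed with a mask that
// keeps every bit but the topmost one (0x7FFFFFFF for float, 0x7FFFFFFFFFFFFFFF for double).
// This is branch-free and works for NaNs and infinities as well.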
static void MathAbsFP(LocationSummary* locations,
                      bool is64bit,
                      X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen) {
  Location output = locations->Out();

  if (output.IsFpuRegister()) {
    // In-register
    XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();

    // TODO: Can mask directly with constant area using pand if we can guarantee
    // that the literal is aligned on a 16 byte boundary. This will avoid a
    // temporary.
    if (is64bit) {
      __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
      __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    } else {
      __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
      __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    }
  } else {
    // TODO: update when assembler support is available.
    UNIMPLEMENTED(FATAL) << "Needs assembler support.";
// Once assembler support is available, in-memory operations look like this:
//    if (is64bit) {
//      DCHECK(output.IsDoubleStackSlot());
//      // No 64b and with literal.
//      __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
//      __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp);
//    } else {
//      DCHECK(output.IsStackSlot());
//      // Can use and with a literal directly.
//      __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF)));
//    }
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask.
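    // (out + mask) ^ mask == |out|: mask is 0 for non-negative values and all ones for negative
    // ones, so the pair of instructions below is a branch-free abs, as in the 64-bit case above.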
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if Nan jmp Nan_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // Nan_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  Label nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  if (is_double) {
    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
  } else {
    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

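// Note on the 0.0/-0.0 handling in GenMinMaxFP above: when ucomis falls through to the or/and,
// the operands compared equal. Or-ing the bit patterns makes min(+0.0, -0.0) return -0.0, and
// and-ing them makes max(+0.0, -0.0) return +0.0, as Math.min/max require; for equal non-zero
// values the operation leaves the value unchanged.
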
static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    //       a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();

  //  (out := op1)
  //  out <=? op2
  //  if out is min jmp done
  //  out := op2
  // done:

  if (is_long) {
    __ cmpq(out, op2);
  } else {
    __ cmpl(out, op2);
  }

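  // out holds op1. For min, replace it with op2 when op1 > op2 (kGreater); for max, when
  // op1 < op2 (kLess). The cmov keeps this branch-free.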
  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    MoveFromReturnRegister(out, invoke->GetType(), codegen);
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
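  // The code generator will see WillCall() on this summary and route the invoke through
  // InvokeOutOfLineIntrinsic instead of emitting inline code (see GenSSE41FPToFPIntrinsic).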
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
                                    HInvoke* invoke,
                                    X86_64Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                             LocationSummary::kNoCall,
                                                             kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
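  // Corner cases, matching Math.round: NaN produces 0, and results too large for an int are
  // clamped to Integer.MAX_VALUE (kPrimIntMax is loaded first so that the kAboveEqual branch
  // below can simply keep it).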
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f));

  // Add in the input.
  __ addss(inPlusPointFive, in);

  // And truncate to an integer.
  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));

  // Load maxInt into out.
  codegen_->Load64BitValue(out, kPrimIntMax);

  // if inPlusPointFive >= maxInt goto done
  __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = float-to-int-truncate(input)
  __ cvttss2si(out, inPlusPointFive);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundDouble as t1 = floor(input + 0.5); convert to long.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5));

  // Add in the input.
  __ addsd(inPlusPointFive, in);

  // And truncate to an integer.
  __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));

  // Load maxLong into out.
  codegen_->Load64BitValue(out, kPrimLongMax);

  // if inPlusPointFive >= maxLong goto done
  __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = double-to-long-truncate(input)
  __ cvttsd2si(out, inPlusPointFive, true);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
  // The inputs plus one temp.
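  // kCallOnSlowPath: if the index is out of range we bail out to the managed String.charAt,
  // which performs the bounds check again and throws the exception.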
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  //       the cost.
  // TODO: For simplicity, the index parameter is requested in a register, so unlike Quick we
  //       will not optimize the code for constants (which would save a register).

  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  X86_64Assembler* assembler = GetAssembler();

  __ cmpl(idx, Address(obj, count_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ j(kAboveEqual, slow_path->GetEntryLabel());

  // out = out[2*idx].
  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
  __ testl(argument, argument);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo), true));
  __ Bind(slow_path->GetExitLabel());
}

static void CreateStringIndexOfLocations(HInvoke* invoke,
                                         ArenaAllocator* allocator,
                                         bool start_at_zero) {
  LocationSummary* locations = new (allocator) LocationSummary(invoke,
                                                               LocationSummary::kCallOnSlowPath,
                                                               kIntrinsified);
  // The data needs to be in RDI for scasw. So request that the string is there, anyway.
  locations->SetInAt(0, Location::RegisterLocation(RDI));
  // If we look for a constant char, we'll still have to copy it into RAX. So just request the
  // allocator to do that, anyway. We can still do the constant check by checking the parameter
  // of the instruction explicitly.
  // Note: This works as we don't clobber RAX anywhere.
  locations->SetInAt(1, Location::RegisterLocation(RAX));
  if (!start_at_zero) {
    locations->SetInAt(2, Location::RequiresRegister());  // The starting index.
  }
  // As we clobber RDI during execution anyway, also use it as the output.
  locations->SetOut(Location::SameAsFirstInput());

  // repne scasw uses RCX as the counter.
  locations->AddTemp(Location::RegisterLocation(RCX));
  // Need another temporary to be able to compute the result.
  locations->AddTemp(Location::RequiresRegister());
}

static void GenerateStringIndexOf(HInvoke* invoke,
                                  X86_64Assembler* assembler,
                                  CodeGeneratorX86_64* codegen,
                                  ArenaAllocator* allocator,
                                  bool start_at_zero) {
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // Check our assumptions for registers.
  DCHECK_EQ(string_obj.AsRegister(), RDI);
  DCHECK_EQ(search_value.AsRegister(), RAX);
  DCHECK_EQ(counter.AsRegister(), RCX);
  DCHECK_EQ(out.AsRegister(), RDI);

  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
  // or directly dispatch if we have a constant.
  SlowPathCodeX86_64* slow_path = nullptr;
  if (invoke->InputAt(1)->IsIntConstant()) {
    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
        std::numeric_limits<uint16_t>::max()) {
      // Always needs the slow-path. We could directly dispatch to it, but this case should be
      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
      slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
      codegen->AddSlowPath(slow_path);
      __ jmp(slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
      return;
    }
  } else {
    __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
    slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
    codegen->AddSlowPath(slow_path);
    __ j(kAbove, slow_path->GetEntryLabel());
  }

  // From here down, we know that we are looking for a char that fits in 16 bits.
  // Location of reference to data array within the String object.
  int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count within the String object.
  int32_t count_offset = mirror::String::CountOffset().Int32Value();

  // Load string length, i.e., the count field of the string.
  __ movl(string_length, Address(string_obj, count_offset));

  // Do a length check.
  // TODO: Support jecxz.
  Label not_found_label;
  __ testl(string_length, string_length);
  __ j(kEqual, &not_found_label);

  if (start_at_zero) {
    // Number of chars to scan is the same as the string length.
    __ movl(counter, string_length);

    // Move to the start of the string.
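    // (The generated code assumes the character data is stored inline in the String object,
    // starting at value_offset, so RDI ends up pointing at the first char.)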
    __ addq(string_obj, Immediate(value_offset));
  } else {
    CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();

    // Do a start_index check.
    __ cmpl(start_index, string_length);
    __ j(kGreaterEqual, &not_found_label);

    // Ensure we have a start index >= 0.
    __ xorl(counter, counter);
    __ cmpl(start_index, Immediate(0));
    __ cmov(kGreater, counter, start_index, false);  // 32-bit copy is enough.

    // Move to the start of the string: string_obj + value_offset + 2 * start_index.
    __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));

    // Now update RCX, the work counter: it will be string.length - start_index.
    __ negq(counter);  // Needs to be 64-bit negation, as the address computation is 64-bit.
    __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
  }

  // Everything is set up for repne scasw:
  //   * Comparison address in RDI.
  //   * Counter in ECX.
  __ repne_scasw();

  // Did we find a match?
  __ j(kNotEqual, &not_found_label);

  // Yes, we matched. Compute the index of the result.
  __ subl(string_length, counter);
  __ leal(out, Address(string_length, -1));

  Label done;
  __ jmp(&done);

  // Failed to match; return -1.
  __ Bind(&not_found_label);
  __ movl(out, Immediate(-1));

  // And join up at the end.
  __ Bind(&done);
  if (slow_path != nullptr) {
    __ Bind(slow_path->GetExitLabel());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
  CreateStringIndexOfLocations(invoke, arena_, true);
}

void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
}

void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
  CreateStringIndexOfLocations(invoke, arena_, false);
}

void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
  __ testl(byte_array, byte_array);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();

  __ gs()->call(Address::Absolute(
      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
  __ testl(string_to_copy, string_to_copy);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString), true));
  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  __ Bind(slow_path->GetExitLabel());
}

static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
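  // The sub-word loads below sign-extend into the full register, matching the Java byte/short
  // return types of the corresponding Memory.peek methods.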
  switch (size) {
    case Primitive::kPrimByte:
      __ movsxb(out, Address(address, 0));
      break;
    case Primitive::kPrimShort:
      __ movsxw(out, Address(address, 0));
      break;
    case Primitive::kPrimInt:
      __ movl(out, Address(address, 0));
      break;
    case Primitive::kPrimLong:
      __ movq(out, Address(address, 0));
      break;
    default:
      LOG(FATAL) << "Type not recognized for peek: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrInt32LongConstant(invoke->InputAt(1)));
}

static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  Location value = locations->InAt(1);
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
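  // Sub-word stores write only the low bits of the register or immediate, so no explicit
  // truncation is needed. A long constant must fit in 32 bits (see the DCHECK below), which
  // the location builder guarantees via RegisterOrInt32LongConstant.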
  switch (size) {
    case Primitive::kPrimByte:
      if (value.IsConstant()) {
        __ movb(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimShort:
      if (value.IsConstant()) {
        __ movw(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimInt:
      if (value.IsConstant()) {
        __ movl(Address(address, 0),
                Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
      } else {
        __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    case Primitive::kPrimLong:
      if (value.IsConstant()) {
        int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
        DCHECK(IsInt<32>(v));
        int32_t v_32 = v;
        __ movq(Address(address, 0), Immediate(v_32));
      } else {
        __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
      }
      break;
    default:
      LOG(FATAL) << "Type not recognized for poke: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
  GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
}

static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
                         bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister trg = locations->Out().AsRegister<CpuRegister>();

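  // Note: is_volatile is unused because plain x86-64 loads already provide the acquire
  // semantics a volatile read needs; no fence instruction is required.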
  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimNot:
      __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
      break;

    case Primitive::kPrimLong:
      __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
      break;

    default:
      LOG(FATAL) << "Unsupported op size " << type;
      UNREACHABLE();
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
}

static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
                                                       Primitive::Type type,
                                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}

// We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
// memory model.
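// Volatile stores, however, need a StoreLoad barrier, which the x86 memory model does not
// provide for free; that is the mfence emitted below when is_volatile is set.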
static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
                         CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  } else {
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  }

  if (is_volatile) {
    __ mfence();
  }

  if (type == Primitive::kPrimNot) {
    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                        locations->GetTemp(1).AsRegister<CpuRegister>(),
                        base,
                        value);
  }
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
}

static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  // Expected value must be in EAX/RAX.
  locations->SetInAt(3, Location::RegisterLocation(RAX));
  locations->SetInAt(4, Location::RequiresRegister());

  locations->SetOut(Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
}

static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
  DCHECK_EQ(expected.AsRegister(), RAX);
  CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
  } else {
    // Integer or object.
    if (type == Primitive::kPrimNot) {
      // Mark card for object assuming new value is stored.
      codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                          locations->GetTemp(1).AsRegister<CpuRegister>(),
                          base,
                          value);
    }

    __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
  }

  // locked cmpxchg has full barrier semantics, and we don't need scheduling
  // barriers at this time.

  // Convert ZF into the boolean result.
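  // cmpxchg sets ZF exactly when the exchange took place (the memory operand equaled RAX), so
  // setcc/movzxb below turn that flag into a 0/1 value with the upper bits cleared.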
  __ setcc(kZero, out);
  __ movzxb(out, out);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCAS(Primitive::kPrimInt, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  GenCAS(Primitive::kPrimLong, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  GenCAS(Primitive::kPrimNot, invoke, codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
                     X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  Immediate imm_mask(mask);
  __ movl(temp, reg);
  __ shrl(reg, imm_shift);
  __ andl(temp, imm_mask);
  __ andl(reg, imm_mask);
  __ shll(temp, imm_shift);
  __ orl(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
   * swapping bits to reverse bits in a number x. Using bswap saves instructions
   * compared to the generic luni implementation, which has 5 rounds of swapping bits.
   *   x = bswap x
   *   x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
   *   x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
   *   x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
   */
  __ bswapl(reg);
  SwapBits(reg, temp, 1, 0x55555555, assembler);
  SwapBits(reg, temp, 2, 0x33333333, assembler);
  SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
}

static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
                       int32_t shift, int64_t mask, X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  __ movq(temp_mask, Immediate(mask));
  __ movq(temp, reg);
  __ shrq(reg, imm_shift);
  __ andq(temp, temp_mask);
  __ andq(reg, temp_mask);
  __ shlq(temp, imm_shift);
  __ orq(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
   * swapping bits to reverse bits in a long number x. Using bswap saves instructions
   * compared to the generic luni implementation, which has 5 rounds of swapping bits.
   *   x = bswap x
   *   x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
   *   x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
   *   x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
   */
  __ bswapq(reg);
  SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
  SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
  SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
}

// Unimplemented intrinsics.

#define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}                                                                                       \
void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
}

UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)

}  // namespace x86_64
}  // namespace art