/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include <limits>

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
    : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}

X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return down_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  if (kEmitCompilerReadBarrier && res->CanCall()) {
    // Generating an intrinsic for this HInvoke may produce an
    // IntrinsicSlowPathX86_64 slow path. Currently this approach
    // does not work when using read barriers, as the emitted
    // calling sequence will make use of another slow path
    // (ReadBarrierForRootSlowPathX86_64 for HInvokeStaticOrDirect,
    // ReadBarrierSlowPathX86_64 for HInvokeVirtual). So we bail
    // out in this case.
    //
    // TODO: Find a way to have intrinsics work with read barriers.
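    // Clear the locations so that the invoke is compiled as a regular (non-intrinsified) call.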
    invoke->SetLocations(nullptr);
    return false;
  }
  return res->Intrinsified();
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;

#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
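      // bswapl reverses all four bytes of the 32-bit register; the arithmetic
      // shift then drops the two garbage low bytes and sign-extends the
      // reversed 16-bit value.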
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    case Primitive::kPrimLong:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
// need is 64b.

static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
}

static void MathAbsFP(LocationSummary* locations,
                      bool is64bit,
                      X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen) {
  Location output = locations->Out();

  DCHECK(output.IsFpuRegister());
  XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();

  // TODO: Can mask directly with constant area using pand if we can guarantee
  // that the literal is aligned on a 16 byte boundary. This will avoid a
  // temporary.
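  // abs(x) just clears the sign bit: bit 63 for doubles, bit 31 for floats.
  // E.g. bits(-5.0) = 0xC014000000000000; masking with 0x7FFFFFFFFFFFFFFF
  // yields 0x4014000000000000, which is +5.0.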
  if (is64bit) {
    __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
    __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
  } else {
    __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
    __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_);
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask, then XOR with it: (x + mask) ^ mask == |x|.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask, then XOR with it: (x + mask) ^ mask == |x|.
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if Nan jmp Nan_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // Nan_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
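  // Note: ucomisd/ucomiss below set the parity flag on an unordered compare,
  // i.e. when either operand is NaN; the kParityEven branch keys on that.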

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  NearLabel nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  if (is_double) {
    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
  } else {
    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(
      invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler(), codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    // a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();

  //  (out := op1)
  //  out <=? op2
  //  if out is min jmp done
  //  out := op2
  // done:

  if (is_long) {
    __ cmpq(out, op2);
  } else {
    __ cmpl(out, op2);
  }

  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(
      invoke->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    codegen->MoveFromReturnRegister(out, invoke->GetType());
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
                                    HInvoke* invoke,
                                    X86_64Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                             LocationSummary::kNoCall,
                                                             kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  // See intrinsics.h.
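  // kRoundIsPlusPointFive (declared in intrinsics.h) gates these intrinsics:
  // the code below implements Math.round(x) as floor(x + 0.5) plus a
  // truncating conversion, so it is only emitted when those are the required
  // semantics.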
  if (kRoundIsPlusPointFive) {
    CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
  }
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  NearLabel done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f));

  // Add in the input.
  __ addss(inPlusPointFive, in);

  // And floor to an integer (round mode 1 = round down).
  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));

  // Load maxInt into out.
  codegen_->Load64BitValue(out, kPrimIntMax);

  // if inPlusPointFive >= maxInt goto done
  __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = float-to-int-truncate(input)
  __ cvttss2si(out, inPlusPointFive);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  // See intrinsics.h.
  if (kRoundIsPlusPointFive) {
    CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
  }
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundDouble as t1 = floor(input + 0.5); convert to long.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  NearLabel done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5));

  // Add in the input.
  __ addsd(inPlusPointFive, in);

  // And floor to an integer (round mode 1 = round down).
  __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));

  // Load maxLong into out.
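  // out is preloaded with the saturated result: if the comparison below sees
  // inPlusPointFive >= maxLong, we branch to done with maxLong still in out.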
  codegen_->Load64BitValue(out, kPrimLongMax);

  // if inPlusPointFive >= maxLong goto done
  __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = double-to-long-truncate(input)
  __ cvttsd2si(out, inPlusPointFive, /* is64bit */ true);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

static void CreateFPToFPCallLocations(ArenaAllocator* arena,
                                      HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall,
                                                           kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));

  // We have to ensure that the native code doesn't clobber the XMM registers which are
  // non-volatile for ART, but volatile for Native calls.  This will ensure that they are
  // saved in the prologue and properly restored.
  for (auto fp_reg : non_volatile_xmm_regs) {
    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
  }
}

static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
                          QuickEntrypointEnum entry) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->WillCall());
  DCHECK(invoke->IsInvokeStaticOrDirect());
  X86_64Assembler* assembler = codegen->GetAssembler();

  __ gs()->call(Address::Absolute(GetThreadOffset<kX86_64WordSize>(entry), true));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());
}

void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCos);
}

void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSin);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAcos);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAsin);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan);
}

void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}

void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCosh);
}

void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExp);
}

void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog);
}

void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog10);
}

void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSinh);
}

void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTan);
}

void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTanh);
}

static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
                                        HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall,
                                                           kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));

  // We have to ensure that the native code doesn't clobber the XMM registers which are
  // non-volatile for ART, but volatile for Native calls.  This will ensure that they are
  // saved in the prologue and properly restored.
  for (auto fp_reg : non_volatile_xmm_regs) {
    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickHypot);
}

void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}

void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array.
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count.
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  // the cost.
  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
  // we will not optimize the code for constants (which would save a register).

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  X86_64Assembler* assembler = GetAssembler();

  __ cmpl(idx, Address(obj, count_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ j(kAboveEqual, slow_path->GetEntryLabel());

  // out = out[2*idx] (out is the same register as obj, per SameAsFirstInput).
  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  // Check to see if we have known failures that will cause us to have to bail out
  // to the runtime, and just generate the runtime call directly.
  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();

  // The positions must be non-negative.
  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    // We will have to fail anyways.
    return;
  }

  // The length must be >= 0.
  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
  if (length != nullptr) {
    int32_t len = length->GetValue();
    if (len < 0) {
      // Just call as normal.
      return;
    }
  }

  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));

  // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
  locations->AddTemp(Location::RegisterLocation(RSI));
  locations->AddTemp(Location::RegisterLocation(RDI));
  locations->AddTemp(Location::RegisterLocation(RCX));
}

static void CheckPosition(X86_64Assembler* assembler,
                          Location pos,
                          CpuRegister input,
                          Location length,
                          SlowPathCode* slow_path,
                          CpuRegister input_len,
                          CpuRegister temp,
                          bool length_is_input_length = false) {
  // Where is the length in the Array?
  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();

  if (pos.IsConstant()) {
    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
    if (pos_const == 0) {
      if (!length_is_input_length) {
        // Check that length(input) >= length.
        if (length.IsConstant()) {
          __ cmpl(Address(input, length_offset),
                  Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
        } else {
          __ cmpl(Address(input, length_offset), length.AsRegister<CpuRegister>());
        }
        __ j(kLess, slow_path->GetEntryLabel());
      }
    } else {
      // Check that length(input) >= pos.
      __ movl(input_len, Address(input, length_offset));
      __ cmpl(input_len, Immediate(pos_const));
      __ j(kLess, slow_path->GetEntryLabel());

      // Check that (length(input) - pos) >= length.
      __ leal(temp, Address(input_len, -pos_const));
      if (length.IsConstant()) {
        __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ cmpl(temp, length.AsRegister<CpuRegister>());
      }
      __ j(kLess, slow_path->GetEntryLabel());
    }
  } else if (length_is_input_length) {
    // The only way the copy can succeed is if pos is zero.
    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
    __ testl(pos_reg, pos_reg);
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    // Check that pos >= 0.
    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
    __ testl(pos_reg, pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that pos <= length(input).
    __ cmpl(Address(input, length_offset), pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that (length(input) - pos) >= length.
    __ movl(temp, Address(input, length_offset));
    __ subl(temp, pos_reg);
    if (length.IsConstant()) {
      __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
    } else {
      __ cmpl(temp, length.AsRegister<CpuRegister>());
    }
    __ j(kLess, slow_path->GetEntryLabel());
  }
}

void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
  Location src_pos = locations->InAt(1);
  CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
  Location dest_pos = locations->InAt(3);
  Location length = locations->InAt(4);

  // Temporaries that we need for MOVSW.
  CpuRegister src_base = locations->GetTemp(0).AsRegister<CpuRegister>();
  DCHECK_EQ(src_base.AsRegister(), RSI);
  CpuRegister dest_base = locations->GetTemp(1).AsRegister<CpuRegister>();
  DCHECK_EQ(dest_base.AsRegister(), RDI);
  CpuRegister count = locations->GetTemp(2).AsRegister<CpuRegister>();
  DCHECK_EQ(count.AsRegister(), RCX);

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  // Bail out if the source and destination are the same.
  __ cmpl(src, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the source is null.
  __ testl(src, src);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the destination is null.
  __ testl(dest, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant()) {
    __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
    __ j(kLess, slow_path->GetEntryLabel());
  }

  // Validity checks: source.
  CheckPosition(assembler, src_pos, src, length, slow_path, src_base, dest_base);

  // Validity checks: dest.
  CheckPosition(assembler, dest_pos, dest, length, slow_path, src_base, dest_base);

  // We need the count in RCX.
  if (length.IsConstant()) {
    __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
  } else {
    __ movl(count, length.AsRegister<CpuRegister>());
  }

  // Okay, everything checks out.  Finally time to do the copy.
  // Check assumption that sizeof(Char) is 2 (used in scaling below).
  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
  DCHECK_EQ(char_size, 2u);

  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();

  if (src_pos.IsConstant()) {
    int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_base, Address(src, char_size * src_pos_const + data_offset));
  } else {
    __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(),
                              ScaleFactor::TIMES_2, data_offset));
  }
  if (dest_pos.IsConstant()) {
    int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(dest_base, Address(dest, char_size * dest_pos_const + data_offset));
  } else {
    __ leal(dest_base, Address(dest, dest_pos.AsRegister<CpuRegister>(),
                               ScaleFactor::TIMES_2, data_offset));
  }

  // Do the move.
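  // rep movsw copies RCX 16-bit words from [RSI] to [RDI], advancing both
  // pointers after each word.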
  __ rep_movsw();

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
  CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
}

// TODO: Implement read barriers in the SystemArrayCopy intrinsic.
// Note that this code path is not used (yet) because we do not
// intrinsify methods that can go into the IntrinsicSlowPathX86_64
// slow path.
void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();

  CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
  Location src_pos = locations->InAt(1);
  CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
  Location dest_pos = locations->InAt(3);
  Location length = locations->InAt(4);
  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
  CpuRegister temp3 = locations->GetTemp(2).AsRegister<CpuRegister>();

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  NearLabel conditions_on_positions_validated;
  SystemArrayCopyOptimizations optimizations(invoke);

  // If source and destination are the same, we go to slow path if we need to do
  // forward copying.
  if (src_pos.IsConstant()) {
    int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
    if (dest_pos.IsConstant()) {
      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      if (optimizations.GetDestinationIsSource()) {
        // Checked when building locations.
        DCHECK_GE(src_pos_constant, dest_pos_constant);
      } else if (src_pos_constant < dest_pos_constant) {
        __ cmpl(src, dest);
        __ j(kEqual, slow_path->GetEntryLabel());
      }
    } else {
      if (!optimizations.GetDestinationIsSource()) {
        __ cmpl(src, dest);
        __ j(kNotEqual, &conditions_on_positions_validated);
      }
      __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
      __ j(kGreater, slow_path->GetEntryLabel());
    }
  } else {
    if (!optimizations.GetDestinationIsSource()) {
      __ cmpl(src, dest);
      __ j(kNotEqual, &conditions_on_positions_validated);
    }
    if (dest_pos.IsConstant()) {
      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      __ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant));
      __ j(kLess, slow_path->GetEntryLabel());
    } else {
      __ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>());
      __ j(kLess, slow_path->GetEntryLabel());
    }
  }

  __ Bind(&conditions_on_positions_validated);

  if (!optimizations.GetSourceIsNotNull()) {
    // Bail out if the source is null.
    __ testl(src, src);
    __ j(kEqual, slow_path->GetEntryLabel());
  }

  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
    // Bail out if the destination is null.
    __ testl(dest, dest);
    __ j(kEqual, slow_path->GetEntryLabel());
  }

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant() &&
      !optimizations.GetCountIsSourceLength() &&
      !optimizations.GetCountIsDestinationLength()) {
    __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
    __ j(kLess, slow_path->GetEntryLabel());
  }

  // Validity checks: source.
  CheckPosition(assembler,
                src_pos,
                src,
                length,
                slow_path,
                temp1,
                temp2,
                optimizations.GetCountIsSourceLength());

  // Validity checks: dest.
  CheckPosition(assembler,
                dest_pos,
                dest,
                length,
                slow_path,
                temp1,
                temp2,
                optimizations.GetCountIsDestinationLength());

  if (!optimizations.GetDoesNotNeedTypeCheck()) {
    // Check whether all elements of the source array are assignable to the component
    // type of the destination array. We do two checks: the classes are the same,
    // or the destination is Object[]. If none of these checks succeed, we go to the
    // slow path.
    __ movl(temp1, Address(dest, class_offset));
    __ movl(temp2, Address(src, class_offset));
    bool did_unpoison = false;
    if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
        !optimizations.GetSourceIsNonPrimitiveArray()) {
      // One or two of the references need to be unpoisoned. Unpoison them
      // both to make the identity check valid.
      __ MaybeUnpoisonHeapReference(temp1);
      __ MaybeUnpoisonHeapReference(temp2);
      did_unpoison = true;
    }

    if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
      // Bail out if the destination is not a non primitive array.
      // /* HeapReference<Class> */ TMP = temp1->component_type_
      __ movl(CpuRegister(TMP), Address(temp1, component_offset));
      __ testl(CpuRegister(TMP), CpuRegister(TMP));
      __ j(kEqual, slow_path->GetEntryLabel());
      __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
      __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, slow_path->GetEntryLabel());
    }

    if (!optimizations.GetSourceIsNonPrimitiveArray()) {
      // Bail out if the source is not a non primitive array.
      // /* HeapReference<Class> */ TMP = temp2->component_type_
      __ movl(CpuRegister(TMP), Address(temp2, component_offset));
      __ testl(CpuRegister(TMP), CpuRegister(TMP));
      __ j(kEqual, slow_path->GetEntryLabel());
      __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
      __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, slow_path->GetEntryLabel());
    }

    __ cmpl(temp1, temp2);

    if (optimizations.GetDestinationIsTypedObjectArray()) {
      NearLabel do_copy;
      __ j(kEqual, &do_copy);
      if (!did_unpoison) {
        __ MaybeUnpoisonHeapReference(temp1);
      }
      // /* HeapReference<Class> */ temp1 = temp1->component_type_
      __ movl(temp1, Address(temp1, component_offset));
      __ MaybeUnpoisonHeapReference(temp1);
      // /* HeapReference<Class> */ temp1 = temp1->super_class_
      __ movl(temp1, Address(temp1, super_offset));
      // No need to unpoison the result, we're comparing against null.
      __ testl(temp1, temp1);
      __ j(kNotEqual, slow_path->GetEntryLabel());
      __ Bind(&do_copy);
    } else {
      __ j(kNotEqual, slow_path->GetEntryLabel());
    }
  } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
    DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
    // Bail out if the source is not a non primitive array.
    // /* HeapReference<Class> */ temp1 = src->klass_
    __ movl(temp1, Address(src, class_offset));
    __ MaybeUnpoisonHeapReference(temp1);
    // /* HeapReference<Class> */ TMP = temp1->component_type_
    __ movl(CpuRegister(TMP), Address(temp1, component_offset));
    __ testl(CpuRegister(TMP), CpuRegister(TMP));
    __ j(kEqual, slow_path->GetEntryLabel());
    __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
    __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
    __ j(kNotEqual, slow_path->GetEntryLabel());
  }

  // Compute base source address, base destination address, and end source address.

  uint32_t element_size = sizeof(int32_t);
  uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
  if (src_pos.IsConstant()) {
    int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(temp1, Address(src, element_size * constant + offset));
  } else {
    __ leal(temp1, Address(src, src_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, offset));
  }

  if (dest_pos.IsConstant()) {
    int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(temp2, Address(dest, element_size * constant + offset));
  } else {
    __ leal(temp2, Address(dest, dest_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, offset));
  }

  if (length.IsConstant()) {
    int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
    __ leal(temp3, Address(temp1, element_size * constant));
  } else {
    __ leal(temp3, Address(temp1, length.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, 0));
  }

  // Iterate over the arrays and do a raw copy of the objects. We don't need to
  // poison/unpoison, nor do any read barrier as the next uses of the destination
  // array will do it.
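  // temp1 walks the source, temp2 walks the destination, and temp3 marks the
  // end of the source range; each iteration moves one 32-bit reference.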
  NearLabel loop, done;
  __ cmpl(temp1, temp3);
  __ j(kEqual, &done);
  __ Bind(&loop);
  __ movl(CpuRegister(TMP), Address(temp1, 0));
  __ movl(Address(temp2, 0), CpuRegister(TMP));
  __ addl(temp1, Immediate(element_size));
  __ addl(temp2, Immediate(element_size));
  __ cmpl(temp1, temp3);
  __ j(kNotEqual, &loop);
  __ Bind(&done);

  // We only need one card marking on the destination array.
  codegen_->MarkGCCard(temp1,
                       temp2,
                       dest,
                       CpuRegister(kNoRegister),
                       /* value_can_be_null */ false);

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
  __ testl(argument, argument);
  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo),
                                  /* no_rip */ true));
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());

  // Request temporary registers, RCX and RDI needed for repe_cmpsq instruction.
  locations->AddTemp(Location::RegisterLocation(RCX));
  locations->AddTemp(Location::RegisterLocation(RDI));

  // Set output, RSI needed for repe_cmpsq instruction anyways.
  locations->SetOut(Location::RegisterLocation(RSI), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister str = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister arg = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister rcx = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>();
  CpuRegister rsi = locations->Out().AsRegister<CpuRegister>();

  NearLabel end, return_true, return_false;

  // Get offsets of count, value, and class fields within a string object.
  const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
  const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  // Check if input is null, return false if it is.
  __ testl(arg, arg);
  __ j(kEqual, &return_false);

  // Instanceof check for the argument by comparing class fields.
  // All string objects must have the same type since String cannot be subclassed.
  // Receiver must be a string object, so its class field is equal to all strings' class fields.
  // If the argument is a string object, its class field must be equal to receiver's class field.
  __ movl(rcx, Address(str, class_offset));
  __ cmpl(rcx, Address(arg, class_offset));
  __ j(kNotEqual, &return_false);

  // Reference equality check, return true if same reference.
  __ cmpl(str, arg);
  __ j(kEqual, &return_true);

  // Load length of receiver string.
  __ movl(rcx, Address(str, count_offset));
  // Check if lengths are equal, return false if they're not.
  __ cmpl(rcx, Address(arg, count_offset));
  __ j(kNotEqual, &return_false);
  // Return true if both strings are empty.
  __ jrcxz(&return_true);

  // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
  __ leal(rsi, Address(str, value_offset));
  __ leal(rdi, Address(arg, value_offset));

  // Divide string length by 4 and adjust for lengths not divisible by 4.
  __ addl(rcx, Immediate(3));
  __ shrl(rcx, Immediate(2));

  // Assertions that must hold in order to compare strings 4 characters at a time.
  DCHECK_ALIGNED(value_offset, 8);
  static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded");

  // Loop to compare strings four characters at a time starting at the beginning of the string.
  __ repe_cmpsq();
  // If strings are not equal, zero flag will be cleared.
  __ j(kNotEqual, &return_false);

  // Return true and exit the function.
  // If loop does not result in returning false, we return true.
  __ Bind(&return_true);
  __ movl(rsi, Immediate(1));
  __ jmp(&end);

  // Return false and exit the function.
  __ Bind(&return_false);
  __ xorl(rsi, rsi);
  __ Bind(&end);
}

static void CreateStringIndexOfLocations(HInvoke* invoke,
                                         ArenaAllocator* allocator,
                                         bool start_at_zero) {
  LocationSummary* locations = new (allocator) LocationSummary(invoke,
                                                               LocationSummary::kCallOnSlowPath,
                                                               kIntrinsified);
  // The data needs to be in RDI for scasw. So request that the string is there, anyways.
  locations->SetInAt(0, Location::RegisterLocation(RDI));
  // If we look for a constant char, we'll still have to copy it into RAX. So just request the
  // allocator to do that, anyways. We can still do the constant check by checking the parameter
  // of the instruction explicitly.
  // Note: This works as we don't clobber RAX anywhere.
  locations->SetInAt(1, Location::RegisterLocation(RAX));
  if (!start_at_zero) {
    locations->SetInAt(2, Location::RequiresRegister());  // The starting index.
  }
  // As we clobber RDI during execution anyways, also use it as the output.
  locations->SetOut(Location::SameAsFirstInput());

  // repne scasw uses RCX as the counter.
  locations->AddTemp(Location::RegisterLocation(RCX));
  // Need another temporary to be able to compute the result.
  locations->AddTemp(Location::RequiresRegister());
}

static void GenerateStringIndexOf(HInvoke* invoke,
                                  X86_64Assembler* assembler,
                                  CodeGeneratorX86_64* codegen,
                                  ArenaAllocator* allocator,
                                  bool start_at_zero) {
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // Check our assumptions for registers.
  DCHECK_EQ(string_obj.AsRegister(), RDI);
  DCHECK_EQ(search_value.AsRegister(), RAX);
  DCHECK_EQ(counter.AsRegister(), RCX);
  DCHECK_EQ(out.AsRegister(), RDI);

  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
  // or directly dispatch if we have a constant.
  SlowPathCode* slow_path = nullptr;
  if (invoke->InputAt(1)->IsIntConstant()) {
    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
        std::numeric_limits<uint16_t>::max()) {
      // Always needs the slow-path. We could directly dispatch to it, but this case should be
      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
      slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
      codegen->AddSlowPath(slow_path);
      __ jmp(slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
      return;
    }
  } else {
    __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
    slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
    codegen->AddSlowPath(slow_path);
    __ j(kAbove, slow_path->GetEntryLabel());
  }

  // From here down, we know that we are looking for a char that fits in 16 bits.
  // Location of reference to data array within the String object.
  int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count within the String object.
  int32_t count_offset = mirror::String::CountOffset().Int32Value();

  // Load string length, i.e., the count field of the string.
  __ movl(string_length, Address(string_obj, count_offset));

  // Do a length check.
  // TODO: Support jecxz.
  NearLabel not_found_label;
  __ testl(string_length, string_length);
  __ j(kEqual, &not_found_label);

  if (start_at_zero) {
    // Number of chars to scan is the same as the string length.
    __ movl(counter, string_length);

    // Move to the start of the string.
    __ addq(string_obj, Immediate(value_offset));
  } else {
    CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();

    // Do a start_index check.
1568 __ cmpl(start_index, string_length);
1569 __ j(kGreaterEqual, &not_found_label);
1570
1571 // Ensure we have a start index >= 0.
1572 __ xorl(counter, counter);
1573 __ cmpl(start_index, Immediate(0));
1574 __ cmov(kGreater, counter, start_index, /* is64bit */ false); // 32-bit copy is enough.
1575
1576 // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1577 __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1578
1579 // Now update ecx, the work counter: it will be string.length - start_index.
1580 __ negq(counter); // Needs to be 64-bit negation, as the address computation is 64-bit.
1581 __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1582 }
1583
1584 // Everything is set up for repne scasw:
1585 // * Comparison address in RDI.
1586 // * Counter in ECX.
1587 __ repne_scasw();
1588
1589 // Did we find a match?
1590 __ j(kNotEqual, &not_found_label);
1591
1592 // Yes, we matched. Compute the index of the result.
1593 __ subl(string_length, counter);
1594 __ leal(out, Address(string_length, -1));
1595
1596 NearLabel done;
1597 __ jmp(&done);
1598
1599 // Failed to match; return -1.
1600 __ Bind(&not_found_label);
1601 __ movl(out, Immediate(-1));
1602
1603 // And join up at the end.
1604 __ Bind(&done);
1605 if (slow_path != nullptr) {
1606 __ Bind(slow_path->GetExitLabel());
1607 }
1608}
1609
1610void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
1611 CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true);
1612}
1613
1614void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
1615 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
1616}
1617
1618void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1619 CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false);
1620}
1621
1622void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1623 GenerateStringIndexOf(
1624 invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
1625}
1626
1627void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1628 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1629 LocationSummary::kCall,
1630 kIntrinsified);
1631 InvokeRuntimeCallingConvention calling_convention;
1632 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1633 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1634 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1635 locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1636 locations->SetOut(Location::RegisterLocation(RAX));
1637}
1638
1639void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1640 X86_64Assembler* assembler = GetAssembler();
1641 LocationSummary* locations = invoke->GetLocations();
1642
1643 CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
1644 __ testl(byte_array, byte_array);
1645 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
1646 codegen_->AddSlowPath(slow_path);
1647 __ j(kEqual, slow_path->GetEntryLabel());
1648
1649 __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes),
1650 /* no_rip */ true));
1651 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t,
int32_t, int32_t>(); 1652 codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); 1653 __ Bind(slow_path->GetExitLabel()); 1654} 1655 1656void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) { 1657 LocationSummary* locations = new (arena_) LocationSummary(invoke, 1658 LocationSummary::kCall, 1659 kIntrinsified); 1660 InvokeRuntimeCallingConvention calling_convention; 1661 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 1662 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); 1663 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); 1664 locations->SetOut(Location::RegisterLocation(RAX)); 1665} 1666 1667void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) { 1668 X86_64Assembler* assembler = GetAssembler(); 1669 1670 // No need to emit code checking whether `locations->InAt(2)` is a null 1671 // pointer, as callers of the native method 1672 // 1673 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) 1674 // 1675 // all include a null check on `data` before calling that method. 1676 __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars), 1677 /* no_rip */ true)); 1678 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>(); 1679 codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); 1680} 1681 1682void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) { 1683 LocationSummary* locations = new (arena_) LocationSummary(invoke, 1684 LocationSummary::kCall, 1685 kIntrinsified); 1686 InvokeRuntimeCallingConvention calling_convention; 1687 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 1688 locations->SetOut(Location::RegisterLocation(RAX)); 1689} 1690 1691void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) { 1692 X86_64Assembler* assembler = GetAssembler(); 1693 LocationSummary* locations = invoke->GetLocations(); 1694 1695 CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>(); 1696 __ testl(string_to_copy, string_to_copy); 1697 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); 1698 codegen_->AddSlowPath(slow_path); 1699 __ j(kEqual, slow_path->GetEntryLabel()); 1700 1701 __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString), 1702 /* no_rip */ true)); 1703 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>(); 1704 codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); 1705 __ Bind(slow_path->GetExitLabel()); 1706} 1707 1708void IntrinsicLocationsBuilderX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) { 1709 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin); 1710 LocationSummary* locations = new (arena_) LocationSummary(invoke, 1711 LocationSummary::kNoCall, 1712 kIntrinsified); 1713 locations->SetInAt(0, Location::RequiresRegister()); 1714 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); 1715 locations->SetInAt(2, Location::RequiresRegister()); 1716 locations->SetInAt(3, Location::RequiresRegister()); 1717 locations->SetInAt(4, Location::RequiresRegister()); 1718 1719 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers. 
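// For reference, the fixed registers below satisfy the contract of the
// x86-64 `rep movsw` instruction: RSI is the source address, RDI the
// destination address, and RCX the count of 16-bit words to copy; the
// instruction advances both pointers and decrements RCX as it runs.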
1720 locations->AddTemp(Location::RegisterLocation(RSI)); 1721 locations->AddTemp(Location::RegisterLocation(RDI)); 1722 locations->AddTemp(Location::RegisterLocation(RCX)); 1723} 1724 1725void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) { 1726 X86_64Assembler* assembler = GetAssembler(); 1727 LocationSummary* locations = invoke->GetLocations(); 1728 1729 size_t char_component_size = Primitive::ComponentSize(Primitive::kPrimChar); 1730 // Location of data in char array buffer. 1731 const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value(); 1732 // Location of char array data in string. 1733 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); 1734 1735 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin); 1736 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); 1737 Location srcBegin = locations->InAt(1); 1738 int srcBegin_value = 1739 srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0; 1740 CpuRegister srcEnd = locations->InAt(2).AsRegister<CpuRegister>(); 1741 CpuRegister dst = locations->InAt(3).AsRegister<CpuRegister>(); 1742 CpuRegister dstBegin = locations->InAt(4).AsRegister<CpuRegister>(); 1743 1744 // Check assumption that sizeof(Char) is 2 (used in scaling below). 1745 const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); 1746 DCHECK_EQ(char_size, 2u); 1747 1748 // Compute the address of the destination buffer. 1749 __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset)); 1750 1751 // Compute the address of the source string. 1752 if (srcBegin.IsConstant()) { 1753 // Compute the address of the source string by adding the number of chars from 1754 // the source beginning to the value offset of a string. 1755 __ leaq(CpuRegister(RSI), Address(obj, srcBegin_value * char_size + value_offset)); 1756 } else { 1757 __ leaq(CpuRegister(RSI), Address(obj, srcBegin.AsRegister<CpuRegister>(), 1758 ScaleFactor::TIMES_2, value_offset)); 1759 } 1760 1761 // Compute the number of chars (words) to move. 1762 __ movl(CpuRegister(RCX), srcEnd); 1763 if (srcBegin.IsConstant()) { 1764 if (srcBegin_value != 0) { 1765 __ subl(CpuRegister(RCX), Immediate(srcBegin_value)); 1766 } 1767 } else { 1768 DCHECK(srcBegin.IsRegister()); 1769 __ subl(CpuRegister(RCX), srcBegin.AsRegister<CpuRegister>()); 1770 } 1771 1772 // Do the move. 1773 __ rep_movsw(); 1774} 1775 1776static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) { 1777 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>(); 1778 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // == address, here for clarity. 1779 // x86 allows unaligned access. We do not have to check the input or use specific instructions 1780 // to avoid a SIGBUS. 
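// As an illustrative sketch (assuming the register allocator picked RDI for
// `address` and RAX for `out`), the kPrimInt case below emits a single load:
// movl (%rdi), %eax
// The byte and short cases use sign-extending loads (movsx*) so the result
// matches Java's widening of byte/short return values to int.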
1781 switch (size) { 1782 case Primitive::kPrimByte: 1783 __ movsxb(out, Address(address, 0)); 1784 break; 1785 case Primitive::kPrimShort: 1786 __ movsxw(out, Address(address, 0)); 1787 break; 1788 case Primitive::kPrimInt: 1789 __ movl(out, Address(address, 0)); 1790 break; 1791 case Primitive::kPrimLong: 1792 __ movq(out, Address(address, 0)); 1793 break; 1794 default: 1795 LOG(FATAL) << "Type not recognized for peek: " << size; 1796 UNREACHABLE(); 1797 } 1798} 1799 1800void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) { 1801 CreateIntToIntLocations(arena_, invoke); 1802} 1803 1804void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) { 1805 GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler()); 1806} 1807 1808void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) { 1809 CreateIntToIntLocations(arena_, invoke); 1810} 1811 1812void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) { 1813 GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); 1814} 1815 1816void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) { 1817 CreateIntToIntLocations(arena_, invoke); 1818} 1819 1820void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) { 1821 GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); 1822} 1823 1824void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) { 1825 CreateIntToIntLocations(arena_, invoke); 1826} 1827 1828void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) { 1829 GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); 1830} 1831 1832static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) { 1833 LocationSummary* locations = new (arena) LocationSummary(invoke, 1834 LocationSummary::kNoCall, 1835 kIntrinsified); 1836 locations->SetInAt(0, Location::RequiresRegister()); 1837 locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1))); 1838} 1839 1840static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) { 1841 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>(); 1842 Location value = locations->InAt(1); 1843 // x86 allows unaligned access. We do not have to check the input or use specific instructions 1844 // to avoid a SIGBUS. 
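// A note on the kPrimLong constant case below: x86-64 has no move of a full
// 64-bit immediate to memory; `movq mem, imm` encodes only a sign-extended
// 32-bit immediate, which is why the value is DCHECKed with IsInt<32> before
// being emitted.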
1845 switch (size) { 1846 case Primitive::kPrimByte: 1847 if (value.IsConstant()) { 1848 __ movb(Address(address, 0), 1849 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant()))); 1850 } else { 1851 __ movb(Address(address, 0), value.AsRegister<CpuRegister>()); 1852 } 1853 break; 1854 case Primitive::kPrimShort: 1855 if (value.IsConstant()) { 1856 __ movw(Address(address, 0), 1857 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant()))); 1858 } else { 1859 __ movw(Address(address, 0), value.AsRegister<CpuRegister>()); 1860 } 1861 break; 1862 case Primitive::kPrimInt: 1863 if (value.IsConstant()) { 1864 __ movl(Address(address, 0), 1865 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant()))); 1866 } else { 1867 __ movl(Address(address, 0), value.AsRegister<CpuRegister>()); 1868 } 1869 break; 1870 case Primitive::kPrimLong: 1871 if (value.IsConstant()) { 1872 int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); 1873 DCHECK(IsInt<32>(v)); 1874 int32_t v_32 = v; 1875 __ movq(Address(address, 0), Immediate(v_32)); 1876 } else { 1877 __ movq(Address(address, 0), value.AsRegister<CpuRegister>()); 1878 } 1879 break; 1880 default: 1881 LOG(FATAL) << "Type not recognized for poke: " << size; 1882 UNREACHABLE(); 1883 } 1884} 1885 1886void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) { 1887 CreateIntIntToVoidLocations(arena_, invoke); 1888} 1889 1890void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) { 1891 GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler()); 1892} 1893 1894void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) { 1895 CreateIntIntToVoidLocations(arena_, invoke); 1896} 1897 1898void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) { 1899 GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); 1900} 1901 1902void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) { 1903 CreateIntIntToVoidLocations(arena_, invoke); 1904} 1905 1906void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) { 1907 GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); 1908} 1909 1910void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) { 1911 CreateIntIntToVoidLocations(arena_, invoke); 1912} 1913 1914void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) { 1915 GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); 1916} 1917 1918void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) { 1919 LocationSummary* locations = new (arena_) LocationSummary(invoke, 1920 LocationSummary::kNoCall, 1921 kIntrinsified); 1922 locations->SetOut(Location::RequiresRegister()); 1923} 1924 1925void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) { 1926 CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>(); 1927 GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), 1928 /* no_rip */ true)); 1929} 1930 1931static void GenUnsafeGet(HInvoke* invoke, 1932 Primitive::Type type, 1933 bool is_volatile ATTRIBUTE_UNUSED, 1934 CodeGeneratorX86_64* codegen) { 1935 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler()); 1936 LocationSummary* locations = invoke->GetLocations(); 1937 Location base_loc = locations->InAt(1); 1938 CpuRegister base = base_loc.AsRegister<CpuRegister>(); 1939 Location offset_loc = locations->InAt(2); 
1940 CpuRegister offset = offset_loc.AsRegister<CpuRegister>();
1941 Location output_loc = locations->Out();
1942 CpuRegister output = output_loc.AsRegister<CpuRegister>();
1943
1944 switch (type) {
1945 case Primitive::kPrimInt:
1946 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1947 break;
1948
1949 case Primitive::kPrimNot: {
1950 if (kEmitCompilerReadBarrier) {
1951 if (kUseBakerReadBarrier) {
1952 Location temp = locations->GetTemp(0);
1953 codegen->GenerateArrayLoadWithBakerReadBarrier(
1954 invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
1955 } else {
1956 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1957 codegen->GenerateReadBarrierSlow(
1958 invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1959 }
1960 } else {
1961 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1962 __ MaybeUnpoisonHeapReference(output);
1963 }
1964 break;
1965 }
1966
1967 case Primitive::kPrimLong:
1968 __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1969 break;
1970
1971 default:
1972 LOG(FATAL) << "Unsupported op size " << type;
1973 UNREACHABLE();
1974 }
1975}
1976
1977static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
1978 HInvoke* invoke,
1979 Primitive::Type type) {
1980 bool can_call = kEmitCompilerReadBarrier &&
1981 (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
1982 invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
1983 LocationSummary* locations = new (arena) LocationSummary(invoke,
1984 can_call ?
1985 LocationSummary::kCallOnSlowPath :
1986 LocationSummary::kNoCall,
1987 kIntrinsified);
1988 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1989 locations->SetInAt(1, Location::RequiresRegister());
1990 locations->SetInAt(2, Location::RequiresRegister());
1991 locations->SetOut(Location::RequiresRegister());
1992 if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1993 // We need a temporary register for the read barrier marking slow
1994 // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
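// No temporary is reserved in any other configuration, so the common
// non-read-barrier build pays no extra register pressure here.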
1995 locations->AddTemp(Location::RequiresRegister()); 1996 } 1997} 1998 1999void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) { 2000 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); 2001} 2002void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { 2003 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); 2004} 2005void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) { 2006 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); 2007} 2008void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { 2009 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); 2010} 2011void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) { 2012 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); 2013} 2014void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { 2015 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); 2016} 2017 2018 2019void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) { 2020 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_); 2021} 2022void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { 2023 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_); 2024} 2025void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) { 2026 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_); 2027} 2028void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { 2029 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_); 2030} 2031void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) { 2032 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_); 2033} 2034void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { 2035 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_); 2036} 2037 2038 2039static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena, 2040 Primitive::Type type, 2041 HInvoke* invoke) { 2042 LocationSummary* locations = new (arena) LocationSummary(invoke, 2043 LocationSummary::kNoCall, 2044 kIntrinsified); 2045 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 2046 locations->SetInAt(1, Location::RequiresRegister()); 2047 locations->SetInAt(2, Location::RequiresRegister()); 2048 locations->SetInAt(3, Location::RequiresRegister()); 2049 if (type == Primitive::kPrimNot) { 2050 // Need temp registers for card-marking. 2051 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. 
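// A second temporary is needed because the card-marking helper uses one
// register to derive the card address from the object address and another
// to hold the card table base loaded from the thread.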
2052 locations->AddTemp(Location::RequiresRegister()); 2053 } 2054} 2055 2056void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) { 2057 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke); 2058} 2059void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) { 2060 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke); 2061} 2062void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) { 2063 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke); 2064} 2065void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) { 2066 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke); 2067} 2068void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { 2069 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke); 2070} 2071void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { 2072 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke); 2073} 2074void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) { 2075 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke); 2076} 2077void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) { 2078 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke); 2079} 2080void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { 2081 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke); 2082} 2083 2084// We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86 2085// memory model. 2086static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile, 2087 CodeGeneratorX86_64* codegen) { 2088 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler()); 2089 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); 2090 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); 2091 CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>(); 2092 2093 if (type == Primitive::kPrimLong) { 2094 __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value); 2095 } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) { 2096 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); 2097 __ movl(temp, value); 2098 __ PoisonHeapReference(temp); 2099 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp); 2100 } else { 2101 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value); 2102 } 2103 2104 if (is_volatile) { 2105 codegen->MemoryFence(); 2106 } 2107 2108 if (type == Primitive::kPrimNot) { 2109 bool value_can_be_null = true; // TODO: Worth finding out this information? 
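// Mark the card covering `base` so the garbage collector knows this object
// may now hold a reference into another space.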
2110 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(), 2111 locations->GetTemp(1).AsRegister<CpuRegister>(), 2112 base, 2113 value, 2114 value_can_be_null); 2115 } 2116} 2117 2118void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) { 2119 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_); 2120} 2121void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) { 2122 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_); 2123} 2124void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) { 2125 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_); 2126} 2127void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) { 2128 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_); 2129} 2130void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { 2131 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_); 2132} 2133void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { 2134 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_); 2135} 2136void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) { 2137 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_); 2138} 2139void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) { 2140 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_); 2141} 2142void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { 2143 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_); 2144} 2145 2146static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, 2147 HInvoke* invoke) { 2148 LocationSummary* locations = new (arena) LocationSummary(invoke, 2149 LocationSummary::kNoCall, 2150 kIntrinsified); 2151 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 2152 locations->SetInAt(1, Location::RequiresRegister()); 2153 locations->SetInAt(2, Location::RequiresRegister()); 2154 // expected value must be in EAX/RAX. 2155 locations->SetInAt(3, Location::RegisterLocation(RAX)); 2156 locations->SetInAt(4, Location::RequiresRegister()); 2157 2158 locations->SetOut(Location::RequiresRegister()); 2159 if (type == Primitive::kPrimNot) { 2160 // Need temp registers for card-marking. 2161 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. 2162 locations->AddTemp(Location::RequiresRegister()); 2163 } 2164} 2165 2166void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) { 2167 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke); 2168} 2169 2170void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) { 2171 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke); 2172} 2173 2174void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) { 2175 // The UnsafeCASObject intrinsic is missing a read barrier, and 2176 // therefore sometimes does not work as expected (b/25883050). 2177 // Turn it off temporarily as a quick fix, until the read barrier is 2178 // implemented. 2179 // 2180 // TODO(rpl): Implement a read barrier in GenCAS below and re-enable 2181 // this intrinsic. 
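// Returning without creating a LocationSummary makes the intrinsic dispatch
// report failure, so this invoke falls back to the regular runtime call.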
2182 if (kEmitCompilerReadBarrier) { 2183 return; 2184 } 2185 2186 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); 2187} 2188 2189static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) { 2190 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler()); 2191 LocationSummary* locations = invoke->GetLocations(); 2192 2193 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); 2194 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); 2195 CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>(); 2196 // Ensure `expected` is in RAX (required by the CMPXCHG instruction). 2197 DCHECK_EQ(expected.AsRegister(), RAX); 2198 CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>(); 2199 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 2200 2201 if (type == Primitive::kPrimNot) { 2202 // Mark card for object assuming new value is stored. 2203 bool value_can_be_null = true; // TODO: Worth finding out this information? 2204 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(), 2205 locations->GetTemp(1).AsRegister<CpuRegister>(), 2206 base, 2207 value, 2208 value_can_be_null); 2209 2210 bool base_equals_value = (base.AsRegister() == value.AsRegister()); 2211 Register value_reg = value.AsRegister(); 2212 if (kPoisonHeapReferences) { 2213 if (base_equals_value) { 2214 // If `base` and `value` are the same register location, move 2215 // `value_reg` to a temporary register. This way, poisoning 2216 // `value_reg` won't invalidate `base`. 2217 value_reg = locations->GetTemp(0).AsRegister<CpuRegister>().AsRegister(); 2218 __ movl(CpuRegister(value_reg), base); 2219 } 2220 2221 // Check that the register allocator did not assign the location 2222 // of `expected` (RAX) to `value` nor to `base`, so that heap 2223 // poisoning (when enabled) works as intended below. 2224 // - If `value` were equal to `expected`, both references would 2225 // be poisoned twice, meaning they would not be poisoned at 2226 // all, as heap poisoning uses address negation. 2227 // - If `base` were equal to `expected`, poisoning `expected` 2228 // would invalidate `base`. 2229 DCHECK_NE(value_reg, expected.AsRegister()); 2230 DCHECK_NE(base.AsRegister(), expected.AsRegister()); 2231 2232 __ PoisonHeapReference(expected); 2233 __ PoisonHeapReference(CpuRegister(value_reg)); 2234 } 2235 2236 // TODO: Add a read barrier for the reference stored in the object 2237 // before attempting the CAS, similar to the one in the 2238 // art::Unsafe_compareAndSwapObject JNI implementation. 2239 // 2240 // Note that this code is not (yet) used when read barriers are 2241 // enabled (see IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject). 2242 DCHECK(!kEmitCompilerReadBarrier); 2243 __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg)); 2244 2245 // LOCK CMPXCHG has full barrier semantics, and we don't need 2246 // scheduling barriers at this time. 2247 2248 // Convert ZF into the boolean result. 2249 __ setcc(kZero, out); 2250 __ movzxb(out, out); 2251 2252 // If heap poisoning is enabled, we need to unpoison the values 2253 // that were poisoned earlier. 2254 if (kPoisonHeapReferences) { 2255 if (base_equals_value) { 2256 // `value_reg` has been moved to a temporary register, no need 2257 // to unpoison it. 2258 } else { 2259 // Ensure `value` is different from `out`, so that unpoisoning 2260 // the former does not invalidate the latter. 
2261 DCHECK_NE(value_reg, out.AsRegister()); 2262 __ UnpoisonHeapReference(CpuRegister(value_reg)); 2263 } 2264 // Ensure `expected` is different from `out`, so that unpoisoning 2265 // the former does not invalidate the latter. 2266 DCHECK_NE(expected.AsRegister(), out.AsRegister()); 2267 __ UnpoisonHeapReference(expected); 2268 } 2269 } else { 2270 if (type == Primitive::kPrimInt) { 2271 __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); 2272 } else if (type == Primitive::kPrimLong) { 2273 __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value); 2274 } else { 2275 LOG(FATAL) << "Unexpected CAS type " << type; 2276 } 2277 2278 // LOCK CMPXCHG has full barrier semantics, and we don't need 2279 // scheduling barriers at this time. 2280 2281 // Convert ZF into the boolean result. 2282 __ setcc(kZero, out); 2283 __ movzxb(out, out); 2284 } 2285} 2286 2287void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) { 2288 GenCAS(Primitive::kPrimInt, invoke, codegen_); 2289} 2290 2291void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) { 2292 GenCAS(Primitive::kPrimLong, invoke, codegen_); 2293} 2294 2295void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) { 2296 GenCAS(Primitive::kPrimNot, invoke, codegen_); 2297} 2298 2299void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) { 2300 LocationSummary* locations = new (arena_) LocationSummary(invoke, 2301 LocationSummary::kNoCall, 2302 kIntrinsified); 2303 locations->SetInAt(0, Location::RequiresRegister()); 2304 locations->SetOut(Location::SameAsFirstInput()); 2305 locations->AddTemp(Location::RequiresRegister()); 2306} 2307 2308static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask, 2309 X86_64Assembler* assembler) { 2310 Immediate imm_shift(shift); 2311 Immediate imm_mask(mask); 2312 __ movl(temp, reg); 2313 __ shrl(reg, imm_shift); 2314 __ andl(temp, imm_mask); 2315 __ andl(reg, imm_mask); 2316 __ shll(temp, imm_shift); 2317 __ orl(reg, temp); 2318} 2319 2320void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) { 2321 X86_64Assembler* assembler = GetAssembler(); 2322 LocationSummary* locations = invoke->GetLocations(); 2323 2324 CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>(); 2325 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); 2326 2327 /* 2328 * Use one bswap instruction to reverse byte order first and then use 3 rounds of 2329 * swapping bits to reverse bits in a number x. Using bswap to save instructions 2330 * compared to generic luni implementation which has 5 rounds of swapping bits. 
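* (As a concrete check of the steps listed next: reverse(0x12345678) == 0x1E6A2C48.)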
2331 * x = bswap x 2332 * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555; 2333 * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333; 2334 * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F; 2335 */ 2336 __ bswapl(reg); 2337 SwapBits(reg, temp, 1, 0x55555555, assembler); 2338 SwapBits(reg, temp, 2, 0x33333333, assembler); 2339 SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler); 2340} 2341 2342void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) { 2343 LocationSummary* locations = new (arena_) LocationSummary(invoke, 2344 LocationSummary::kNoCall, 2345 kIntrinsified); 2346 locations->SetInAt(0, Location::RequiresRegister()); 2347 locations->SetOut(Location::SameAsFirstInput()); 2348 locations->AddTemp(Location::RequiresRegister()); 2349 locations->AddTemp(Location::RequiresRegister()); 2350} 2351 2352static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask, 2353 int32_t shift, int64_t mask, X86_64Assembler* assembler) { 2354 Immediate imm_shift(shift); 2355 __ movq(temp_mask, Immediate(mask)); 2356 __ movq(temp, reg); 2357 __ shrq(reg, imm_shift); 2358 __ andq(temp, temp_mask); 2359 __ andq(reg, temp_mask); 2360 __ shlq(temp, imm_shift); 2361 __ orq(reg, temp); 2362} 2363 2364void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) { 2365 X86_64Assembler* assembler = GetAssembler(); 2366 LocationSummary* locations = invoke->GetLocations(); 2367 2368 CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>(); 2369 CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>(); 2370 CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>(); 2371 2372 /* 2373 * Use one bswap instruction to reverse byte order first and then use 3 rounds of 2374 * swapping bits to reverse bits in a long number x. Using bswap to save instructions 2375 * compared to generic luni implementation which has 5 rounds of swapping bits. 2376 * x = bswap x 2377 * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555; 2378 * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333; 2379 * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F; 2380 */ 2381 __ bswapq(reg); 2382 SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler); 2383 SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler); 2384 SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler); 2385} 2386 2387static void CreateBitCountLocations( 2388 ArenaAllocator* arena, CodeGeneratorX86_64* codegen, HInvoke* invoke) { 2389 if (!codegen->GetInstructionSetFeatures().HasPopCnt()) { 2390 // Do nothing if there is no popcnt support. This results in generating 2391 // a call for the intrinsic rather than direct code. 2392 return; 2393 } 2394 LocationSummary* locations = new (arena) LocationSummary(invoke, 2395 LocationSummary::kNoCall, 2396 kIntrinsified); 2397 locations->SetInAt(0, Location::Any()); 2398 locations->SetOut(Location::RequiresRegister()); 2399} 2400 2401static void GenBitCount(X86_64Assembler* assembler, 2402 CodeGeneratorX86_64* codegen, 2403 HInvoke* invoke, 2404 bool is_long) { 2405 LocationSummary* locations = invoke->GetLocations(); 2406 Location src = locations->InAt(0); 2407 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 2408 2409 if (invoke->InputAt(0)->IsConstant()) { 2410 // Evaluate this at compile time. 2411 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); 2412 int32_t result = is_long 2413 ? 
POPCOUNT(static_cast<uint64_t>(value)) 2414 : POPCOUNT(static_cast<uint32_t>(value)); 2415 codegen->Load32BitValue(out, result); 2416 return; 2417 } 2418 2419 if (src.IsRegister()) { 2420 if (is_long) { 2421 __ popcntq(out, src.AsRegister<CpuRegister>()); 2422 } else { 2423 __ popcntl(out, src.AsRegister<CpuRegister>()); 2424 } 2425 } else if (is_long) { 2426 DCHECK(src.IsDoubleStackSlot()); 2427 __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex())); 2428 } else { 2429 DCHECK(src.IsStackSlot()); 2430 __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex())); 2431 } 2432} 2433 2434void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) { 2435 CreateBitCountLocations(arena_, codegen_, invoke); 2436} 2437 2438void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) { 2439 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false); 2440} 2441 2442void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) { 2443 CreateBitCountLocations(arena_, codegen_, invoke); 2444} 2445 2446void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) { 2447 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true); 2448} 2449 2450static void CreateOneBitLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_high) { 2451 LocationSummary* locations = new (arena) LocationSummary(invoke, 2452 LocationSummary::kNoCall, 2453 kIntrinsified); 2454 locations->SetInAt(0, Location::Any()); 2455 locations->SetOut(Location::RequiresRegister()); 2456 locations->AddTemp(is_high ? Location::RegisterLocation(RCX) // needs CL 2457 : Location::RequiresRegister()); // any will do 2458} 2459 2460static void GenOneBit(X86_64Assembler* assembler, 2461 CodeGeneratorX86_64* codegen, 2462 HInvoke* invoke, 2463 bool is_high, bool is_long) { 2464 LocationSummary* locations = invoke->GetLocations(); 2465 Location src = locations->InAt(0); 2466 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 2467 2468 if (invoke->InputAt(0)->IsConstant()) { 2469 // Evaluate this at compile time. 2470 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); 2471 if (value == 0) { 2472 __ xorl(out, out); // Clears upper bits too. 2473 return; 2474 } 2475 // Nonzero value. 2476 if (is_high) { 2477 value = is_long ? 63 - CLZ(static_cast<uint64_t>(value)) 2478 : 31 - CLZ(static_cast<uint32_t>(value)); 2479 } else { 2480 value = is_long ? CTZ(static_cast<uint64_t>(value)) 2481 : CTZ(static_cast<uint32_t>(value)); 2482 } 2483 if (is_long) { 2484 codegen->Load64BitValue(out, 1L << value); 2485 } else { 2486 codegen->Load32BitValue(out, 1 << value); 2487 } 2488 return; 2489 } 2490 2491 // Handle the non-constant cases. 2492 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); 2493 if (is_high) { 2494 // Use architectural support: basically 1 << bsr. 2495 if (src.IsRegister()) { 2496 if (is_long) { 2497 __ bsrq(tmp, src.AsRegister<CpuRegister>()); 2498 } else { 2499 __ bsrl(tmp, src.AsRegister<CpuRegister>()); 2500 } 2501 } else if (is_long) { 2502 DCHECK(src.IsDoubleStackSlot()); 2503 __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); 2504 } else { 2505 DCHECK(src.IsStackSlot()); 2506 __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); 2507 } 2508 // BSR sets ZF if the input was zero. 2509 NearLabel is_zero, done; 2510 __ j(kEqual, &is_zero); 2511 __ movl(out, Immediate(1)); // Clears upper bits too. 
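// The variable shift below takes its count from `tmp`; x86 variable shifts
// read the count from CL, which is why CreateOneBitLocations pinned this
// temporary to RCX for the high-bit case.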
2512 if (is_long) { 2513 __ shlq(out, tmp); 2514 } else { 2515 __ shll(out, tmp); 2516 } 2517 __ jmp(&done); 2518 __ Bind(&is_zero); 2519 __ xorl(out, out); // Clears upper bits too. 2520 __ Bind(&done); 2521 } else { 2522 // Copy input into temporary. 2523 if (src.IsRegister()) { 2524 if (is_long) { 2525 __ movq(tmp, src.AsRegister<CpuRegister>()); 2526 } else { 2527 __ movl(tmp, src.AsRegister<CpuRegister>()); 2528 } 2529 } else if (is_long) { 2530 DCHECK(src.IsDoubleStackSlot()); 2531 __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); 2532 } else { 2533 DCHECK(src.IsStackSlot()); 2534 __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); 2535 } 2536 // Do the bit twiddling: basically tmp & -tmp; 2537 if (is_long) { 2538 __ movq(out, tmp); 2539 __ negq(tmp); 2540 __ andq(out, tmp); 2541 } else { 2542 __ movl(out, tmp); 2543 __ negl(tmp); 2544 __ andl(out, tmp); 2545 } 2546 } 2547} 2548 2549void IntrinsicLocationsBuilderX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) { 2550 CreateOneBitLocations(arena_, invoke, /* is_high */ true); 2551} 2552 2553void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) { 2554 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ false); 2555} 2556 2557void IntrinsicLocationsBuilderX86_64::VisitLongHighestOneBit(HInvoke* invoke) { 2558 CreateOneBitLocations(arena_, invoke, /* is_high */ true); 2559} 2560 2561void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) { 2562 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ true); 2563} 2564 2565void IntrinsicLocationsBuilderX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) { 2566 CreateOneBitLocations(arena_, invoke, /* is_high */ false); 2567} 2568 2569void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) { 2570 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ false); 2571} 2572 2573void IntrinsicLocationsBuilderX86_64::VisitLongLowestOneBit(HInvoke* invoke) { 2574 CreateOneBitLocations(arena_, invoke, /* is_high */ false); 2575} 2576 2577void IntrinsicCodeGeneratorX86_64::VisitLongLowestOneBit(HInvoke* invoke) { 2578 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ true); 2579} 2580 2581static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) { 2582 LocationSummary* locations = new (arena) LocationSummary(invoke, 2583 LocationSummary::kNoCall, 2584 kIntrinsified); 2585 locations->SetInAt(0, Location::Any()); 2586 locations->SetOut(Location::RequiresRegister()); 2587} 2588 2589static void GenLeadingZeros(X86_64Assembler* assembler, 2590 CodeGeneratorX86_64* codegen, 2591 HInvoke* invoke, bool is_long) { 2592 LocationSummary* locations = invoke->GetLocations(); 2593 Location src = locations->InAt(0); 2594 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 2595 2596 int zero_value_result = is_long ? 64 : 32; 2597 if (invoke->InputAt(0)->IsConstant()) { 2598 // Evaluate this at compile time. 2599 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); 2600 if (value == 0) { 2601 value = zero_value_result; 2602 } else { 2603 value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value)); 2604 } 2605 codegen->Load32BitValue(out, value); 2606 return; 2607 } 2608 2609 // Handle the non-constant cases. 
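// For nonzero x, BSR yields the index of the highest set bit, so
// CLZ(x) = (width - 1) - BSR(x). Since BSR(x) lies in [0, width - 1], the
// subtraction equals XOR with (width - 1), which is the single-instruction
// correction applied after the branch below.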
2610 if (src.IsRegister()) { 2611 if (is_long) { 2612 __ bsrq(out, src.AsRegister<CpuRegister>()); 2613 } else { 2614 __ bsrl(out, src.AsRegister<CpuRegister>()); 2615 } 2616 } else if (is_long) { 2617 DCHECK(src.IsDoubleStackSlot()); 2618 __ bsrq(out, Address(CpuRegister(RSP), src.GetStackIndex())); 2619 } else { 2620 DCHECK(src.IsStackSlot()); 2621 __ bsrl(out, Address(CpuRegister(RSP), src.GetStackIndex())); 2622 } 2623 2624 // BSR sets ZF if the input was zero, and the output is undefined. 2625 NearLabel is_zero, done; 2626 __ j(kEqual, &is_zero); 2627 2628 // Correct the result from BSR to get the CLZ result. 2629 __ xorl(out, Immediate(zero_value_result - 1)); 2630 __ jmp(&done); 2631 2632 // Fix the zero case with the expected result. 2633 __ Bind(&is_zero); 2634 __ movl(out, Immediate(zero_value_result)); 2635 2636 __ Bind(&done); 2637} 2638 2639void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { 2640 CreateLeadingZeroLocations(arena_, invoke); 2641} 2642 2643void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { 2644 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false); 2645} 2646 2647void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { 2648 CreateLeadingZeroLocations(arena_, invoke); 2649} 2650 2651void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { 2652 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); 2653} 2654 2655static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) { 2656 LocationSummary* locations = new (arena) LocationSummary(invoke, 2657 LocationSummary::kNoCall, 2658 kIntrinsified); 2659 locations->SetInAt(0, Location::Any()); 2660 locations->SetOut(Location::RequiresRegister()); 2661} 2662 2663static void GenTrailingZeros(X86_64Assembler* assembler, 2664 CodeGeneratorX86_64* codegen, 2665 HInvoke* invoke, bool is_long) { 2666 LocationSummary* locations = invoke->GetLocations(); 2667 Location src = locations->InAt(0); 2668 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 2669 2670 int zero_value_result = is_long ? 64 : 32; 2671 if (invoke->InputAt(0)->IsConstant()) { 2672 // Evaluate this at compile time. 2673 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); 2674 if (value == 0) { 2675 value = zero_value_result; 2676 } else { 2677 value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value)); 2678 } 2679 codegen->Load32BitValue(out, value); 2680 return; 2681 } 2682 2683 // Handle the non-constant cases. 2684 if (src.IsRegister()) { 2685 if (is_long) { 2686 __ bsfq(out, src.AsRegister<CpuRegister>()); 2687 } else { 2688 __ bsfl(out, src.AsRegister<CpuRegister>()); 2689 } 2690 } else if (is_long) { 2691 DCHECK(src.IsDoubleStackSlot()); 2692 __ bsfq(out, Address(CpuRegister(RSP), src.GetStackIndex())); 2693 } else { 2694 DCHECK(src.IsStackSlot()); 2695 __ bsfl(out, Address(CpuRegister(RSP), src.GetStackIndex())); 2696 } 2697 2698 // BSF sets ZF if the input was zero, and the output is undefined. 2699 NearLabel done; 2700 __ j(kNotEqual, &done); 2701 2702 // Fix the zero case with the expected result. 
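// Per the Java specification, numberOfTrailingZeros(0) is the full bit
// width: 32 for Integer and 64 for Long, which is what zero_value_result
// holds.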
2703 __ movl(out, Immediate(zero_value_result));
2704
2705 __ Bind(&done);
2706}
2707
2708void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2709 CreateTrailingZeroLocations(arena_, invoke);
2710}
2711
2712void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2713 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
2714}
2715
2716void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2717 CreateTrailingZeroLocations(arena_, invoke);
2718}
2719
2720void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2721 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
2722}
2723
2724UNIMPLEMENTED_INTRINSIC(X86_64, ReferenceGetReferent)
2725UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
2726UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
2727
2728// Java 1.8 intrinsics.
2729UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddInt)
2730UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddLong)
2731UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetInt)
2732UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetLong)
2733UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetObject)
2734
2735UNREACHABLE_INTRINSICS(X86_64)
2736
2737#undef __
2738
2739} // namespace x86_64
2740} // namespace art
2741