// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.

#include "../assembly.h"

// di_int __lshrdi3(di_int input, int count);

// This routine has some extra memory traffic, loading the 64-bit input via two
// 32-bit loads, then immediately storing it back to the stack via a single 64-bit
// store.  This is to avoid a write-small, read-large stall.
// However, if callers of this routine can be safely assumed to store the argument
// via a 64-bit store, this is unnecessary memory traffic, and should be avoided.
// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.

#ifdef __i386__
#ifdef __SSE2__

// SSE2 variant of __lshrdi3: logical right shift of a 64-bit value.
//
// Stack on entry (as read by the code below):
//    4(%esp)  low  32 bits of input
//    8(%esp)  high 32 bits of input
//   12(%esp)  shift count
// The result is left in %edx:%eax (high:low).
//
// Unless TRUST_CALLERS_USE_64_BIT_STORES is defined, the input is read with
// two 32-bit loads that are merged in a register by punpckldq; when the macro
// is defined a single 64-bit movq is used instead.

.text
.balign 4
DEFINE_COMPILERRT_FUNCTION(__lshrdi3)
	movd	  12(%esp),		%xmm2	// Load count
#ifndef TRUST_CALLERS_USE_64_BIT_STORES
	movd	   4(%esp),		%xmm0	// Load low half of input
	movd	   8(%esp),		%xmm1	// Load high half of input
	punpckldq	%xmm1,		%xmm0	// Load input: xmm0[63:0] = high:low
#else
	movq	   4(%esp),		%xmm0	// Load input
#endif
	psrlq		%xmm2,		%xmm0	// shift input by count
	movd		%xmm0,		%eax	// low half of result
	psrlq		$32,		%xmm0	// move high half of result into bits 31:0
	movd		%xmm0,		%edx	// high half of result
	ret
END_COMPILERRT_FUNCTION(__lshrdi3)

#else // Use GPRs instead of SSE2 instructions, if they aren't available.

// GPR variant of __lshrdi3.  Same stack layout and return registers as the
// SSE2 variant; the shift is split into a "count < 32" path (shrdl + shrl)
// and a "count >= 32" path (high word moves to low, high word becomes zero).

.text
.balign 4
DEFINE_COMPILERRT_FUNCTION(__lshrdi3)
	movl	  12(%esp),		%ecx	// Load count
	movl	   8(%esp),		%edx	// Load high
	movl	   4(%esp),		%eax	// Load low

	testl		$0x20,		%ecx	// If count >= 32 (bit 5 of count set)
	jnz			1f				// goto 1

	shrdl		%cl, %edx,	%eax	// right shift low by count, filling from high
	shrl		%cl,		%edx	// right shift high by count
	ret

1:	movl		%edx,		%eax	// Move high to low
	xorl		%edx,		%edx	// clear high
	shrl		%cl,		%eax	// shift low by count - 32
								// (shrl only uses the low 5 bits of %cl)
	ret
END_COMPILERRT_FUNCTION(__lshrdi3)

#endif // __SSE2__
#endif // __i386__

NO_EXEC_STACK_DIRECTIVE
