19ad441ffec97db647fee3725b3424284fb913e14Howard Hinnant// This file is dual licensed under the MIT and the University of Illinois Open
29ad441ffec97db647fee3725b3424284fb913e14Howard Hinnant// Source Licenses. See LICENSE.TXT for details.
3b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
419336a2d6b9b375ac106125950f4ff09742d1aecDaniel Dunbar#include "../assembly.h"
519336a2d6b9b375ac106125950f4ff09742d1aecDaniel Dunbar
6b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// di_int __lshrdi3(di_int input, int count);
7b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
8b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// This routine has some extra memory traffic, loading the 64-bit input via two
9b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// 32-bit loads and merging the two halves in a register (punpckldq) rather than
10b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// using a single 64-bit load.  This is to avoid a write-small, read-large stall.
11b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// However, if callers of this routine can be safely assumed to store the argument
12b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// via a 64-bit store, this is unnecessary memory traffic, and should be avoided.
13b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.
14b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
15b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar#ifdef __i386__
16b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar#ifdef __SSE2__
17b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
.text
.balign 4
// di_int __lshrdi3(di_int input, int count)  -- SSE2 variant
//
// Logical (zero-filling) right shift of a 64-bit value by `count` bits.
//
// Arguments are read from the stack:
//    4(%esp)  low 32 bits of input
//    8(%esp)  high 32 bits of input
//   12(%esp)  count
// The 64-bit result is left in %edx (high word) : %eax (low word).
// Scratch: %xmm0, %xmm2, and %xmm1 on the two-load path.
DEFINE_COMPILERRT_FUNCTION(__lshrdi3)
#ifdef TRUST_CALLERS_USE_64_BIT_STORES
	movq		4(%esp),	%xmm0		// input in one 64-bit load
#else
	// Two 32-bit loads merged in a register, so the load width matches
	// 32-bit stores the caller may have used for the argument.
	movd		4(%esp),	%xmm0		// low half of input
	movd		8(%esp),	%xmm1		// high half of input
	punpckldq	%xmm1,		%xmm0		// xmm0[63:0] = high:low
#endif
	movd		12(%esp),	%xmm2		// shift count
	psrlq		%xmm2,		%xmm0		// xmm0 = input >> count
	movd		%xmm0,		%eax		// low word of result
	psrlq		$32,		%xmm0		// move high word into bits 31:0
	movd		%xmm0,		%edx		// high word of result
	ret
END_COMPILERRT_FUNCTION(__lshrdi3)
35b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
36b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar#else // Use GPRs instead of SSE2 instructions, if they aren't available.
37b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
.text
.balign 4
// di_int __lshrdi3(di_int input, int count)  -- general-purpose-register variant
//
// Logical (zero-filling) right shift of a 64-bit value by `count` bits,
// used when SSE2 is not available.
//
// Arguments are read from the stack:
//    4(%esp)  low 32 bits of input
//    8(%esp)  high 32 bits of input
//   12(%esp)  count
// The 64-bit result is left in %edx (high word) : %eax (low word).
// Scratch: %ecx, flags.
DEFINE_COMPILERRT_FUNCTION(__lshrdi3)
	movl		4(%esp),	%eax		// eax = low word
	movl		8(%esp),	%edx		// edx = high word
	movl		12(%esp),	%ecx		// ecx = count

	testl		$0x20,		%ecx		// bit 5 set means count >= 32
	jnz		1f

	// count < 32: bits leaving the high word enter the top of the low word.
	shrdl		%cl, %edx,	%eax		// low  = (high:low) >> count
	shrl		%cl,		%edx		// high = high >> count
	ret

	// count >= 32: the whole result comes from the old high word.
1:
	movl		%edx,		%eax		// low = old high
	shrl		%cl,		%eax		// shrl uses only the low 5 bits of %cl: count - 32
	xorl		%edx,		%edx		// high word of the result is zero
	ret
END_COMPILERRT_FUNCTION(__lshrdi3)
572d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines
58b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar#endif // __SSE2__
59b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar#endif // __i386__
60c58a43648cd6121c51a2e795a28e2ef90d7813e6Pirama Arumuga Nainar
61c58a43648cd6121c51a2e795a28e2ef90d7813e6Pirama Arumuga NainarNO_EXEC_STACK_DIRECTIVE
62c58a43648cd6121c51a2e795a28e2ef90d7813e6Pirama Arumuga Nainar
63