// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.

#include "../assembly.h"

// di_int __lshrdi3(di_int input, int count);

// This routine has some extra memory traffic: it loads the 64-bit input via two
// 32-bit loads and merges the halves in a register, rather than issuing a single
// 64-bit load. This is to avoid a write-small, read-large stall.
// However, if callers of this routine can be safely assumed to store the argument
// via a 64-bit store, this is unnecessary memory traffic, and should be avoided.
// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.
#ifdef __i386__
#ifdef __SSE2__

// __lshrdi3, SSE2 variant: logical (zero-filling) right shift of a 64-bit
// value by `count` bits.
//
// Stack on entry, as read by this routine (offsets from %esp):
//      4(%esp)   low  32 bits of input
//      8(%esp)   high 32 bits of input
//     12(%esp)   count
// Result: low 32 bits in %eax, high 32 bits in %edx.
// Clobbers: %xmm0, %xmm2, and %xmm1 unless TRUST_CALLERS_USE_64_BIT_STORES
// is defined.
.text
.align 4
DEFINE_COMPILERRT_FUNCTION(__lshrdi3)
        movd        12(%esp),   %xmm2       // xmm2 = count
#ifndef TRUST_CALLERS_USE_64_BIT_STORES
        // Read the two halves separately and merge them in a register, so a
        // caller that wrote the argument with two 32-bit stores does not
        // cause a write-small, read-large stall on a 64-bit load.
        movd         4(%esp),   %xmm0       // xmm0 = low half
        movd         8(%esp),   %xmm1       // xmm1 = high half
        punpckldq   %xmm1,      %xmm0       // xmm0 = high:low (64-bit input)
#else
        movq         4(%esp),   %xmm0       // xmm0 = 64-bit input, single load
#endif
        psrlq       %xmm2,      %xmm0       // xmm0 = input >> count
        movd        %xmm0,      %eax        // eax = low 32 bits of result
        psrlq       $32,        %xmm0       // bring the high half down
        movd        %xmm0,      %edx        // edx = high 32 bits of result
        ret

#else // No SSE2: use general-purpose registers instead.

// __lshrdi3, GPR variant: same stack layout and result registers as the
// SSE2 variant. Clobbers %ecx and the flags.
//
// NOTE(review): only bit 5 of count is tested, so this path presumably
// expects count in the range 0..63 -- confirm callers never pass more.
.text
.align 4
DEFINE_COMPILERRT_FUNCTION(__lshrdi3)
        movl        12(%esp),   %ecx        // ecx = count
        movl         8(%esp),   %edx        // edx = high half
        movl         4(%esp),   %eax        // eax = low half

        testl       $0x20,      %ecx        // bit 5 set means count >= 32
        jnz         1f

        // count < 32: the low half receives bits shifted out of the high half.
        shrdl       %cl, %edx,  %eax        // low  = (high:low) >> count
        shrl        %cl,        %edx        // high = high >> count
        ret

1:
        // count >= 32: the result fits entirely in the low half.
        movl        %edx,       %eax        // low = old high
        xorl        %edx,       %edx        // high = 0
        shrl        %cl,        %eax        // low >>= count - 32 (the CPU uses
                                            // only the low 5 bits of %cl)
        ret

#endif // __SSE2__
#endif // __i386__