#!/usr/bin/env perl
#
# Generates the x86_64 assembly for OPENSSL_ia32_cpuid by printing perlasm
# text into a pipe that feeds x86_64-xlate.pl.
#
# Arguments (both optional, positional):
#   flavour  - passed through to x86_64-xlate.pl; a value matching
#              nasm/masm/mingw64 selects the Win64 argument registers.
#   output   - output file name passed through to x86_64-xlate.pl; a name
#              ending in ".asm" also selects the Win64 argument registers.
# If the first argument contains a dot it is taken to be the output name and
# the flavour is left undefined.
#
# The generated routine takes one pointer argument and stores three 32-bit
# words through it: CPUID.1 EDX at offset 0, CPUID.1 ECX at offset 4 and
# CPUID.7 EBX at offset 8, each after the adjustments made in the code below.

$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Directory part of the path this script was invoked with; empty when the
# script was invoked without any directory component.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Quote the paths so that directories containing spaces survive the shell.
# An absent flavour/output must stay absent (not become an empty argument),
# so the translator sees the same argument list as before in that case.
$flavour_arg = defined($flavour) ? $flavour : "";
$output_arg  = (defined($output) && length($output)) ? "\"$output\"" : "";
open OUT,"| \"$^X\" \"$xlate\" $flavour_arg $output_arg"
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order

print<<___;
.text

.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@function,1
.align	16
OPENSSL_ia32_cpuid:
	# On Windows, $arg1 is rcx, but that will be clobbered. So make Windows
	# use the same register as Unix.
	mov	$arg1,%rdi
	mov	%rbx,%r8		# save %rbx

	xor	%eax,%eax
	mov	%eax,8(%rdi)		# clear 3rd word
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	# See http://developer.amd.com/wordpress/media/2012/10/254811.pdf [1]

	mov	\$0x80000000,%eax
	cpuid
	# Returns "The largest CPUID extended function input value supported by
	# the processor implementation." in EAX.
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d
	mov	\$0x80000001,%eax
	cpuid
	# Returns feature bits in ECX. See page 20 of [1].
	# TODO(fork): I think this should be a MOV.
	# NOTE(review): %r9d is 1 here (the non-Intel result of the vendor
	# check above). The OR keeps that bit 0 set through the mask below, and
	# .Lnocacheinfo compares %r9d with zero to detect Intel, so a MOV could
	# change the outcome when the jb .Lintel below is taken.
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# isolate AMD XOP bit, 1<<11

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	# Returns APIC ID and number of cores in ECX. See page 27 of [1].
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	# See page 13 of [1].
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# Clear hyper-threading bit.
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

	cmp	\$7,%r11d
	jb	.Lnocacheinfo

	mov	\$7,%eax
	xor	%ecx,%ecx
	cpuid
	mov	%ebx,8(%rdi)

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	# Gets feature information. See table 3-21 in the Intel manual.
	and	\$0xbfefffff,%edx	# force reserved bits to 0
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28) - clear hyper-threading.
	cmp	\$0,%r10d
	je	.Lgeneric

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx
	bt	\$27,%r9d		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
	andl	\$0xffffffdf,8(%rdi)	# clear AVX2, ~(1<<5)
.Ldone:
	movl	%r9d,4(%rdi)
	movl	%r10d,0(%rdi)
	mov	%r8,%rbx		# restore %rbx
	ret
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

___

# Closing the pipe flushes the output and waits for the translator. close
# returns false if a write failed or the translator exited non-zero (in the
# latter case $! is 0 and the status is in $?), so fail loudly instead of
# leaving a truncated output file behind.
close STDOUT
    or die "error closing pipe to $xlate: " . ($! ? $! : "exit status $?");