#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA256 block procedure for ARMv4. May 2007.

# Performance is ~2x better than gcc 3.4 generated code and in "abso-
# lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
# byte [on single-issue Xscale PXA250 core].

# July 2010.
#
# Rescheduling for dual-issue pipeline resulted in 22% improvement on
# Cortex A8 core and ~20 cycles per processed byte.

# February 2011.
#
# Profiler-assisted and platform-specific optimization resulted in 16%
# improvement on Cortex A8 core and ~15.4 cycles per processed byte.

# September 2013.
#
# Add NEON implementation. On Cortex A8 it was measured to process one
# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
# code (meaning that latter performs sub-optimally, nothing was done
# about it).
33 34while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} 35open STDOUT,">$output"; 36 37$ctx="r0"; $t0="r0"; 38$inp="r1"; $t4="r1"; 39$len="r2"; $t1="r2"; 40$T1="r3"; $t3="r3"; 41$A="r4"; 42$B="r5"; 43$C="r6"; 44$D="r7"; 45$E="r8"; 46$F="r9"; 47$G="r10"; 48$H="r11"; 49@V=($A,$B,$C,$D,$E,$F,$G,$H); 50$t2="r12"; 51$Ktbl="r14"; 52 53@Sigma0=( 2,13,22); 54@Sigma1=( 6,11,25); 55@sigma0=( 7,18, 3); 56@sigma1=(17,19,10); 57 58sub BODY_00_15 { 59my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; 60 61$code.=<<___ if ($i<16); 62#if __ARM_ARCH__>=7 63 @ ldr $t1,[$inp],#4 @ $i 64# if $i==15 65 str $inp,[sp,#17*4] @ make room for $t4 66# endif 67 eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` 68 add $a,$a,$t2 @ h+=Maj(a,b,c) from the past 69 eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) 70 rev $t1,$t1 71#else 72 @ ldrb $t1,[$inp,#3] @ $i 73 add $a,$a,$t2 @ h+=Maj(a,b,c) from the past 74 ldrb $t2,[$inp,#2] 75 ldrb $t0,[$inp,#1] 76 orr $t1,$t1,$t2,lsl#8 77 ldrb $t2,[$inp],#4 78 orr $t1,$t1,$t0,lsl#16 79# if $i==15 80 str $inp,[sp,#17*4] @ make room for $t4 81# endif 82 eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` 83 orr $t1,$t1,$t2,lsl#24 84 eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) 85#endif 86___ 87$code.=<<___; 88 ldr $t2,[$Ktbl],#4 @ *K256++ 89 add $h,$h,$t1 @ h+=X[i] 90 str $t1,[sp,#`$i%16`*4] 91 eor $t1,$f,$g 92 add $h,$h,$t0,ror#$Sigma1[0] @ h+=Sigma1(e) 93 and $t1,$t1,$e 94 add $h,$h,$t2 @ h+=K256[i] 95 eor $t1,$t1,$g @ Ch(e,f,g) 96 eor $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]` 97 add $h,$h,$t1 @ h+=Ch(e,f,g) 98#if $i==31 99 and $t2,$t2,#0xff 100 cmp $t2,#0xf2 @ done? 
101#endif 102#if $i<15 103# if __ARM_ARCH__>=7 104 ldr $t1,[$inp],#4 @ prefetch 105# else 106 ldrb $t1,[$inp,#3] 107# endif 108 eor $t2,$a,$b @ a^b, b^c in next round 109#else 110 ldr $t1,[sp,#`($i+2)%16`*4] @ from future BODY_16_xx 111 eor $t2,$a,$b @ a^b, b^c in next round 112 ldr $t4,[sp,#`($i+15)%16`*4] @ from future BODY_16_xx 113#endif 114 eor $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]` @ Sigma0(a) 115 and $t3,$t3,$t2 @ (b^c)&=(a^b) 116 add $d,$d,$h @ d+=h 117 eor $t3,$t3,$b @ Maj(a,b,c) 118 add $h,$h,$t0,ror#$Sigma0[0] @ h+=Sigma0(a) 119 @ add $h,$h,$t3 @ h+=Maj(a,b,c) 120___ 121 ($t2,$t3)=($t3,$t2); 122} 123 124sub BODY_16_XX { 125my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; 126 127$code.=<<___; 128 @ ldr $t1,[sp,#`($i+1)%16`*4] @ $i 129 @ ldr $t4,[sp,#`($i+14)%16`*4] 130 mov $t0,$t1,ror#$sigma0[0] 131 add $a,$a,$t2 @ h+=Maj(a,b,c) from the past 132 mov $t2,$t4,ror#$sigma1[0] 133 eor $t0,$t0,$t1,ror#$sigma0[1] 134 eor $t2,$t2,$t4,ror#$sigma1[1] 135 eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1]) 136 ldr $t1,[sp,#`($i+0)%16`*4] 137 eor $t2,$t2,$t4,lsr#$sigma1[2] @ sigma1(X[i+14]) 138 ldr $t4,[sp,#`($i+9)%16`*4] 139 140 add $t2,$t2,$t0 141 eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` @ from BODY_00_15 142 add $t1,$t1,$t2 143 eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) 144 add $t1,$t1,$t4 @ X[i] 145___ 146 &BODY_00_15(@_); 147} 148 149$code=<<___; 150#if defined(__arm__) 151#include "arm_arch.h" 152 153.text 154.code 32 155 156.type K256,%object 157.align 5 158K256: 159.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 160.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 161.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 162.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 163.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc 164.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da 165.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 166.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 167.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 168.word 
0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 169.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 170.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 171.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 172.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 173.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 174.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 175.size K256,.-K256 176.word 0 @ terminator 177.LOPENSSL_armcap: 178.word OPENSSL_armcap_P-sha256_block_data_order 179.align 5 180 181.global sha256_block_data_order 182.hidden sha256_block_data_order 183.type sha256_block_data_order,%function 184sha256_block_data_order: 185 sub r3,pc,#8 @ sha256_block_data_order 186 add $len,$inp,$len,lsl#6 @ len to point at the end of inp 187#if __ARM_ARCH__>=7 188 ldr r12,.LOPENSSL_armcap 189 ldr r12,[r3,r12] @ OPENSSL_armcap_P 190 tst r12,#1 191 bne .LNEON 192#endif 193 stmdb sp!,{$ctx,$inp,$len,r4-r11,lr} 194 ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H} 195 sub $Ktbl,r3,#256+32 @ K256 196 sub sp,sp,#16*4 @ alloca(X[16]) 197.Loop: 198# if __ARM_ARCH__>=7 199 ldr $t1,[$inp],#4 200# else 201 ldrb $t1,[$inp,#3] 202# endif 203 eor $t3,$B,$C @ magic 204 eor $t2,$t2,$t2 205___ 206for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); } 207$code.=".Lrounds_16_xx:\n"; 208for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); } 209$code.=<<___; 210 ldreq $t3,[sp,#16*4] @ pull ctx 211 bne .Lrounds_16_xx 212 213 add $A,$A,$t2 @ h+=Maj(a,b,c) from the past 214 ldr $t0,[$t3,#0] 215 ldr $t1,[$t3,#4] 216 ldr $t2,[$t3,#8] 217 add $A,$A,$t0 218 ldr $t0,[$t3,#12] 219 add $B,$B,$t1 220 ldr $t1,[$t3,#16] 221 add $C,$C,$t2 222 ldr $t2,[$t3,#20] 223 add $D,$D,$t0 224 ldr $t0,[$t3,#24] 225 add $E,$E,$t1 226 ldr $t1,[$t3,#28] 227 add $F,$F,$t2 228 ldr $inp,[sp,#17*4] @ pull inp 229 ldr $t2,[sp,#18*4] @ pull inp+len 230 add $G,$G,$t0 231 add $H,$H,$t1 232 stmia $t3,{$A,$B,$C,$D,$E,$F,$G,$H} 233 cmp $inp,$t2 234 sub $Ktbl,$Ktbl,#256 @ rewind Ktbl 235 bne .Loop 236 237 add sp,sp,#`16+3`*4 @ 
destroy frame 238#if __ARM_ARCH__>=5 239 ldmia sp!,{r4-r11,pc} 240#else 241 ldmia sp!,{r4-r11,lr} 242 tst lr,#1 243 moveq pc,lr @ be binary compatible with V4, yet 244 bx lr @ interoperable with Thumb ISA:-) 245#endif 246___ 247###################################################################### 248# NEON stuff 249# 250{{{ 251my @X=map("q$_",(0..3)); 252my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25"); 253my $Xfer=$t4; 254my $j=0; 255 256sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } 257sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } 258 259sub AUTOLOAD() # thunk [simplified] x86-style perlasm 260{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./; 261 my $arg = pop; 262 $arg = "#$arg" if ($arg*1 eq $arg); 263 $code .= "\t$opcode\t".join(',',@_,$arg)."\n"; 264} 265 266sub Xupdate() 267{ use integer; 268 my $body = shift; 269 my @insns = (&$body,&$body,&$body,&$body); 270 my ($a,$b,$c,$d,$e,$f,$g,$h); 271 272 &vext_8 ($T0,@X[0],@X[1],4); # X[1..4] 273 eval(shift(@insns)); 274 eval(shift(@insns)); 275 eval(shift(@insns)); 276 &vext_8 ($T1,@X[2],@X[3],4); # X[9..12] 277 eval(shift(@insns)); 278 eval(shift(@insns)); 279 eval(shift(@insns)); 280 &vshr_u32 ($T2,$T0,$sigma0[0]); 281 eval(shift(@insns)); 282 eval(shift(@insns)); 283 &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += X[9..12] 284 eval(shift(@insns)); 285 eval(shift(@insns)); 286 &vshr_u32 ($T1,$T0,$sigma0[2]); 287 eval(shift(@insns)); 288 eval(shift(@insns)); 289 &vsli_32 ($T2,$T0,32-$sigma0[0]); 290 eval(shift(@insns)); 291 eval(shift(@insns)); 292 &vshr_u32 ($T3,$T0,$sigma0[1]); 293 eval(shift(@insns)); 294 eval(shift(@insns)); 295 &veor ($T1,$T1,$T2); 296 eval(shift(@insns)); 297 eval(shift(@insns)); 298 &vsli_32 ($T3,$T0,32-$sigma0[1]); 299 eval(shift(@insns)); 300 eval(shift(@insns)); 301 &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[0]); 302 eval(shift(@insns)); 303 eval(shift(@insns)); 304 &veor ($T1,$T1,$T3); # sigma0(X[1..4]) 305 eval(shift(@insns)); 306 eval(shift(@insns)); 
307 &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[0]); 308 eval(shift(@insns)); 309 eval(shift(@insns)); 310 &vshr_u32 ($T5,&Dhi(@X[3]),$sigma1[2]); 311 eval(shift(@insns)); 312 eval(shift(@insns)); 313 &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += sigma0(X[1..4]) 314 eval(shift(@insns)); 315 eval(shift(@insns)); 316 &veor ($T5,$T5,$T4); 317 eval(shift(@insns)); 318 eval(shift(@insns)); 319 &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[1]); 320 eval(shift(@insns)); 321 eval(shift(@insns)); 322 &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[1]); 323 eval(shift(@insns)); 324 eval(shift(@insns)); 325 &veor ($T5,$T5,$T4); # sigma1(X[14..15]) 326 eval(shift(@insns)); 327 eval(shift(@insns)); 328 &vadd_i32 (&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15]) 329 eval(shift(@insns)); 330 eval(shift(@insns)); 331 &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[0]); 332 eval(shift(@insns)); 333 eval(shift(@insns)); 334 &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[0]); 335 eval(shift(@insns)); 336 eval(shift(@insns)); 337 &vshr_u32 ($T5,&Dlo(@X[0]),$sigma1[2]); 338 eval(shift(@insns)); 339 eval(shift(@insns)); 340 &veor ($T5,$T5,$T4); 341 eval(shift(@insns)); 342 eval(shift(@insns)); 343 &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[1]); 344 eval(shift(@insns)); 345 eval(shift(@insns)); 346 &vld1_32 ("{$T0}","[$Ktbl,:128]!"); 347 eval(shift(@insns)); 348 eval(shift(@insns)); 349 &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[1]); 350 eval(shift(@insns)); 351 eval(shift(@insns)); 352 &veor ($T5,$T5,$T4); # sigma1(X[16..17]) 353 eval(shift(@insns)); 354 eval(shift(@insns)); 355 &vadd_i32 (&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17]) 356 eval(shift(@insns)); 357 eval(shift(@insns)); 358 &vadd_i32 ($T0,$T0,@X[0]); 359 while($#insns>=2) { eval(shift(@insns)); } 360 &vst1_32 ("{$T0}","[$Xfer,:128]!"); 361 eval(shift(@insns)); 362 eval(shift(@insns)); 363 364 push(@X,shift(@X)); # "rotate" X[] 365} 366 367sub Xpreload() 368{ use integer; 369 my $body = shift; 370 my @insns = (&$body,&$body,&$body,&$body); 371 my 
($a,$b,$c,$d,$e,$f,$g,$h); 372 373 eval(shift(@insns)); 374 eval(shift(@insns)); 375 eval(shift(@insns)); 376 eval(shift(@insns)); 377 &vld1_32 ("{$T0}","[$Ktbl,:128]!"); 378 eval(shift(@insns)); 379 eval(shift(@insns)); 380 eval(shift(@insns)); 381 eval(shift(@insns)); 382 &vrev32_8 (@X[0],@X[0]); 383 eval(shift(@insns)); 384 eval(shift(@insns)); 385 eval(shift(@insns)); 386 eval(shift(@insns)); 387 &vadd_i32 ($T0,$T0,@X[0]); 388 foreach (@insns) { eval; } # remaining instructions 389 &vst1_32 ("{$T0}","[$Xfer,:128]!"); 390 391 push(@X,shift(@X)); # "rotate" X[] 392} 393 394sub body_00_15 () { 395 ( 396 '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'. 397 '&add ($h,$h,$t1)', # h+=X[i]+K[i] 398 '&eor ($t1,$f,$g)', 399 '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))', 400 '&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past 401 '&and ($t1,$t1,$e)', 402 '&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e) 403 '&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))', 404 '&eor ($t1,$t1,$g)', # Ch(e,f,g) 405 '&add ($h,$h,$t2,"ror#$Sigma1[0]")', # h+=Sigma1(e) 406 '&eor ($t2,$a,$b)', # a^b, b^c in next round 407 '&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a) 408 '&add ($h,$h,$t1)', # h+=Ch(e,f,g) 409 '&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'. 410 '&ldr ($t1,"[$Ktbl]") if ($j==15);'. 411 '&ldr ($t1,"[sp,#64]") if ($j==31)', 412 '&and ($t3,$t3,$t2)', # (b^c)&=(a^b) 413 '&add ($d,$d,$h)', # d+=h 414 '&add ($h,$h,$t0,"ror#$Sigma0[0]");'. # h+=Sigma0(a) 415 '&eor ($t3,$t3,$b)', # Maj(a,b,c) 416 '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);' 417 ) 418} 419 420$code.=<<___; 421#if __ARM_ARCH__>=7 422.fpu neon 423.align 4 424.LNEON: 425 stmdb sp!,{r4-r12,lr} 426 427 mov $t2,sp 428 sub sp,sp,#16*4+16 @ alloca 429 sub $Ktbl,r3,#256+32 @ K256 430 bic sp,sp,#15 @ align for 128-bit stores 431 432 vld1.8 {@X[0]},[$inp]! 433 vld1.8 {@X[1]},[$inp]! 434 vld1.8 {@X[2]},[$inp]! 435 vld1.8 {@X[3]},[$inp]! 436 vld1.32 {$T0},[$Ktbl,:128]! 
437 vld1.32 {$T1},[$Ktbl,:128]! 438 vld1.32 {$T2},[$Ktbl,:128]! 439 vld1.32 {$T3},[$Ktbl,:128]! 440 vrev32.8 @X[0],@X[0] @ yes, even on 441 str $ctx,[sp,#64] 442 vrev32.8 @X[1],@X[1] @ big-endian 443 str $inp,[sp,#68] 444 mov $Xfer,sp 445 vrev32.8 @X[2],@X[2] 446 str $len,[sp,#72] 447 vrev32.8 @X[3],@X[3] 448 str $t2,[sp,#76] @ save original sp 449 vadd.i32 $T0,$T0,@X[0] 450 vadd.i32 $T1,$T1,@X[1] 451 vst1.32 {$T0},[$Xfer,:128]! 452 vadd.i32 $T2,$T2,@X[2] 453 vst1.32 {$T1},[$Xfer,:128]! 454 vadd.i32 $T3,$T3,@X[3] 455 vst1.32 {$T2},[$Xfer,:128]! 456 vst1.32 {$T3},[$Xfer,:128]! 457 458 ldmia $ctx,{$A-$H} 459 sub $Xfer,$Xfer,#64 460 ldr $t1,[sp,#0] 461 eor $t2,$t2,$t2 462 eor $t3,$B,$C 463 b .L_00_48 464 465.align 4 466.L_00_48: 467___ 468 &Xupdate(\&body_00_15); 469 &Xupdate(\&body_00_15); 470 &Xupdate(\&body_00_15); 471 &Xupdate(\&body_00_15); 472$code.=<<___; 473 teq $t1,#0 @ check for K256 terminator 474 ldr $t1,[sp,#0] 475 sub $Xfer,$Xfer,#64 476 bne .L_00_48 477 478 ldr $inp,[sp,#68] 479 ldr $t0,[sp,#72] 480 sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl 481 teq $inp,$t0 482 subeq $inp,$inp,#64 @ avoid SEGV 483 vld1.8 {@X[0]},[$inp]! @ load next input block 484 vld1.8 {@X[1]},[$inp]! 485 vld1.8 {@X[2]},[$inp]! 486 vld1.8 {@X[3]},[$inp]! 
487 strne $inp,[sp,#68] 488 mov $Xfer,sp 489___ 490 &Xpreload(\&body_00_15); 491 &Xpreload(\&body_00_15); 492 &Xpreload(\&body_00_15); 493 &Xpreload(\&body_00_15); 494$code.=<<___; 495 ldr $t0,[$t1,#0] 496 add $A,$A,$t2 @ h+=Maj(a,b,c) from the past 497 ldr $t2,[$t1,#4] 498 ldr $t3,[$t1,#8] 499 ldr $t4,[$t1,#12] 500 add $A,$A,$t0 @ accumulate 501 ldr $t0,[$t1,#16] 502 add $B,$B,$t2 503 ldr $t2,[$t1,#20] 504 add $C,$C,$t3 505 ldr $t3,[$t1,#24] 506 add $D,$D,$t4 507 ldr $t4,[$t1,#28] 508 add $E,$E,$t0 509 str $A,[$t1],#4 510 add $F,$F,$t2 511 str $B,[$t1],#4 512 add $G,$G,$t3 513 str $C,[$t1],#4 514 add $H,$H,$t4 515 str $D,[$t1],#4 516 stmia $t1,{$E-$H} 517 518 movne $Xfer,sp 519 ldrne $t1,[sp,#0] 520 eorne $t2,$t2,$t2 521 ldreq sp,[sp,#76] @ restore original sp 522 eorne $t3,$B,$C 523 bne .L_00_48 524 525 ldmia sp!,{r4-r12,pc} 526#endif 527___ 528}}} 529$code.=<<___; 530.size sha256_block_data_order,.-sha256_block_data_order 531.asciz "SHA256 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>" 532.align 2 533.comm OPENSSL_armcap_P,4,4 534 535#endif 536___ 537 538$code =~ s/\`([^\`]*)\`/eval $1/gem; 539$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 540print $code; 541close STDOUT; # enforce flush 542