/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/******************************************************************************/
//                     ALGORITHM DESCRIPTION
//                     ---------------------
//
//   Assume x=2^{3*k+j} * 1.b1 b2 ... b5 b6 ... b52, where j = 0,1,2.
//   Let r=(x*2^{-3k-j} - 1.b1 b2 ... b5 1)* rcp[b1 b2 ..b5],
//   where rcp[b1 b2 .. b5]=1/(1.b1 b2 b3 b4 b5 1) in double precision
//   cbrt(2^j * 1. b1 b2 .. b5 1) is approximated as T[j][b1..b5]+D[j][b1..b5]
//   (T stores the high 53 bits, D stores the low order bits)
//   Result=2^k*T+(2^k*T*r)*P+2^k*D
//   where P=p1+p2*r+..+p8*r^7
//
// Special cases:
//  cbrt(NaN) = quiet NaN (computed as x+x, so the invalid exception is raised
//              only when x is a signaling NaN)
//  cbrt(INF) = that INF
//  cbrt(+/-0) = +/-0
//
/******************************************************************************/

#include <private/bionic_asm.h>
# -- Begin  cbrt
//------------------------------------------------------------------------------
// double cbrt(double x)
//
//   In:       %xmm0 = x
//   Out:      %xmm0 = cube root of x
//   Clobbers: %rax, %rcx, %rdx, %r8, %r9, %xmm1-%xmm7, flags
//   Stack:    24 bytes; x is spilled to (%rsp) so that the Inf/NaN path can
//             inspect its two 32-bit halves with integer loads.
//
// Register roles on entry to .L_2TAG_PACKET_2.0.1 (the shared polynomial tail):
//   %rcx  = byte offset j*256 + b*8 into cbrt_table / D_table, where
//           j = biased exponent mod 3 and b = top five significand bits
//   %xmm1 = -(1.b1 b2 ... b52)   (x with its exponent replaced by that of 1.0)
//   %xmm3 = -(1.b1 ... b5 1)
//   %xmm4 = rcp_table entry selected by b
//   %xmm5 = coeff_table[0..1], %xmm6 = coeff_table[2..3]
//   %xmm7 = signed 2^k, assembled directly as an IEEE-754 bit pattern
//
// The ..___tag_value_cbrt.N labels are referenced by the hand-written
// .eh_frame data at the end of the file and must stay on these instructions.
//------------------------------------------------------------------------------
ENTRY(cbrt)
# parameter 1: %xmm0
..B1.1:
..___tag_value_cbrt.1:
        subq      $24, %rsp
..___tag_value_cbrt.3:
        movsd     %xmm0, (%rsp)                 // spill x for the Inf/NaN path
..B1.2:
        movq      %xmm0, %xmm7
        movl      $524032, %edx                 // 0x7ff00: exponent field of (x >> 44)
        movsd     EXP_MSK3(%rip), %xmm5         // 52-bit significand mask
        movsd     EXP_MSK2(%rip), %xmm3         // -(1 + 2^-6)
        psrlq     $44, %xmm7                    // sign | exponent | top 8 significand bits
        pextrw    $0, %xmm7, %ecx
        movd      %xmm7, %eax
        movsd     EXP_MASK(%rip), %xmm1         // -1.0
        movsd     SIG_MASK(%rip), %xmm2
        andl      $248, %ecx                    // rcx = b * 8 (top five significand bits)
        lea       rcp_table(%rip), %r8
        movsd     (%rcx,%r8), %xmm4             // xmm4 = rcp_table[b]
        movq      %rax, %r9
        andl      %eax, %edx
        testl     %edx, %edx
        je        .L_2TAG_PACKET_0.0.1          // exponent field 0: zero or subnormal
        cmpl      $524032, %edx
        je        .L_2TAG_PACKET_1.0.1          // exponent field all ones: Inf or NaN
        shrl      $8, %edx                      // edx = biased exponent e
        shrq      $8, %r9                       // r9  = sign << 11 | e
        andpd     %xmm0, %xmm2                  // leading significand bits of x
        andpd     %xmm5, %xmm0                  // full significand of x
        orpd      %xmm2, %xmm3                  // xmm3 = -(1.b1 ... b5 1)
        orpd      %xmm0, %xmm1                  // xmm1 = -(1.b1 ... b52)
        movapd    coeff_table(%rip), %xmm5
        movl      $5462, %eax                   // ceil(2^14 / 3)
        movapd    16+coeff_table(%rip), %xmm6
        mull      %edx                          // eax = 5462 * e (edx is overwritten)
        movq      %r9, %rdx
        andq      $2047, %r9                    // r9 = e
        shrl      $14, %eax                     // eax = e / 3
        andl      $2048, %edx                   // edx = sign bit, in exponent-field position
        subq      %rax, %r9
        subq      %rax, %r9
        subq      %rax, %r9                     // r9 = e - 3*(e/3) = j
        shlq      $8, %r9                       // j * 256: one 32-entry sub-table per j
        addl      $682, %eax                    // biased exponent of 2^k: e/3 + 1023 - 341
        orl       %edx, %eax                    // reattach the sign of x
        movd      %eax, %xmm7
        addq      %r9, %rcx                     // rcx = j*256 + b*8
        psllq     $52, %xmm7                    // xmm7 = signed 2^k
.L_2TAG_PACKET_2.0.1:
        // Shared tail: Result = 2^k*T + (2^k*T*r)*P + 2^k*D.
        movapd    32+coeff_table(%rip), %xmm2
        movapd    48+coeff_table(%rip), %xmm0
        subsd     %xmm3, %xmm1                  // xmm1 = -(1.b1...b52 - 1.b1...b5 1)
        movq      %xmm7, %xmm3
        lea       cbrt_table(%rip), %r8
        mulsd     (%rcx,%r8), %xmm7             // xmm7 = 2^k * T
        mulsd     %xmm4, %xmm1                  // xmm1 = r; rcp_table entries are negative, cancelling the sign
        lea       D_table(%rip), %r8
        mulsd     (%rcx,%r8), %xmm3             // xmm3 = 2^k * D
        movapd    %xmm1, %xmm4                  // keep a copy of r
        unpcklpd  %xmm1, %xmm1                  // xmm1 = (r, r)
        mulpd     %xmm1, %xmm5
        mulpd     %xmm1, %xmm6
        mulpd     %xmm1, %xmm1                  // xmm1 = (r^2, r^2)
        addpd     %xmm5, %xmm2
        addpd     %xmm6, %xmm0
        mulpd     %xmm1, %xmm2
        mulpd     %xmm1, %xmm1                  // xmm1 = (r^4, r^4)
        mulsd     %xmm7, %xmm4                  // xmm4 = 2^k * T * r
        addpd     %xmm2, %xmm0                  // two partial sums of P, one per lane
        mulsd     %xmm0, %xmm1                  // r^4 * low-lane partial sum
        unpckhpd  %xmm0, %xmm0                  // bring the high-lane partial sum down
        addsd     %xmm1, %xmm0                  // xmm0 = P
        mulsd     %xmm4, %xmm0                  // (2^k*T*r) * P
        addsd     %xmm3, %xmm0                  // + 2^k*D
        addsd     %xmm7, %xmm0                  // + 2^k*T
        jmp       ..B1.4
.L_2TAG_PACKET_0.0.1:
        // Zero or subnormal input: scale by 2^63 and redo the decomposition.
        // xmm1, xmm2, xmm3 and xmm5 still hold the constants loaded in ..B1.2.
        mulsd     SCALE63(%rip), %xmm0
        movq      %xmm0, %xmm7
        movl      $524032, %edx
        psrlq     $44, %xmm7
        pextrw    $0, %xmm7, %ecx
        movd      %xmm7, %eax
        andl      $248, %ecx
        lea       rcp_table(%rip), %r8
        movsd     (%rcx,%r8), %xmm4
        movq      %rax, %r9
        andl      %eax, %edx
        shrl      $8, %edx
        shrq      $8, %r9
        testl     %edx, %edx
        je        .L_2TAG_PACKET_3.0.1          // exponent still 0 after scaling: x is +/-0
        andpd     %xmm0, %xmm2
        andpd     %xmm5, %xmm0
        orpd      %xmm2, %xmm3
        orpd      %xmm0, %xmm1
        movapd    coeff_table(%rip), %xmm5
        movl      $5462, %eax
        movapd    16+coeff_table(%rip), %xmm6
        mull      %edx
        movq      %r9, %rdx
        andq      $2047, %r9
        shrl      $14, %eax
        andl      $2048, %edx
        subq      %rax, %r9
        subq      %rax, %r9
        subq      %rax, %r9
        shlq      $8, %r9
        addl      $661, %eax                    // 682 - 21: undoes the 2^63 pre-scale (63/3 = 21)
        orl       %edx, %eax
        movd      %eax, %xmm7
        addq      %r9, %rcx
        psllq     $52, %xmm7
        jmp       .L_2TAG_PACKET_2.0.1
.L_2TAG_PACKET_3.0.1:
        // x is a zero; r9 = sign << 11 here, so nonzero means negative zero.
        testq     %r9, %r9
        jne       .L_2TAG_PACKET_4.0.1
        xorpd     %xmm0, %xmm0                  // return +0.0
        jmp       ..B1.4
.L_2TAG_PACKET_4.0.1:
        movsd     ZERON(%rip), %xmm0            // return -0.0
        jmp       ..B1.4
.L_2TAG_PACKET_1.0.1:
        // Exponent field is all ones: tell +Inf, -Inf and NaN apart.
        movl      4(%rsp), %eax                 // high 32 bits of x
        movl      (%rsp), %edx                  // low 32 bits of x
        movl      %eax, %ecx
        andl      $2147483647, %ecx             // drop the sign bit
        cmpl      $2146435072, %ecx             // 0x7ff00000
        ja        .L_2TAG_PACKET_5.0.1          // high significand bits set: NaN
        testl     %edx, %edx
        jne       .L_2TAG_PACKET_5.0.1          // low significand bits set: NaN
        cmpl      $2146435072, %eax
        jne       .L_2TAG_PACKET_6.0.1          // sign bit set: -Inf
        movsd     INF(%rip), %xmm0
        jmp       ..B1.4
.L_2TAG_PACKET_6.0.1:
        movsd     NEG_INF(%rip), %xmm0
        jmp       ..B1.4
.L_2TAG_PACKET_5.0.1:
        // NaN: return x + x, which yields the quieted NaN.
        movsd     (%rsp), %xmm0
        addsd     %xmm0, %xmm0
.L_2TAG_PACKET_7.0.1:
..B1.4:
        addq      $24, %rsp
..___tag_value_cbrt.4:
        ret
..___tag_value_cbrt.5:
END(cbrt)
# -- End  cbrt
// Read-only constants used by cbrt.
	.section .rodata, "a"
	.align 16
// coeff_table: eight polynomial coefficients, each an IEEE-754 double stored
// as two .long words (low word first). cbrt loads them as four 16-byte pairs
// with movapd, so the table must stay 16-byte aligned.
coeff_table:
	.long	1553778919
	.long	3213899486
	.long	3534952507
	.long	3215266280
	.long	1646371399
	.long	3214412045
	.long	477218588
	.long	3216798151
	.long	3582521621
	.long	1066628362
	.long	1007461464
	.long	1068473053
	.long	889629714
	.long	1067378449
	.long	1431655765	// 0x3fd5555555555555, i.e. 1/3 rounded to double
	.long	1070945621
	.type	coeff_table,@object
	.size	coeff_table,64
// Bit masks and seed values used by cbrt to split x into fields. Each is one
// IEEE-754 double stored as two .long words (low word first). They are 8-byte
// objects read with movsd; every object ahead of them in this section is a
// multiple of 8 bytes from a 16-byte-aligned start, so .align 8 adds no padding.
	.align 8
// 0x000fffffffffffff: the 52 significand bits.
EXP_MSK3:
	.long	4294967295
	.long	1048575
	.type	EXP_MSK3,@object
	.size	EXP_MSK3,8
	.align 8
// 0xbff0400000000000 = -(1 + 2^-6): sign and exponent of -1.0 plus the
// trailing 1 of the "1.b1 ... b5 1" table breakpoint.
EXP_MSK2:
	.long	0
	.long	3220193280
	.type	EXP_MSK2,@object
	.size	EXP_MSK2,8
	.align 8
// 0xbff0000000000000 = -1.0: OR-ed with the significand of x to form -(1.b1 ... b52).
EXP_MASK:
	.long	0
	.long	3220176896
	.type	EXP_MASK,@object
	.size	EXP_MASK,8
	.align 8
// 0x000fc00000000000: the top six significand bits.
SIG_MASK:
	.long	0
	.long	1032192
	.type	SIG_MASK,@object
	.size	SIG_MASK,8
// rcp_table: 32 doubles (two .long words each, low word first), indexed by the
// top five significand bits of x (cbrt uses byte offset b*8). Every entry has
// its sign bit set: the values are the negated reciprocals -1/(1.b1 ... b5 1).
// Entries are 8-byte doubles read with movsd, hence the 8-byte alignment.
	.align 8
rcp_table:
	.long	528611360
	.long	3220144632
	.long	2884679527
	.long	3220082993
	.long	1991868891
	.long	3220024928
	.long	2298714891
	.long	3219970134
	.long	58835168
	.long	3219918343
	.long	3035110223
	.long	3219869313
	.long	1617585086
	.long	3219822831
	.long	2500867033
	.long	3219778702
	.long	4241943008
	.long	3219736752
	.long	258732970
	.long	3219696825
	.long	404232216
	.long	3219658776
	.long	2172167368
	.long	3219622476
	.long	1544257904
	.long	3219587808
	.long	377579543
	.long	3219554664
	.long	1616385542
	.long	3219522945
	.long	813783277
	.long	3219492562
	.long	3940743189
	.long	3219463431
	.long	2689777499
	.long	3219435478
	.long	1700977147
	.long	3219408632
	.long	3169102082
	.long	3219382828
	.long	327235604
	.long	3219358008
	.long	1244336319
	.long	3219334115
	.long	1300311200
	.long	3219311099
	.long	3095471925
	.long	3219288912
	.long	2166487928
	.long	3219267511
	.long	2913108253
	.long	3219246854
	.long	293672978
	.long	3219226904
	.long	288737297
	.long	3219207624
	.long	1810275472
	.long	3219188981
	.long	174592167
	.long	3219170945
	.long	3539053052
	.long	3219153485
	.long	2164392968
	.long	3219136576
	.type	rcp_table,@object
	.size	rcp_table,256
// cbrt_table: the high parts T[j][b] of cbrt(2^j * 1.b1 ... b5 1), as described
// in the algorithm notes at the top of the file. 96 doubles (two .long words
// each, low word first) laid out as three 32-entry sub-tables, one per
// j = 0, 1, 2; cbrt addresses an entry at byte offset j*256 + b*8.
// Entries are 8-byte doubles used as mulsd memory operands.
	.align 8
cbrt_table:
	// j = 0
	.long	572345495
	.long	1072698681
	.long	1998204467
	.long	1072709382
	.long	3861501553
	.long	1072719872
	.long	2268192434
	.long	1072730162
	.long	2981979308
	.long	1072740260
	.long	270859143
	.long	1072750176
	.long	2958651392
	.long	1072759916
	.long	313113243
	.long	1072769490
	.long	919449400
	.long	1072778903
	.long	2809328903
	.long	1072788162
	.long	2222981587
	.long	1072797274
	.long	2352530781
	.long	1072806244
	.long	594152517
	.long	1072815078
	.long	1555767199
	.long	1072823780
	.long	4282421314
	.long	1072832355
	.long	2355578597
	.long	1072840809
	.long	1162590619
	.long	1072849145
	.long	797864051
	.long	1072857367
	.long	431273680
	.long	1072865479
	.long	2669831148
	.long	1072873484
	.long	733477752
	.long	1072881387
	.long	4280220604
	.long	1072889189
	.long	801961634
	.long	1072896896
	.long	2915370760
	.long	1072904508
	.long	1159613482
	.long	1072912030
	.long	2689944798
	.long	1072919463
	.long	1248687822
	.long	1072926811
	.long	2967951030
	.long	1072934075
	.long	630170432
	.long	1072941259
	.long	3760898254
	.long	1072948363
	.long	0
	.long	1072955392
	.long	2370273294
	.long	1072962345
	// j = 1
	.long	1261754802
	.long	1072972640
	.long	546334065
	.long	1072986123
	.long	1054893830
	.long	1072999340
	.long	1571187597
	.long	1073012304
	.long	1107975175
	.long	1073025027
	.long	3606909377
	.long	1073037519
	.long	1113616747
	.long	1073049792
	.long	4154744632
	.long	1073061853
	.long	3358931423
	.long	1073073713
	.long	4060702372
	.long	1073085379
	.long	747576176
	.long	1073096860
	.long	3023138255
	.long	1073108161
	.long	1419988548
	.long	1073119291
	.long	1914185305
	.long	1073130255
	.long	294389948
	.long	1073141060
	.long	3761802570
	.long	1073151710
	.long	978281566
	.long	1073162213
	.long	823148820
	.long	1073172572
	.long	2420954441
	.long	1073182792
	.long	3815449908
	.long	1073192878
	.long	2046058587
	.long	1073202835
	.long	1807524753
	.long	1073212666
	.long	2628681401
	.long	1073222375
	.long	3225667357
	.long	1073231966
	.long	1555307421
	.long	1073241443
	.long	3454043099
	.long	1073250808
	.long	1208137896
	.long	1073260066
	.long	3659916772
	.long	1073269218
	.long	1886261264
	.long	1073278269
	.long	3593647839
	.long	1073287220
	.long	3086012205
	.long	1073296075
	.long	2769796922
	.long	1073304836
	// j = 2
	.long	888716057
	.long	1073317807
	.long	2201465623
	.long	1073334794
	.long	164369365
	.long	1073351447
	.long	3462666733
	.long	1073367780
	.long	2773905457
	.long	1073383810
	.long	1342879088
	.long	1073399550
	.long	2543933975
	.long	1073415012
	.long	1684477781
	.long	1073430209
	.long	3532178543
	.long	1073445151
	.long	1147747300
	.long	1073459850
	.long	1928031793
	.long	1073474314
	.long	2079717015
	.long	1073488553
	.long	4016765315
	.long	1073502575
	.long	3670431139
	.long	1073516389
	.long	3549227225
	.long	1073530002
	.long	11637607
	.long	1073543422
	.long	588220169
	.long	1073556654
	.long	2635407503
	.long	1073569705
	.long	2042029317
	.long	1073582582
	.long	1925128962
	.long	1073595290
	.long	4136375664
	.long	1073607834
	.long	759964600
	.long	1073620221
	.long	4257606771
	.long	1073632453
	.long	297278907
	.long	1073644538
	.long	3655053093
	.long	1073656477
	.long	2442253172
	.long	1073668277
	.long	1111876799
	.long	1073679941
	.long	3330973139
	.long	1073691472
	.long	3438879452
	.long	1073702875
	.long	3671565478
	.long	1073714153
	.long	1317849547
	.long	1073725310
	.long	1642364115
	.long	1073736348
	.type	cbrt_table,@object
	.size	cbrt_table,768
// D_table: the low-order correction terms D[j][b] that pair with cbrt_table
// (see the algorithm notes at the top of the file). Same layout as cbrt_table:
// 96 doubles (two .long words each, low word first) in three 32-entry
// sub-tables for j = 0, 1, 2, addressed at byte offset j*256 + b*8.
// Entries are 8-byte doubles used as mulsd memory operands.
	.align 8
D_table:
	// j = 0
	.long	4050900474
	.long	1014427190
	.long	1157977860
	.long	1016444461
	.long	1374568199
	.long	1017271387
	.long	2809163288
	.long	1016882676
	.long	3742377377
	.long	1013168191
	.long	3101606597
	.long	1017541672
	.long	65224358
	.long	1017217597
	.long	2691591250
	.long	1017266643
	.long	4020758549
	.long	1017689313
	.long	1316310992
	.long	1018030788
	.long	1031537856
	.long	1014090882
	.long	3261395239
	.long	1016413641
	.long	886424999
	.long	1016313335
	.long	3114776834
	.long	1014195875
	.long	1681120620
	.long	1017825416
	.long	1329600273
	.long	1016625740
	.long	465474623
	.long	1017097119
	.long	4251633980
	.long	1017169077
	.long	1986990133
	.long	1017710645
	.long	752958613
	.long	1017159641
	.long	2216216792
	.long	1018020163
	.long	4282860129
	.long	1015924861
	.long	1557627859
	.long	1016039538
	.long	3889219754
	.long	1018086237
	.long	3684996408
	.long	1017353275
	.long	723532103
	.long	1017717141
	.long	2951149676
	.long	1012528470
	.long	831890937
	.long	1017830553
	.long	1031212645
	.long	1017387331
	.long	2741737450
	.long	1017604974
	.long	2863311531
	.long	1003776682
	.long	4276736099
	.long	1013153088
	// j = 1
	.long	4111778382
	.long	1015673686
	.long	1728065769
	.long	1016413986
	.long	2708718031
	.long	1018078833
	.long	1069335005
	.long	1015291224
	.long	700037144
	.long	1016482032
	.long	2904566452
	.long	1017226861
	.long	4074156649
	.long	1017622651
	.long	25019565
	.long	1015245366
	.long	3601952608
	.long	1015771755
	.long	3267129373
	.long	1017904664
	.long	503203103
	.long	1014921629
	.long	2122011730
	.long	1018027866
	.long	3927295461
	.long	1014189456
	.long	2790625147
	.long	1016024251
	.long	1330460186
	.long	1016940346
	.long	4033568463
	.long	1015538390
	.long	3695818227
	.long	1017509621
	.long	257573361
	.long	1017208868
	.long	3227697852
	.long	1017337964
	.long	234118548
	.long	1017169577
	.long	4009025803
	.long	1017278524
	.long	1948343394
	.long	1017749310
	.long	678398162
	.long	1018144239
	.long	3083864863
	.long	1016669086
	.long	2415453452
	.long	1017890370
	.long	175467344
	.long	1017330033
	.long	3197359580
	.long	1010339928
	.long	2071276951
	.long	1015941358
	.long	268372543
	.long	1016737773
	.long	938132959
	.long	1017389108
	.long	1816750559
	.long	1017337448
	.long	4119203749
	.long	1017152174
	// j = 2
	.long	2578653878
	.long	1013108497
	.long	2470331096
	.long	1014678606
	.long	123855735
	.long	1016553320
	.long	1265650889
	.long	1014782687
	.long	3414398172
	.long	1017182638
	.long	1040773369
	.long	1016158401
	.long	3483628886
	.long	1016886550
	.long	4140499405
	.long	1016191425
	.long	3893477850
	.long	1016964495
	.long	3935319771
	.long	1009634717
	.long	2978982660
	.long	1015027112
	.long	2452709923
	.long	1017990229
	.long	3190365712
	.long	1015835149
	.long	4237588139
	.long	1015832925
	.long	2610678389
	.long	1017962711
	.long	2127316774
	.long	1017405770
	.long	824267502
	.long	1017959463
	.long	2165924042
	.long	1017912225
	.long	2774007076
	.long	1013257418
	.long	4123916326
	.long	1017582284
	.long	1976417958
	.long	1016959909
	.long	4092806412
	.long	1017711279
	.long	119251817
	.long	1015363631
	.long	3475418768
	.long	1017675415
	.long	1972580503
	.long	1015470684
	.long	815541017
	.long	1017517969
	.long	2429917451
	.long	1017397776
	.long	4062888482
	.long	1016749897
	.long	68284153
	.long	1017925678
	.long	2207779246
	.long	1016320298
	.long	1183466520
	.long	1017408657
	.long	143326427
	.long	1017060403
	.type	D_table,@object
	.size	D_table,768
// Scalar constants for the special-case paths of cbrt. Each is one IEEE-754
// double stored as two .long words (low word first) and read with movsd; the
// preceding objects in this section are all multiples of 8 bytes, so .align 8
// adds no padding.
	.align 8
// 0x43e0000000000000 = 2^63: pre-scale applied to subnormal inputs.
SCALE63:
	.long	0
	.long	1138753536
	.type	SCALE63,@object
	.size	SCALE63,8
	.align 8
// 0x8000000000000000 = -0.0: result for cbrt(-0).
ZERON:
	.long	0
	.long	2147483648
	.type	ZERON,@object
	.size	ZERON,8
	.align 8
// 0x7ff0000000000000 = +Inf.
INF:
	.long	0
	.long	2146435072
	.type	INF,@object
	.size	INF,8
	.align 8
// 0xfff0000000000000 = -Inf.
NEG_INF:
	.long	0
	.long	4293918720
	.type	NEG_INF,@object
	.size	NEG_INF,8
// Hand-encoded unwind information for cbrt. The byte values below are emitted
// unchanged; the comments decode them (little-endian DWARF CFI).
	.data
	.section .note.GNU-stack, ""
// -- Begin DWARF2 SEGMENT .eh_frame
	.section .eh_frame,"a",@progbits
.eh_frame_seg:
	.align 1
	// CIE
	.4byte 0x00000014			// CIE length (20 bytes follow)
	.8byte 0x00527a0100000000		// CIE id 0, version 1, augmentation "zR"
	.8byte 0x08070c1b01107801		// code align 1, data align -8, RA = r16, aug len 1, FDE encoding 0x1b; DW_CFA_def_cfa r7, 8
	.4byte 0x00000190			// DW_CFA_offset r16 at CFA-8; two DW_CFA_nop pads
	// FDE for cbrt
	.4byte 0x0000001c			// FDE length (28 bytes follow)
	.4byte 0x0000001c			// offset back to the CIE
	.4byte ..___tag_value_cbrt.1-.		// PC-relative start of cbrt
	.4byte ..___tag_value_cbrt.5-..___tag_value_cbrt.1	// code size covered
	.2byte 0x0400				// augmentation data length 0; DW_CFA_advance_loc4 ...
	.4byte ..___tag_value_cbrt.3-..___tag_value_cbrt.1	// ... past "subq $24, %rsp"
	.2byte 0x200e				// DW_CFA_def_cfa_offset 32
	.byte 0x04				// DW_CFA_advance_loc4 ...
	.4byte ..___tag_value_cbrt.4-..___tag_value_cbrt.3	// ... past "addq $24, %rsp"
	.2byte 0x080e				// DW_CFA_def_cfa_offset 8
	.byte 0x00				// DW_CFA_nop pad
# End