1/*---------------------------------------------------------------------------*
2 *  himul32.h  *
3 *                                                                           *
4 *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20/*
21////////////////////////////////////////////////////////////////////////////
22//
//  FILE:         himul32.h
24//
25//  CREATED:   11-September-99
26//
27//  DESCRIPTION:  A multiplier returns most-significant 32 bits of the 64-bit
28//      product of its two signed 32-bit integers
29//
30//
31//
32//
33//  MODIFICATIONS:
34// Revision history log
35    VSS revision history.  Do not edit by hand.
36
37    $NoKeywords: $
38
39*/
40
/* Test _PPC_ rather than PPC here: VxWorks itself defines PPC in vxcpu.h */
42#if defined(_PPC_)
43
/* Uses the PowerPC mulhw instruction to obtain the high 32 bits of the product */
45
46asm PINLINE int32 himul32(asr_int32_t factor1, asr_int32_T factor2)
47{
48  %   reg factor1;
49  reg factor2;
50
51  mulhw   r3, factor1, factor2  # place the high order 32 bits of the product in the return register r3
52    }
53
54#else
55
56/******************************************************************
57himul32 returns the most-significant 32 bits of the 64-bit
58product of its two signed 32-bit integer arguments.
59In other words, it's the exact value of the mathematical expression
60floor( (factor1 * factor2) / 2**32 )
61This is a platform-independent definition that needs to be
62implemented in platform-specific ways.
63
64Parameters:
65factor1 -- first signed 32 bit integer
66factor2 -- second signed 32 bit integer
67
68Returns:
69the most-significant 32 bits of the multiplication results
70*********************************************************************/
71
72#if COMPILER == C_MICROSOFT
73
74#if TARGET_CPU == CPU_I86
75
76PINLINE asr_int32_t himul32(asr_int32_t factor1, asr_int32_t factor2)
77{
78  asr_int32_t retval;
79  /*
80  // The x86 imul instruction, given a single 32-bit operand, computes
81  // the signed 64-bit product of register EAX and that operand, into
82  // the register pair EDX:EAX.  So we have to move the first factor into
83  // EAX, then IMUL, then take the high 32 bits (in EDX) and move them
84  // back to EAX (because that's where a function's return value is
85  // taken from).
86  */
87  __asm {
88    mov     eax, factor1
89    imul    factor2
90    mov     retval, edx
91  }
92  return retval;
93}
94
95#else /* TARGET_CPU != CPU_I86 */
96
97    PINLINE asr_int32_t himul32(asr_int32_t factor1, asr_int32_t factor2)
98    {
99      union {
100        __int64 full;
101        struct
102        {
103          asr_int32_t lo;
104          asr_int32_t hi;
105        }
106        pieces;
107      } result;
108
109      __int64 x = factor1;
110      __int64 y = factor2;
111      result.full = x * y;
112      return result.pieces.hi;
113    }
114
115#endif /* TARGET_CPU == CPU_I86 */
116
117#else  /* ~ COMPILER != C_MICROSOFT */
118
119    /*** ANSI C ***/
120
121    PINLINE asr_int32_t himul32(asr_int32_t factor1, asr_int32_t factor2)
122    {
123
124      asr_uint32_t x = (asr_uint32_t)factor1;
125      asr_uint32_t y = (asr_uint32_t)factor2;
126      asr_uint32_t xhi, xlo, yhi, ylo;
127      asr_uint32_t hi, lo, mid;
128      asr_uint32_t oldlo, carry;
129      int sign = 0;
130
131      if (factor1 < 0)
132      {
133        x = (asr_uint32_t) - factor1;
134        sign = 1;
135      }
136      if (factor2 < 0)
137      {
138        y = (asr_uint32_t) - factor2;
139        sign = 1 - sign;
140      }
141      xhi = x >> 16;       /* <= 2**15 */
142      xlo = x & 0xffff;    /* <  2**16 */
143      yhi = y >> 16;       /* <= 2**15 */
144      ylo = y & 0xffff;    /* <  2**16 */
145
146      lo = xlo * ylo;
147      /*
148      // xhi <= 2**15 and ylo <= 2**16-1, so
149      // xhi * ylo <= 2**31 - 2**15.
150      // Ditto for yhi * xlo, so their sum is
151      // <= 2*32 - 2**16, and so the next line can't overflow.
152      */
153      mid = xhi * ylo + yhi * xlo;
154      hi = xhi * yhi;
155
156      /*
157      // Now add the low part of mid to the high part of lo, and the
158      // high part of mid to the low part of hi:
159      //                    xxxxxxxx xxxxxxxx     lo
160      //           xxxxxxxx xxxxxxxx              mid
161      //  xxxxxxxx xxxxxxxx                       hi
162      //  -----------------------------------
163      //                    xxxxxxxx xxxxxxxx     lo
164      //  xxxxxxxx xxxxxxxx                       hi
165      // Note that folding mid into lo can cause a carry.  An old trick
166      // for portable carry-detection applies:  if a and b are unsigned,
167      // their sum overflows if and only if it's less than a (or b; can
168      // check either one).
169      */
170
171      oldlo = lo;
172      lo += mid << 16;
173      carry = lo < oldlo;
174
175      hi += carry + (mid >> 16);
176
177      if (sign)
178      {
179        /*
180        // Result must be negated, which is the same as taking the
181        // complement and adding 1.  So there's a carry out of the low
182        // half if and only if it's 0 now.
183        */
184        hi = ~hi;
185        hi += lo == 0;
186      }
187
188      return (asr_int32_t)hi;
189    }
190
191#endif  /* ~ COMPILER == C_MICROSOFT */
192
193
194#endif
195