Context Navigation

← Previous Change
Next Change →

Changeset 2616 for pjproject/trunk

Timestamp:

Apr 18, 2009 2:29:28 PM (16 years ago)

Author:

bennylp

Message:

More ticket #774: optimization for siren7/siren14 codecs

Location:

pjproject/trunk

Files:

: 1 added
: 1 deleted
: 12 edited

pjmedia/src/pjmedia-codec/g7221.c (modified) (3 diffs)
third_party/build/g7221/libg7221codec.dsp (modified) (3 diffs)
third_party/g7221/common/common.c (modified) (7 diffs)
third_party/g7221/common/stl-files/basop32.c (modified) (28 diffs)
third_party/g7221/common/stl-files/basop32.h (modified) (4 diffs)
third_party/g7221/common/stl-files/basop32_i.h (added)
third_party/g7221/common/stl-files/count.c (deleted)
third_party/g7221/common/stl-files/count.h (modified) (4 diffs)
third_party/g7221/decode/coef2sam.c (modified) (5 diffs)
third_party/g7221/decode/dct4_s.c (modified) (12 diffs)
third_party/g7221/decode/decoder.c (modified) (13 diffs)
third_party/g7221/encode/dct4_a.c (modified) (7 diffs)
third_party/g7221/encode/encoder.c (modified) (23 diffs)
third_party/g7221/encode/sam2coef.c (modified) (5 diffs)

Legend:

: Unmodified
: Added
: Removed

pjproject/trunk/pjmedia/src/pjmedia-codec/g7221.c

-                      r2603
+                      r2616
+}
+#if defined(PJ_IS_LITTLE_ENDIAN) && PJ_IS_LITTLE_ENDIAN!=0
+PJ_INLINE(void) swap_bytes(pj_uint16_t *buf, unsigned count)
+{
+    pj_uint16_t *end = buf + count;
+    while (buf != end) {
+        *buf = (pj_uint16_t)((*buf << 8) | (*buf >> 8));
+        ++buf;
+    }
+}
+#else
+#define swap_bytes(buf, count)
+#endif
 /*
  * Initialize and register G722.1 codec factory to pjmedia endpoint.
 …
             output->buf);
+#if defined(PJ_IS_LITTLE_ENDIAN) && PJ_IS_LITTLE_ENDIAN!=0
+    {
+        pj_uint16_t *p, *p_end;
+        p = (pj_uint16_t*)output->buf;
+        p_end = p + codec_data->frame_size/2;
+        while (p < p_end) {
+            *p = pj_htons(*p);
+            ++p;
+        }
+    }
+#endif
+    /* Encoder output is in native host byte order, while ITU says
+     * it must be in network byte order (MSB first).
+     */
+    swap_bytes((pj_uint16_t*)output->buf, codec_data->frame_size/2);
     output->type = PJMEDIA_FRAME_TYPE_AUDIO;
 …
                          PJMEDIA_CODEC_EFRMINLEN);
+        /* Decoder requires input of 16-bits array, so we need to take care
+         * about endianness.
+        /* Decoder requires its input as an array of 16-bit words in native
+         * host byte order, while frames received from the network are in
+         * network byte order (MSB first).
          */
+#if defined(PJ_IS_LITTLE_ENDIAN) && PJ_IS_LITTLE_ENDIAN!=0
+        {
+            pj_uint16_t *p, *p_end;
+            p = (pj_uint16_t*)input->buf;
+            p_end = p + codec_data->frame_size/2;
+            while (p < p_end) {
+                *p = pj_ntohs(*p);
+                ++p;
+            }
+        }
+#endif
+        swap_bytes((pj_uint16_t*)input->buf, codec_data->frame_size/2);
         bitobj.code_word_ptr = (Word16*)input->buf;

pjproject/trunk/third_party/build/g7221/libg7221codec.dsp

-                      r2607
+                      r2616
 F90=df.exe
 # ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /YX /FD /c
+# ADD CPP /nologo /MD /W3 /GX /O2 /I "../.." /I "../../g7221/common" /I "../../g7221/common/stl-files" /I "../../../pjlib/include" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /YX /FD /c
+# ADD CPP /nologo /MD /W3 /GX /O2 /I "../.." /I "../../g7221/common" /I "../../g7221/common/stl-files" /I "../../../pjlib/include" /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /FR /YX /FD /c
+# SUBTRACT CPP /Z<none>
 # ADD BASE RSC /l 0x409 /d "NDEBUG"
 # ADD RSC /l 0x409 /d "NDEBUG"
 …
 F90=df.exe
 # ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /YX /FD /GZ /c
 # ADD CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /I "../.." /I "../../g7221/common" /I "../../g7221/common/stl-files" /I "../../../pjlib/include" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /YX /FD /GZ /c
+# ADD CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /I "../.." /I "../../g7221/common" /I "../../g7221/common/stl-files" /I "../../../pjlib/include" /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /FR /YX /FD /GZ /c
 # ADD BASE RSC /l 0x409 /d "_DEBUG"
 # ADD RSC /l 0x409 /d "_DEBUG"
 …
 # Begin Source File
 SOURCE="..\..\g7221\common\stl-files\count.c"
+SOURCE="..\..\g7221\common\stl-files\basop32_i.h"
 # End Source File
 # Begin Source File

pjproject/trunk/third_party/g7221/common/common.c

-                      r2563
+                      r2616
         number_of_available_bits = sub(number_of_available_bits,frame_size);
         number_of_available_bits = extract_l(L_mult0(number_of_available_bits,5));
         number_of_available_bits = shr(number_of_available_bits,3);
+        number_of_available_bits = shr_nocheck(number_of_available_bits,3);
         number_of_available_bits = add(number_of_available_bits,frame_size);
+    }
 …
+    {
         min_plus_max = add(max,min);
         two_x_number_of_available_bits = shl(number_of_available_bits,1);
+        two_x_number_of_available_bits = shl_nocheck(number_of_available_bits,1);
         temp = sub(min_plus_max,two_x_number_of_available_bits);
 …
                 if (max_rate_categories[region] > 0)
+                {
                     itemp0 = shl(max_rate_categories[region],1);
+                    itemp0 = shl_nocheck(max_rate_categories[region],1);
                     itemp1 = sub(offset,rms_index[region]);
                     itemp0 = sub(itemp1,itemp0);
 …
                 if (temp < 0)
+                {
                     itemp0 = shl(min_rate_categories[region],1);
+                    itemp0 = shl_nocheck(min_rate_categories[region],1);
                     itemp1 = sub(offset,rms_index[region]);
                     itemp0 = sub(itemp1,itemp0);
 …
+        {
             j = sub(test_offset,rms_index[region]);
             j = shr(j,1);
+            j = shr_nocheck(j,1);
             /* Ensure j is between 0 and NUM_CAT-1 */
 …
             move16();
+        }
         delta = shr(delta,1);
+        delta = shr_nocheck(delta,1);
         test(); /* for the while loop */
     } while (delta > 0);
 …
+    {
         j = sub(offset,rms_index[region]);
         j = shr(j,1);
+        j = shr_nocheck(j,1);
         /* make sure j is between 0 and NUM_CAT-1 */

pjproject/trunk/third_party/g7221/common/stl-files/basop32.c

-                      r2601
+                      r2616
 /*___________________________________________________________________________
  |                                                                           |
- |   Local Functions                                                         |
- |___________________________________________________________________________|
-*/
-Word16 saturate (Word32 L_var1);
-/*___________________________________________________________________________
- |                                                                           |
  |   Constants and Globals                                                   |
  |___________________________________________________________________________|
 */
+Flag Overflow = 0;
+Flag Carry = 0;
+#if INCLUDE_UNSAFE
+Flag g7221_Overflow = 0;
+Flag g7221_Carry = 0;
+#endif
 /*___________________________________________________________________________
 …
  |___________________________________________________________________________|
 */
+/*___________________________________________________________________________
+ |                                                                           |
+ |   Function Name : saturate                                                |
+ |                                                                           |
+ |   Purpose :                                                               |
+ |                                                                           |
+ |    Limit the 32 bit input to the range of a 16 bit word.                  |
+ |                                                                           |
+ |   Inputs :                                                                |
+ |                                                                           |
+ |    L_var1                                                                 |
+ |             32 bit long signed integer (Word32) whose value falls in the  |
+ |             range : 0x8000 0000 <= L_var1 <= 0x7fff ffff.                 |
+/*___________________________________________________________________________
+ |                                                                           |
+ |   Function Name : shr                                                     |
+ |                                                                           |
+ |   Purpose :                                                               |
+ |                                                                           |
+ |   Arithmetically shift the 16 bit input var1 right var2 positions with    |
+ |   sign extension. If var2 is negative, arithmetically shift var1 left by  |
+ |   -var2 with sign extension. Saturate the result in case of underflows or |
+ |   overflows.                                                              |
+ |                                                                           |
+ |   Complexity weight : 1                                                   |
+ |                                                                           |
+ |   Inputs :                                                                |
+ |                                                                           |
+ |    var1                                                                   |
+ |             16 bit short signed integer (Word16) whose value falls in the |
+ |             range : 0xffff 8000 <= var1 <= 0x0000 7fff.                   |
+ |                                                                           |
+ |    var2                                                                   |
+ |             16 bit short signed integer (Word16) whose value falls in the |
+ |             range : 0xffff 8000 <= var2 <= 0x0000 7fff.                   |
  |                                                                           |
  |   Outputs :                                                               |
 …
  |___________________________________________________________________________|
 */
+Word16 saturate (Word32 L_var1)
+{
+    Word16 var_out;
+    if (L_var1 > 0X00007fffL)
+    {
+        Overflow = 1;
+        var_out = MAX_16;
+    }
+    else if (L_var1 < (Word32) 0xffff8000L)
+    {
+        Overflow = 1;
+        var_out = MIN_16;
+Word16 shr (Word16 var1, Word16 var2)
+{
+    if (var2 < 0)
+    {
+        if (var2 < -16)
+            var2 = -16;
+        return shl_nocheck(var1, (Word16) -var2);
+    }
     else
+    {
+        var_out = extract_l (L_var1);
+#if (WMOPS)
+        multiCounter[currCounter].extract_l--;
+#endif
+    }
+    return (var_out);
+}
+/* ------------------------- End of saturate() ------------------------- */
+/*___________________________________________________________________________
+ |                                                                           |
+ |   Function Name : add                                                     |
+ |                                                                           |
+ |   Purpose :                                                               |
+ |                                                                           |
+ |    Performs the addition (var1+var2) with overflow control and saturation;|
+ |    the 16 bit result is set at +32767 when overflow occurs or at -32768   |
+ |    when underflow occurs.                                                 |
+ |                                                                           |
+ |   Complexity weight : 1                                                   |
+ |                                                                           |
+ |   Inputs :                                                                |
+ |                                                                           |
+ |    var1                                                                   |
+ |             16 bit short signed integer (Word16) whose value falls in the |
+ |             range : 0xffff 8000 <= var1 <= 0x0000 7fff.                   |
+ |                                                                           |
+ |    var2                                                                   |
+ |             16 bit short signed integer (Word16) whose value falls in the |
+ |             range : 0xffff 8000 <= var2 <= 0x0000 7fff.                   |
+ |                                                                           |
+ |   Outputs :                                                               |
+ |                                                                           |
+ |    none                                                                   |
+ |                                                                           |
+ |   Return Value :                                                          |
+ |                                                                           |
+ |    var_out                                                                |
+ |             16 bit short signed integer (Word16) whose value falls in the |
+ |             range : 0xffff 8000 <= var_out <= 0x0000 7fff.                |
+ |___________________________________________________________________________|
+*/
+Word16 add (Word16 var1, Word16 var2)
+{
+    Word16 var_out;
+    Word32 L_sum;
+    L_sum = (Word32) var1 + var2;
+    var_out = saturate (L_sum);
+#if (WMOPS)
+    multiCounter[currCounter].add++;
+#endif
+    return (var_out);
+}
+/* ------------------------- End of add() ------------------------- */
+/*___________________________________________________________________________
+ |                                                                           |
+ |   Function Name : sub                                                     |
+ |                                                                           |
+ |   Purpose :                                                               |
+ |                                                                           |
+ |    Performs the subtraction (var1-var2) with overflow control and satu-   |
+ |    ration; the 16 bit result is set at +32767 when overflow occurs or at  |
+ |    -32768 when underflow occurs.                                          |
+ |                                                                           |
+ |   Complexity weight : 1                                                   |
+ |                                                                           |
+ |   Inputs :                                                                |
+ |                                                                           |
+ |    var1                                                                   |
+ |             16 bit short signed integer (Word16) whose value falls in the |
+ |             range : 0xffff 8000 <= var1 <= 0x0000 7fff.                   |
+ |                                                                           |
+ |    var2                                                                   |
+ |             16 bit short signed integer (Word16) whose value falls in the |
+ |             range : 0xffff 8000 <= var2 <= 0x0000 7fff.                   |
+ |                                                                           |
+ |   Outputs :                                                               |
+ |                                                                           |
+ |    none                                                                   |
+ |                                                                           |
+ |   Return Value :                                                          |
+ |                                                                           |
+ |    var_out                                                                |
+ |             16 bit short signed integer (Word16) whose value falls in the |
+ |             range : 0xffff 8000 <= var_out <= 0x0000 7fff.                |
+ |___________________________________________________________________________|
+*/
+Word16 sub (Word16 var1, Word16 var2)
+{
+    Word16 var_out;
+    Word32 L_diff;
+    L_diff = (Word32) var1 - var2;
+    var_out = saturate (L_diff);
+#if (WMOPS)
+    multiCounter[currCounter].sub++;
+#endif
+    return (var_out);
+}
+/* ------------------------- End of sub() ------------------------- */
+/*___________________________________________________________________________
+ |                                                                           |
+ |   Function Name : abs_s                                                   |
+ |                                                                           |
+ |   Purpose :                                                               |
+ |                                                                           |
+ |    Absolute value of var1; abs_s(-32768) = 32767.                         |
+ |                                                                           |
+ |   Complexity weight : 1                                                   |
+ |                                                                           |
+ |   Inputs :                                                                |
+ |                                                                           |
+ |    var1                                                                   |
+ |             16 bit short signed integer (Word16) whose value falls in the |
+ |             range : 0xffff 8000 <= var1 <= 0x0000 7fff.                   |
+ |                                                                           |
+ |   Outputs :                                                               |
+ |                                                                           |
+ |    none                                                                   |
+ |                                                                           |
+ |   Return Value :                                                          |
+ |                                                                           |
+ |    var_out                                                                |
+ |             16 bit short signed integer (Word16) whose value falls in the |
+ |             range : 0x0000 0000 <= var_out <= 0x0000 7fff.                |
+ |___________________________________________________________________________|
+*/
+Word16 abs_s (Word16 var1)
+{
+    Word16 var_out;
+    if (var1 == (Word16) 0X8000)
+    {
+        var_out = MAX_16;
+    }
+    else
+    {
+        if (var1 < 0)
+        {
+            var_out = -var1;
+        }
+        else
+        {
+            var_out = var1;
+        }
+    }
+#if (WMOPS)
+    multiCounter[currCounter].abs_s++;
+#endif
+    return (var_out);
+}
+/* ------------------------- End of abs_s() ------------------------- */
+        return shr_nocheck(var1, var2);
+    }
+}
+/* ------------------------- End of shr() ------------------------- */
 …
 Word16 shl (Word16 var1, Word16 var2)
+{
-    Word16 var_out;
-    Word32 result;
     if (var2 < 0)
+    {
+        if (var2 < -16)
+            var2 = -16;
+        var_out = shr (var1, (Word16) -var2);
+#if (WMOPS)
+        multiCounter[currCounter].shr--;
+#endif
+        return shr_nocheck(var1, (Word16) -var2);
+    }
     else
+    {
+        result = (Word32) var1 *((Word32) 1 << var2);
+        if ((var2 > 15 && var1 != 0) || (result != (Word32) ((Word16) result)))
+        {
+            Overflow = 1;
+            var_out = (var1 > 0) ? MAX_16 : MIN_16;
+        }
+        else
+        {
+            var_out = extract_l (result);
+#if (WMOPS)
+            multiCounter[currCounter].extract_l--;
+#endif
+        }
+    }
+#if (WMOPS)
+    multiCounter[currCounter].shl++;
+#endif
+    return (var_out);
+        return shl_nocheck(var1, var2);
+    }
+}
 /* ------------------------- End of shl() ------------------------- */
-/*___________________________________________________________________________
- |                                                                           |
- |   Function Name : shr                                                     |
- |                                                                           |
- |   Purpose :                                                               |
- |                                                                           |
- |   Arithmetically shift the 16 bit input var1 right var2 positions with    |
- |   sign extension. If var2 is negative, arithmetically shift var1 left by  |
- |   -var2 with sign extension. Saturate the result in case of underflows or |
- |   overflows.                                                              |
- |                                                                           |
- |   Complexity weight : 1                                                   |
- |                                                                           |
- |   Inputs :                                                                |
- |                                                                           |
- |    var1                                                                   |
- |             16 bit short signed integer (Word16) whose value falls in the |
- |             range : 0xffff 8000 <= var1 <= 0x0000 7fff.                   |
- |                                                                           |
- |    var2                                                                   |
- |             16 bit short signed integer (Word16) whose value falls in the |
- |             range : 0xffff 8000 <= var1 <= 0x0000 7fff.                   |
- |                                                                           |
- |   Outputs :                                                               |
- |                                                                           |
- |    none                                                                   |
- |                                                                           |
- |   Return Value :                                                          |
- |                                                                           |
- |    var_out                                                                |
- |             16 bit short signed integer (Word16) whose value falls in the |
- |             range : 0xffff 8000 <= var_out <= 0x0000 7fff.                |
- |___________________________________________________________________________|
-*/
-Word16 shr (Word16 var1, Word16 var2)
+{
-    Word16 var_out;
-    if (var2 < 0)
+    {
-        if (var2 < -16)
-            var2 = -16;
-        var_out = shl (var1, (Word16) -var2);
-#if (WMOPS)
-        multiCounter[currCounter].shl--;
-#endif
+    }
-    else
+    {
-        if (var2 >= 15)
+        {
-            var_out = (var1 < 0) ? -1 : 0;
+        }
-        else
+        {
-            if (var1 < 0)
+            {
-                var_out = ~((~var1) >> var2);
+            }
-            else
+            {
-                var_out = var1 >> var2;
+            }
+        }
+    }
-#if (WMOPS)
-    multiCounter[currCounter].shr++;
-#endif
-    return (var_out);
+}
-/* ------------------------- End of shr() ------------------------- */
 …
 /*___________________________________________________________________________
  |                                                                           |
+ |   Function Name : L_mult                                                  |
+ |                                                                           |
+ |   Purpose :                                                               |
+ |                                                                           |
+ |   L_mult is the 32 bit result of the multiplication of var1 times var2    |
+ |   with one shift left i.e.:                                               |
+ |        L_mult(var1,var2) = L_shl((var1 times var2),1) and                 |
+ |        L_mult(-32768,-32768) = 2147483647.                                |
+ |   Function Name : L_msu                                                   |
+ |                                                                           |
+ |   Purpose :                                                               |
+ |                                                                           |
+ |   Multiply var1 by var2 and shift the result left by 1. Subtract the 32   |
+ |   bit result from L_var3 with saturation, return a 32 bit result:         |
+ |        L_msu(L_var3,var1,var2) = L_sub(L_var3,L_mult(var1,var2)).         |
  |                                                                           |
  |   Complexity weight : 1                                                   |
  |                                                                           |
  |   Inputs :                                                                |
+ |                                                                           |
+ |    L_var3   32 bit long signed integer (Word32) whose value falls in the  |
+ |             range : 0x8000 0000 <= L_var3 <= 0x7fff ffff.                 |
  |                                                                           |
  |    var1                                                                   |
 …
  |___________________________________________________________________________|
 */
+Word32 L_mult (Word16 var1, Word16 var2)
+{
+    Word32 L_var_out;
+    L_var_out = (Word32) var1 *(Word32) var2;
+    if (L_var_out != (Word32) 0x40000000L)
+    {
+        L_var_out *= 2;
+    }
+    else
+    {
+        Overflow = 1;
+        L_var_out = MAX_32;
+    }
+#if (WMOPS)
+    multiCounter[currCounter].L_mult++;
+#endif
+    return (L_var_out);
+}
+/* ------------------------- End of L_mult() ------------------------- */
+/*___________________________________________________________________________
+ |                                                                           |
+ |   Function Name : negate                                                  |
+ |                                                                           |
+ |   Purpose :                                                               |
+ |                                                                           |
+ |   Negate var1 with saturation, saturate in the case where input is -32768:|
+ |                negate(var1) = sub(0,var1).                                |
+ |                                                                           |
+ |   Complexity weight : 1                                                   |
+ |                                                                           |
+ |   Inputs :                                                                |
+ |                                                                           |
+ |    var1                                                                   |
+ |             16 bit short signed integer (Word16) whose value falls in the |
+ |             range : 0xffff 8000 <= var1 <= 0x0000 7fff.                   |
+ |                                                                           |
+ |   Outputs :                                                               |
+ |                                                                           |
+ |    none                                                                   |
+ |                                                                           |
+ |   Return Value :                                                          |
+ |                                                                           |
+ |    var_out                                                                |
+ |             16 bit short signed integer (Word16) whose value falls in the |
+ |             range : 0xffff 8000 <= var_out <= 0x0000 7fff.                |
+ |___________________________________________________________________________|
+*/
+Word16 negate (Word16 var1)
+{
+    Word16 var_out;
+    var_out = (var1 == MIN_16) ? MAX_16 : -var1;
+#if (WMOPS)
+    multiCounter[currCounter].negate++;
+#endif
+    return (var_out);
+}
+/* ------------------------- End of negate() ------------------------- */
+/*___________________________________________________________________________
+ |                                                                           |
+ |   Function Name : extract_h                                               |
+ |                                                                           |
+ |   Purpose :                                                               |
+ |                                                                           |
+ |   Return the 16 MSB of L_var1.                                            |
+ |                                                                           |
+ |   Complexity weight : 1                                                   |
+ |                                                                           |
+ |   Inputs :                                                                |
+ |                                                                           |
+ |    L_var1                                                                 |
+ |             32 bit long signed integer (Word32 ) whose value falls in the |
+ |             range : 0x8000 0000 <= L_var1 <= 0x7fff ffff.                 |
+ |                                                                           |
+ |   Outputs :                                                               |
+ |                                                                           |
+ |    none                                                                   |
+ |                                                                           |
+ |   Return Value :                                                          |
+ |                                                                           |
+ |    var_out                                                                |
+ |             16 bit short signed integer (Word16) whose value falls in the |
+ |             range : 0xffff 8000 <= var_out <= 0x0000 7fff.                |
+ |___________________________________________________________________________|
+*/
+Word16 extract_h (Word32 L_var1)
+{
+    Word16 var_out;
+    var_out = (Word16) (L_var1 >> 16);
+#if (WMOPS)
+    multiCounter[currCounter].extract_h++;
+#endif
+    return (var_out);
+}
+/* ------------------------- End of extract_h() ------------------------- */
+/*___________________________________________________________________________
+ |                                                                           |
+ |   Function Name : extract_l                                               |
+ |                                                                           |
+ |   Purpose :                                                               |
+ |                                                                           |
+ |   Return the 16 LSB of L_var1.                                            |
+ |                                                                           |
+ |   Complexity weight : 1                                                   |
+ |                                                                           |
+ |   Inputs :                                                                |
+ |                                                                           |
+ |    L_var1                                                                 |
+ |             32 bit long signed integer (Word32 ) whose value falls in the |
+ |             range : 0x8000 0000 <= L_var1 <= 0x7fff ffff.                 |
+ |                                                                           |
+ |   Outputs :                                                               |
+ |                                                                           |
+ |    none                                                                   |
+ |                                                                           |
+ |   Return Value :                                                          |
+ |                                                                           |
+ |    var_out                                                                |
+ |             16 bit short signed integer (Word16) whose value falls in the |
+ |             range : 0xffff 8000 <= var_out <= 0x0000 7fff.                |
+ |___________________________________________________________________________|
+*/
+Word16 extract_l (Word32 L_var1)
+{
+    /* Narrow the 32-bit input to its 16-bit low part via the cast. */
+    Word16 lower_half = (Word16) L_var1;
+#if (WMOPS)
+    /* Complexity accounting: one extract_l operation was executed. */
+    multiCounter[currCounter].extract_l++;
+#endif
+    return lower_half;
+}
+/* ------------------------- End of extract_l() ------------------------- */
+/*___________________________________________________________________________
+ |                                                                           |
+ |   Function Name : itu_round                                               |
+ |                                                                           |
+ |   Purpose :                                                               |
+ |                                                                           |
+ |   Round the lower 16 bits of the 32 bit input number into the MS 16 bits  |
+ |   with saturation. Shift the resulting bits right by 16 and return the 16 |
+ |   bit number:                                                             |
+ |             itu_round(L_var1) = extract_h(L_add(L_var1,32768))            |
+ |                                                                           |
+ |   Complexity weight : 1                                                   |
+ |                                                                           |
+ |   Inputs :                                                                |
+ |                                                                           |
+ |    L_var1                                                                 |
+ |             32 bit long signed integer (Word32 ) whose value falls in the |
+ |             range : 0x8000 0000 <= L_var1 <= 0x7fff ffff.                 |
+ |                                                                           |
+ |   Outputs :                                                               |
+ |                                                                           |
+ |    none                                                                   |
+ |                                                                           |
+ |   Return Value :                                                          |
+ |                                                                           |
+ |    var_out                                                                |
+ |             16 bit short signed integer (Word16) whose value falls in the |
+ |             range : 0xffff 8000 <= var_out <= 0x0000 7fff.                |
+ |___________________________________________________________________________|
+*/
+Word16 itu_round (Word32 L_var1)
+{
+    /* Saturating add of 0x8000 carries the low half-word into the high one. */
+    Word32 biased = L_add (L_var1, (Word32) 0x00008000L);
+    /* The rounded value is the high half-word of the biased sum. */
+    Word16 rounded = extract_h (biased);
+#if (WMOPS)
+    /* Undo the counts made by the nested L_add()/extract_h() calls so that
+     * the whole operation is accounted for as a single round. */
+    multiCounter[currCounter].L_add--;
+    multiCounter[currCounter].extract_h--;
+    multiCounter[currCounter].round++;
+#endif
+    return rounded;
+}
+/* ------------------------- End of itu_round() ------------------------- */
+/*___________________________________________________________________________
+ |                                                                           |
+ |   Function Name : L_mac                                                   |
+ |                                                                           |
+ |   Purpose :                                                               |
+ |                                                                           |
+ |   Multiply var1 by var2 and shift the result left by 1. Add the 32 bit    |
+ |   result to L_var3 with saturation, return a 32 bit result:               |
+ |        L_mac(L_var3,var1,var2) = L_add(L_var3,L_mult(var1,var2)).         |
+ |                                                                           |
+ |   Complexity weight : 1                                                   |
+ |                                                                           |
+ |   Inputs :                                                                |
+ |                                                                           |
+ |    L_var3   32 bit long signed integer (Word32) whose value falls in the  |
+ |             range : 0x8000 0000 <= L_var3 <= 0x7fff ffff.                 |
+ |                                                                           |
+ |    var1                                                                   |
+ |             16 bit short signed integer (Word16) whose value falls in the |
+ |             range : 0xffff 8000 <= var1 <= 0x0000 7fff.                   |
+ |                                                                           |
+ |    var2                                                                   |
+ |             16 bit short signed integer (Word16) whose value falls in the |
+ |             range : 0xffff 8000 <= var2 <= 0x0000 7fff.                   |
+ |                                                                           |
+ |   Outputs :                                                               |
+ |                                                                           |
+ |    none                                                                   |
+ |                                                                           |
+ |   Return Value :                                                          |
+ |                                                                           |
+ |    L_var_out                                                              |
+ |             32 bit long signed integer (Word32) whose value falls in the  |
+ |             range : 0x8000 0000 <= L_var_out <= 0x7fff ffff.              |
+ |___________________________________________________________________________|
+*/
+Word32 L_mac (Word32 L_var3, Word16 var1, Word16 var2)
+Word32 L_msu (Word32 L_var3, Word16 var1, Word16 var2)
+{
     Word32 L_var_out;
 …
     multiCounter[currCounter].L_mult--;
 #endif
-    L_var_out = L_add (L_var3, L_product);
-#if (WMOPS)
-    multiCounter[currCounter].L_add--;
-    multiCounter[currCounter].L_mac++;
-#endif
-    return (L_var_out);
+}
-/* ------------------------- End of L_mac() ------------------------- */
-/*___________________________________________________________________________
- |                                                                           |
- |   Function Name : L_msu                                                   |
- |                                                                           |
- |   Purpose :                                                               |
- |                                                                           |
- |   Multiply var1 by var2 and shift the result left by 1. Subtract the 32   |
- |   bit result to L_var3 with saturation, return a 32 bit result:           |
- |        L_msu(L_var3,var1,var2) = L_sub(L_var3,L_mult(var1,var2)).         |
- |                                                                           |
- |   Complexity weight : 1                                                   |
- |                                                                           |
- |   Inputs :                                                                |
- |                                                                           |
- |    L_var3   32 bit long signed integer (Word32) whose value falls in the  |
- |             range : 0x8000 0000 <= L_var3 <= 0x7fff ffff.                 |
- |                                                                           |
- |    var1                                                                   |
- |             16 bit short signed integer (Word16) whose value falls in the |
- |             range : 0xffff 8000 <= var1 <= 0x0000 7fff.                   |
- |                                                                           |
- |    var2                                                                   |
- |             16 bit short signed integer (Word16) whose value falls in the |
- |             range : 0xffff 8000 <= var1 <= 0x0000 7fff.                   |
- |                                                                           |
- |   Outputs :                                                               |
- |                                                                           |
- |    none                                                                   |
- |                                                                           |
- |   Return Value :                                                          |
- |                                                                           |
- |    L_var_out                                                              |
- |             32 bit long signed integer (Word32) whose value falls in the  |
- |             range : 0x8000 0000 <= L_var_out <= 0x7fff ffff.              |
- |___________________________________________________________________________|
-*/
-Word32 L_msu (Word32 L_var3, Word16 var1, Word16 var2)
+{
-    Word32 L_var_out;
-    Word32 L_product;
-    L_product = L_mult (var1, var2);
-#if (WMOPS)
-    multiCounter[currCounter].L_mult--;
-#endif
     L_var_out = L_sub (L_var3, L_product);
 #if (WMOPS)
 …
 /* ------------------------- End of L_msu() ------------------------- */
+#if INCLUDE_UNSAFE
 /*___________________________________________________________________________
  |                                                                           |
 …
     return (L_var_out);
+}
+#endif
 /* ------------------------- End of L_macNs() ------------------------- */
+#if INCLUDE_UNSAFE
 /*___________________________________________________________________________
  |                                                                           |
 …
     return (L_var_out);
+}
+#endif
 /* ------------------------- End of L_msuNs() ------------------------- */
+/*___________________________________________________________________________
+ |                                                                           |
+ |   Function Name : L_add                                                   |
+ |                                                                           |
+ |   Purpose :                                                               |
+ |                                                                           |
+ |   32 bits addition of the two 32 bits variables (L_var1+L_var2) with      |
+ |   overflow control and saturation; the result is set at +2147483647 when  |
+ |   overflow occurs or at -2147483648 when underflow occurs.                |
+ |                                                                           |
+ |   Complexity weight : 2                                                   |
+ |                                                                           |
+ |   Inputs :                                                                |
+ |                                                                           |
+ |    L_var1   32 bit long signed integer (Word32) whose value falls in the  |
+ |             range : 0x8000 0000 <= L_var1 <= 0x7fff ffff.                 |
+ |                                                                           |
+ |    L_var2   32 bit long signed integer (Word32) whose value falls in the  |
+ |             range : 0x8000 0000 <= L_var2 <= 0x7fff ffff.                 |
+ |                                                                           |
+ |   Outputs :                                                               |
+ |                                                                           |
+ |    none                                                                   |
+ |                                                                           |
+ |   Return Value :                                                          |
+ |                                                                           |
+ |    L_var_out                                                              |
+ |             32 bit long signed integer (Word32) whose value falls in the  |
+ |             range : 0x8000 0000 <= L_var_out <= 0x7fff ffff.              |
+ |___________________________________________________________________________|
+*/
+Word32 L_add (Word32 L_var1, Word32 L_var2)
+{
+    Word32 L_var_out;
+    /* Add in unsigned arithmetic: overflowing a signed addition is undefined
+     * behaviour in C, whereas the unsigned sum wraps around and leaves a
+     * well-defined bit pattern for the overflow test below. */
+    L_var_out = (Word32) ((UWord32) L_var1 + (UWord32) L_var2);
+    /* Overflow is only possible when both operands carry the same sign... */
+    if (((L_var1 ^ L_var2) & MIN_32) == 0)
+    {
+        /* ...and shows up as a sum whose sign differs from the operands'. */
+        if ((L_var_out ^ L_var1) & MIN_32)
+        {
+            /* Saturate towards the sign of the operands and flag it. */
+            L_var_out = (L_var1 < 0) ? MIN_32 : MAX_32;
+            Overflow = 1;
+        }
+    }
+#if (WMOPS)
+    multiCounter[currCounter].L_add++;
+#endif
+    return (L_var_out);
+}
+/* ------------------------- End of L_add() ------------------------- */
+/*___________________________________________________________________________
+ |                                                                           |
+ |   Function Name : L_sub                                                   |
+ |                                                                           |
+ |   Purpose :                                                               |
+ |                                                                           |
+ |   32 bits subtraction of the two 32 bits variables (L_var1-L_var2) with   |
+ |   overflow control and saturation; the result is set at +2147483647 when  |
+ |   overflow occurs or at -2147483648 when underflow occurs.                |
+ |                                                                           |
+ |   Complexity weight : 2                                                   |
+ |                                                                           |
+ |   Inputs :                                                                |
+ |                                                                           |
+ |    L_var1   32 bit long signed integer (Word32) whose value falls in the  |
+ |             range : 0x8000 0000 <= L_var1 <= 0x7fff ffff.                 |
+ |                                                                           |
+ |    L_var2   32 bit long signed integer (Word32) whose value falls in the  |
+ |             range : 0x8000 0000 <= L_var2 <= 0x7fff ffff.                 |
+ |                                                                           |
+ |   Outputs :                                                               |
+ |                                                                           |
+ |    none                                                                   |
+ |                                                                           |
+ |   Return Value :                                                          |
+ |                                                                           |
+ |    L_var_out                                                              |
+ |             32 bit long signed integer (Word32) whose value falls in the  |
+ |             range : 0x8000 0000 <= L_var_out <= 0x7fff ffff.              |
+ |___________________________________________________________________________|
+*/
+Word32 L_sub (Word32 L_var1, Word32 L_var2)
+{
+    Word32 L_var_out;
+    /* Subtract in unsigned arithmetic: overflowing a signed subtraction is
+     * undefined behaviour in C, whereas the unsigned difference wraps around
+     * and leaves a well-defined bit pattern for the overflow test below. */
+    L_var_out = (Word32) ((UWord32) L_var1 - (UWord32) L_var2);
+    /* Overflow is only possible when the operands have opposite signs... */
+    if (((L_var1 ^ L_var2) & MIN_32) != 0)
+    {
+        /* ...and shows up as a result whose sign differs from L_var1's. */
+        if ((L_var_out ^ L_var1) & MIN_32)
+        {
+            /* Saturate towards the sign of L_var1 and flag it. */
+            L_var_out = (L_var1 < 0L) ? MIN_32 : MAX_32;
+            Overflow = 1;
+        }
+    }
+#if (WMOPS)
+    multiCounter[currCounter].L_sub++;
+#endif
+    return (L_var_out);
+}
+/* ------------------------- End of L_sub() ------------------------- */
+#if INCLUDE_UNSAFE
 /*___________________________________________________________________________
  |                                                                           |
 …
     Flag carry_int = 0;
     L_var_out = L_var1 + L_var2 + Carry;
+    L_var_out = L_var1 + L_var2 + GET_CARRY();
     L_test = L_var1 + L_var2;
 …
     if ((L_var1 > 0) && (L_var2 > 0) && (L_test < 0))
+    {
         Overflow = 1;
+        SET_OVERFLOW(1);
         carry_int = 0;
+    }
 …
             if (L_test >= 0)
+            {
                 Overflow = 1;
+                SET_OVERFLOW(1);
                 carry_int = 1;
+            }
             else
+            {
                 Overflow = 0;
+                SET_OVERFLOW(0);
                 carry_int = 1;
+            }
 …
             if (((L_var1 ^ L_var2) < 0) && (L_test >= 0))
+            {
                 Overflow = 0;
+                SET_OVERFLOW(0);
                 carry_int = 1;
+            }
             else
+            {
                 Overflow = 0;
+                SET_OVERFLOW(0);
                 carry_int = 0;
+            }
 …
+    }
     if (Carry)
+    if (GET_CARRY())
+    {
         if (L_test == MAX_32)
+        {
             Overflow = 1;
             Carry = carry_int;
+            SET_OVERFLOW(1);
+            SET_CARRY(carry_int);
+        }
         else
 …
             if (L_test == (Word32) 0xFFFFFFFFL)
+            {
                 Carry = 1;
+                SET_CARRY(1);
+            }
             else
+            {
                 Carry = carry_int;
+                SET_CARRY(carry_int);
+            }
+        }
 …
     else
+    {
         Carry = carry_int;
+        SET_CARRY(carry_int);
+    }
 …
     return (L_var_out);
+}
+#endif
 /* ------------------------- End of L_add_c() ------------------------- */
+#if INCLUDE_UNSAFE
 /*___________________________________________________________________________
  |                                                                           |
 …
     Flag carry_int = 0;
     if (Carry)
+    {
         Carry = 0;
+    if (GET_CARRY())
+    {
+        SET_CARRY(0);
         if (L_var2 != MIN_32)
+        {
 …
             if (L_var1 > 0L)
+            {
                 Overflow = 1;
                 Carry = 0;
+                SET_OVERFLOW(1);
+                SET_CARRY(0);
+            }
+        }
 …
         if ((L_test < 0) && (L_var1 > 0) && (L_var2 < 0))
+        {
             Overflow = 1;
+            SET_OVERFLOW(1);
             carry_int = 0;
+        }
         else if ((L_test > 0) && (L_var1 < 0) && (L_var2 > 0))
+        {
             Overflow = 1;
+            SET_OVERFLOW(1);
             carry_int = 1;
+        }
         else if ((L_test > 0) && ((L_var1 ^ L_var2) > 0))
+        {
             Overflow = 0;
+            SET_OVERFLOW(0);
             carry_int = 1;
+        }
         if (L_test == MIN_32)
+        {
             Overflow = 1;
             Carry = carry_int;
+            SET_OVERFLOW(1);
+            SET_CARRY(carry_int);
+        }
         else
+        {
             Carry = carry_int;
+            SET_CARRY(carry_int);
+        }
+    }
 …
     return (L_var_out);
+}
+#endif
 /* ------------------------- End of L_sub_c() ------------------------- */
 …
 /* ------------------------- End of mult_r() ------------------------- */
-/*___________________________________________________________________________
- |                                                                           |
- |   Function Name : L_shl                                                   |
- |                                                                           |
- |   Purpose :                                                               |
- |                                                                           |
- |   Arithmetically shift the 32 bit input L_var1 left var2 positions. Zero  |
- |   fill the var2 LSB of the result. If var2 is negative, arithmetically    |
- |   shift L_var1 right by -var2 with sign extension. Saturate the result in |
- |   case of underflows or overflows.                                        |
- |                                                                           |
- |   Complexity weight : 2                                                   |
- |                                                                           |
- |   Inputs :                                                                |
- |                                                                           |
- |    L_var1   32 bit long signed integer (Word32) whose value falls in the  |
- |             range : 0x8000 0000 <= L_var3 <= 0x7fff ffff.                 |
- |                                                                           |
- |    var2                                                                   |
- |             16 bit short signed integer (Word16) whose value falls in the |
- |             range : 0xffff 8000 <= var1 <= 0x0000 7fff.                   |
- |                                                                           |
- |   Outputs :                                                               |
- |                                                                           |
- |    none                                                                   |
- |                                                                           |
- |   Return Value :                                                          |
- |                                                                           |
- |    L_var_out                                                              |
- |             32 bit long signed integer (Word32) whose value falls in the  |
- |             range : 0x8000 0000 <= L_var_out <= 0x7fff ffff.              |
- |___________________________________________________________________________|
-*/
-Word32 L_shl (Word32 L_var1, Word16 var2)
+{
-    Word32 L_var_out;
-    if (var2 <= 0)
+    {
-        if (var2 < -32)
-            var2 = -32;
-        L_var_out = L_shr (L_var1, (Word16) -var2);
-#if (WMOPS)
-        multiCounter[currCounter].L_shr--;
-#endif
+    }
-    else
+    {
-        for (; var2 > 0; var2--)
+        {
-            if (L_var1 > (Word32) 0X3fffffffL)
+            {
-                Overflow = 1;
-                L_var_out = MAX_32;
-                break;
+            }
-            else
+            {
-                if (L_var1 < (Word32) 0xc0000000L)
+                {
-                    Overflow = 1;
-                    L_var_out = MIN_32;
-                    break;
+                }
+            }
-            L_var1 *= 2;
-            L_var_out = L_var1;
+        }
+    }
-#if (WMOPS)
-    multiCounter[currCounter].L_shl++;
-#endif
-    return (L_var_out);
+}
-/* ------------------------- End of L_shl() ------------------------- */
-/*___________________________________________________________________________
- |                                                                           |
- |   Function Name : L_shr                                                   |
- |                                                                           |
- |   Purpose :                                                               |
- |                                                                           |
- |   Arithmetically shift the 32 bit input L_var1 right var2 positions with  |
- |   sign extension. If var2 is negative, arithmetically shift L_var1 left   |
- |   by -var2 and zero fill the -var2 LSB of the result. Saturate the result |
- |   in case of underflows or overflows.                                     |
- |                                                                           |
- |   Complexity weight : 2                                                   |
- |                                                                           |
- |   Inputs :                                                                |
- |                                                                           |
- |    L_var1   32 bit long signed integer (Word32) whose value falls in the  |
- |             range : 0x8000 0000 <= L_var3 <= 0x7fff ffff.                 |
- |                                                                           |
- |    var2                                                                   |
- |             16 bit short signed integer (Word16) whose value falls in the |
- |             range : 0xffff 8000 <= var1 <= 0x0000 7fff.                   |
- |                                                                           |
- |   Outputs :                                                               |
- |                                                                           |
- |    none                                                                   |
- |                                                                           |
- |   Return Value :                                                          |
- |                                                                           |
- |    L_var_out                                                              |
- |             32 bit long signed integer (Word32) whose value falls in the  |
- |             range : 0x8000 0000 <= L_var_out <= 0x7fff ffff.              |
- |___________________________________________________________________________|
-*/
-Word32 L_shr (Word32 L_var1, Word16 var2)
+{
-    Word32 L_var_out;
-    if (var2 < 0)
+    {
-        if (var2 < -32)
-            var2 = -32;
-        L_var_out = L_shl (L_var1, (Word16) -var2);
-#if (WMOPS)
-        multiCounter[currCounter].L_shl--;
-#endif
+    }
-    else
+    {
-        if (var2 >= 31)
+        {
-            L_var_out = (L_var1 < 0L) ? -1 : 0;
+        }
-        else
+        {
-            if (L_var1 < 0)
+            {
-                L_var_out = ~((~L_var1) >> var2);
+            }
-            else
+            {
-                L_var_out = L_var1 >> var2;
+            }
+        }
+    }
-#if (WMOPS)
-    multiCounter[currCounter].L_shr++;
-#endif
-    return (L_var_out);
+}
-/* ------------------------- End of L_shr() ------------------------- */
 …
 /*___________________________________________________________________________
  |                                                                           |
- |   Function Name : L_sat                                                   |
- |                                                                           |
- |   Purpose :                                                               |
- |                                                                           |
- |    32 bit L_var1 is set to 2147483647 if an overflow occured or to        |
- |    -2147483648 if an underflow occured on the most recent L_add_c,        |
- |    L_sub_c, L_macNs or L_msuNs operations. The carry and overflow values  |
- |    are binary values which can be tested and assigned values.             |
- |                                                                           |
- |   Complexity weight : 4                                                   |
- |                                                                           |
- |   Inputs :                                                                |
- |                                                                           |
- |    L_var1                                                                 |
- |             32 bit long signed integer (Word32) whose value falls in the  |
- |             range : 0x8000 0000 <= var1 <= 0x7fff ffff.                   |
- |                                                                           |
- |   Outputs :                                                               |
- |                                                                           |
- |    none                                                                   |
- |                                                                           |
- |   Return Value :                                                          |
- |                                                                           |
- |    L_var_out                                                              |
- |             32 bit long signed integer (Word32) whose value falls in the  |
- |             range : 0x8000 0000 <= var_out <= 0x7fff ffff.                |
- |___________________________________________________________________________|
-*/
-Word32 L_sat (Word32 L_var1)
+{
-    Word32 L_var_out;
-    L_var_out = L_var1;
-    if (Overflow)
+    {
-        if (Carry)
+        {
-            L_var_out = MIN_32;
+        }
-        else
+        {
-            L_var_out = MAX_32;
+        }
-        Carry = 0;
-        Overflow = 0;
+    }
-#if (WMOPS)
-    multiCounter[currCounter].L_sat++;
-#endif
-    return (L_var_out);
+}
-/* ------------------------- End of L_sat() ------------------------- */
-/*___________________________________________________________________________
- |                                                                           |
  |   Function Name : norm_s                                                  |
  |                                                                           |
 …
    Temp = Lv & (Word32) 0x0000ffff ;
    Temp = Temp * (Word32) v ;
    Temp = L_shr( Temp, (Word16) 15 ) ;
+   Temp = L_shr_nocheck( Temp, (Word16) 15 ) ;
    Temp = L_mac( Temp, v, extract_h(Lv) ) ;
 …
+    }
     else {
         L_num = L_shr(L_num, (Word16)1) ;
         L_den = L_shr(L_den, (Word16)1);
+        L_num = L_shr_nocheck(L_num, (Word16)1) ;
+        L_den = L_shr_nocheck(L_den, (Word16)1);
 #if (WMOPS)
         multiCounter[currCounter].L_shr-=2;
 #endif
         for(iteration=(Word16)0; iteration< (Word16)15;iteration++) {
             var_out = shl( var_out, (Word16)1);
             L_num   = L_shl( L_num, (Word16)1);
+            var_out = shl_nocheck( var_out, (Word16)1);
+            L_num   = L_shl_nocheck( L_num, (Word16)1);
 #if (WMOPS)
             multiCounter[currCounter].shl--;
 …
             if (L_var1 > (UWord32) 0X7fffffffL)
+            {
                 Overflow = 1;
+                SET_OVERFLOW(1);
                 L_var_out = UMAX_32;
                 break;
 …
                 if (L_var1 < (UWord32) 0x00000001L)
+                {
                     Overflow = 1;
+                    SET_OVERFLOW(1);
                     L_var_out = MIN_32;
                     break;

pjproject/trunk/third_party/g7221/common/stl-files/basop32.h

-                      r2601
+                      r2616
 #define MAX_16 (Word16)0x7fff
 #define MIN_16 (Word16)0x8000
+#define MIN_16 ((Word16)0x8000)
 #define UMAX_32 (Word32)0xffffffffL
 …
 */
 Word16 add (Word16 var1, Word16 var2);    /* Short add,           1   */
 Word16 sub (Word16 var1, Word16 var2);    /* Short sub,           1   */
 Word16 abs_s (Word16 var1);               /* Short abs,           1   */
+PJ_INLINE(Word16) add (Word16 var1, Word16 var2);    /* Short add,           1   */
+PJ_INLINE(Word16) sub (Word16 var1, Word16 var2);    /* Short sub,           1   */
+PJ_INLINE(Word16) abs_s (Word16 var1);               /* Short abs,           1   */
 Word16 shl (Word16 var1, Word16 var2);    /* Short shift left,    1   */
+PJ_INLINE(Word16) shl_nocheck(Word16 var1, Word16 var2);
 Word16 shr (Word16 var1, Word16 var2);    /* Short shift right,   1   */
+PJ_INLINE(Word16) shr_nocheck(Word16 var1, Word16 var2);
 Word16 mult (Word16 var1, Word16 var2);   /* Short mult,          1   */
 Word32 L_mult (Word16 var1, Word16 var2); /* Long mult,           1   */
 Word16 negate (Word16 var1);              /* Short negate,        1   */
 Word16 extract_h (Word32 L_var1);         /* Extract high,        1   */
 Word16 extract_l (Word32 L_var1);         /* Extract low,         1   */
 Word16 itu_round (Word32 L_var1);         /* Round,               1   */
 Word32 L_mac (Word32 L_var3, Word16 var1, Word16 var2);   /* Mac,  1  */
+PJ_INLINE(Word32) L_mult (Word16 var1, Word16 var2); /* Long mult,           1   */
+PJ_INLINE(Word16) negate (Word16 var1);              /* Short negate,        1   */
+PJ_INLINE(Word16) extract_h (Word32 L_var1);         /* Extract high,        1   */
+PJ_INLINE(Word16) extract_l (Word32 L_var1);         /* Extract low,         1   */
+PJ_INLINE(Word16) itu_round (Word32 L_var1);         /* Round,               1   */
+PJ_INLINE(Word32) L_mac (Word32 L_var3, Word16 var1, Word16 var2);   /* Mac,  1  */
 Word32 L_msu (Word32 L_var3, Word16 var1, Word16 var2);   /* Msu,  1  */
 Word32 L_macNs (Word32 L_var3, Word16 var1, Word16 var2); /* Mac without
 …
 Word32 L_msuNs (Word32 L_var3, Word16 var1, Word16 var2); /* Msu without
                                                              sat, 1   */
 Word32 L_add (Word32 L_var1, Word32 L_var2);    /* Long add,        2 */
 Word32 L_sub (Word32 L_var1, Word32 L_var2);    /* Long sub,        2 */
+//PJ_INLINE(Word32) L_add (Word32 L_var1, Word32 L_var2);    /* Long add,        2 */
+PJ_INLINE(Word32) L_sub (Word32 L_var1, Word32 L_var2);    /* Long sub,        2 */
 Word32 L_add_c (Word32 L_var1, Word32 L_var2);  /* Long add with c, 2 */
 Word32 L_sub_c (Word32 L_var1, Word32 L_var2);  /* Long sub with c, 2 */
 Word32 L_negate (Word32 L_var1);                /* Long negate,     2 */
 Word16 mult_r (Word16 var1, Word16 var2);       /* Mult with round, 2 */
 Word32 L_shl (Word32 L_var1, Word16 var2);      /* Long shift left, 2 */
 Word32 L_shr (Word32 L_var1, Word16 var2);      /* Long shift right, 2*/
+PJ_INLINE(Word32) L_shl (Word32 L_var1, Word16 var2);      /* Long shift left, 2 */
+PJ_INLINE(Word32) L_shr (Word32 L_var1, Word16 var2);      /* Long shift right, 2*/
 Word16 shr_r (Word16 var1, Word16 var2);        /* Shift right with
                                                    round, 2           */
 …
 UWord32 LU_shl (UWord32 L_var1, Word16 var2);
 UWord32 LU_shr (UWord32 L_var1, Word16 var2);
+#define INCLUDE_UNSAFE      0
+/* Local */
+PJ_INLINE(Word16) saturate (Word32 L_var1);
+#if INCLUDE_UNSAFE
+    extern Flag g7221_Overflow;
+    extern Flag g7221_Carry;
+#   define SET_OVERFLOW(n)  g7221_Overflow = n
+#   define SET_CARRY(n)     g7221_Carry = n
+#else
+#   define SET_OVERFLOW(n)
+#   define SET_CARRY(n)
+#   define GET_OVERFLOW()   0
+#   define GET_CARRY()      0
+#endif
+#include "basop32_i.h"
 #endif /* BASOP_H_DEFINED */

pjproject/trunk/third_party/g7221/common/stl-files/count.h

-                      r2563
+                      r2616
  ===========================================================================
 */
+#if 0
 #ifndef COUNT_H
 #define COUNT_H "$Id $"
 …
  */
+Word32 fwc (void);
+PJ_INLINE(Word32) fwc (void)
+{
+#if WMOPS
+    Word32 tot;
+    tot = DeltaWeightedOperation ();
+    if (tot > wc[currCounter][funcid[currCounter]])
+        wc[currCounter][funcid[currCounter]] = tot;
+    funcid[currCounter]++;
+    return (tot);
+#else
+    return 0; /* Dummy */
+#endif
+}
 /*
  * worst worst case counter.
 …
  * The WMOPS_output function add together all parts and presents the sum.
  */
+void move16 (void);
+void move32 (void);
+void logic16 (void);
+void logic32 (void);
+void test (void);
+PJ_INLINE(void) move16 (void)
+{
+#if WMOPS
+    multiCounter[currCounter].DataMove16++;
+#endif
+}
+PJ_INLINE(void) move32 (void)
+{
+#if WMOPS
+    multiCounter[currCounter].DataMove32++;
+#endif
+}
+PJ_INLINE(void )logic16 (void)
+{
+#if WMOPS
+    multiCounter[currCounter].Logic16++;
+#endif
+}
+PJ_INLINE(void) logic32 (void)
+{
+#if WMOPS
+    multiCounter[currCounter].Logic32++;
+#endif
+}
+PJ_INLINE(void) test (void)
+{
+#if WMOPS
+    multiCounter[currCounter].Test++;
+#endif
+}
 /*
  * The functions above increases the corresponding operation counter for
 …
 #endif /* COUNT_H */
+#else
+#define move16()
+#define move32()
+#define logic16()
+#define logic32()
+#define test()
+#endif

pjproject/trunk/third_party/g7221/decode/coef2sam.c

-                      r2601
+                      r2616
     half_dct_size = shr(dct_length,1);
+    half_dct_size = shr_nocheck(dct_length,1);
     /* Perform a Type IV (inverse) DCT on the coefficients */
 …
         for(index=0;index<dct_length;index++)
+        {
             new_samples[index] = shr(new_samples[index],mag_shift);
+            new_samples[index] = shr_nocheck(new_samples[index],mag_shift);
             move16();
+        }
 …
             for(index=0;index<dct_length;index++)
+            {
                 new_samples[index] = shl(new_samples[index],mag_shift);
+                new_samples[index] = shl_nocheck(new_samples[index],mag_shift);
                 move16();
+            }
 …
         sum = L_mac(sum,*win_new++, *--new_ptr);
         sum = L_mac(sum,*--win_old, *old_ptr++);
         *out_ptr++ = itu_round(L_shl(sum,2));
+        *out_ptr++ = itu_round(L_shl_nocheck(sum,2));
         move16();
 …
         sum = L_mac(sum,*win_new++, *new_ptr++);
         sum = L_mac(sum,negate(*--win_old), *--old_ptr);
         *out_ptr++ = itu_round(L_shl(sum,2));
+        *out_ptr++ = itu_round(L_shl_nocheck(sum,2));
         move16();
+    }

pjproject/trunk/third_party/g7221/decode/dct4_s.c

-                      r2601
+                      r2616
         /*    set_span      = 1 << (DCT_LENGTH_LOG - set_count_log); */
         set_span = shr(dct_length,set_count_log);
+        set_span = shr_nocheck(dct_length,set_count_log);
         set_count     = shl(1,set_count_log);
+        set_count     = shl_nocheck(1,set_count_log);
         in_ptr        = in_buffer;
         move16();
 …
                     dummy = add(in_val_low,dither_ptr[i++]);
+                    acca = L_add(dummy,in_val_high);
+                    out_val_low = extract_l(L_shr(acca,1));
+                    // blp: addition of two 16bits vars, there's no way
+                    //      they'll overflow a 32bit var
+                    //acca = L_add(dummy,in_val_high);
+                    acca = dummy + in_val_high;
+                    out_val_low = extract_l(L_shr_nocheck(acca,1));
                     dummy = add(in_val_low,dither_ptr[i++]);
+                    acca = L_add(dummy,-in_val_high);
+                    out_val_high = extract_l(L_shr(acca,1));
+                    // blp: addition of two 16bits vars, there's no way
+                    //      they'll overflow a 32bit var
+                    //acca = L_add(dummy,-in_val_high);
+                    acca = dummy - in_val_high;
+                    out_val_high = extract_l(L_shr_nocheck(acca,1));
                     *out_ptr_low++  = out_val_low;
 …
         for ( k=0; k<CORE_SIZE; k++ )
+        {
+#if PJ_HAS_INT64
+            /* blp: danger danger! not really compatible but faster */
+            pj_int64_t sum64=0;
+            move32();
+            for ( i=0; i<CORE_SIZE; i++ )
+            {
+                sum64 += L_mult(pair_ptr[i], dct_core_s[i][k]);
+            }
+            sum = L_saturate(sum64);
+#else
             sum=0L;
             move32();
 …
                 sum = L_mac(sum, pair_ptr[i],dct_core_s[i][k]);
+            }
+#endif
             buffer_swap[k] = itu_round(sum);
+        }
 …
         /*    set_span      = 1 << (DCT_LENGTH_LOG - set_count_log); */
         set_span = shr(dct_length,set_count_log);
         set_count     = shl(1,set_count_log);
+        set_span = shr_nocheck(dct_length,set_count_log);
+        set_count     = shl_nocheck(1,set_count_log);
         next_in_base  = in_buffer;
         move16();
 …
             move16();
             temp = shr(set_span,1);
+            temp = shr_nocheck(set_span,1);
             in_ptr_high    = in_ptr_low + temp;
             move16();
 …
                 sum = L_mac(sum,cos_even,in_low_even);
                 sum = L_mac(sum,negate(msin_even),in_high_even);
                 out_low_even = itu_round(L_shl(sum,1));
+                out_low_even = itu_round(L_shl_nocheck(sum,1));
                 sum = 0L;
 …
                 sum = L_mac(sum,msin_even,in_low_even);
                 sum = L_mac(sum,cos_even,in_high_even);
                 out_high_even = itu_round(L_shl(sum,1));
+                out_high_even = itu_round(L_shl_nocheck(sum,1));
                 sum = 0L;
 …
                 sum = L_mac(sum,cos_odd,in_low_odd);
                 sum = L_mac(sum,msin_odd,in_high_odd);
                 out_low_odd = itu_round(L_shl(sum,1));
+                out_low_odd = itu_round(L_shl_nocheck(sum,1));
                 sum = 0L;
 …
                 sum = L_mac(sum,msin_odd,in_low_odd);
                 sum = L_mac(sum,negate(cos_odd),in_high_odd);
                 out_high_odd = itu_round(L_shl(sum,1));
+                out_high_odd = itu_round(L_shl_nocheck(sum,1));
                 *out_ptr_low++  = out_low_even;
 …
         for(i=0;i<320;i++)
+        {
+           sum = L_add(output[i],syn_bias_7khz[i]);
+           // blp: addition of two 16bits vars, there's no way
+           //      they'll overflow a 32bit var
+           //sum = L_add(output[i],syn_bias_7khz[i]);
+           sum = output[i] + syn_bias_7khz[i];
            acca = L_sub(sum,32767);
            test();
 …
                move32();
+           }
+           acca = L_add(sum,32768L);
+           // blp: addition of two 16bits vars, there's no way
+           //      they'll overflow 32bit var
+           //acca = L_add(sum,32768L);
+           acca = sum + 32768;
            test();
            if (acca < 0)

pjproject/trunk/third_party/g7221/decode/decoder.c

-                      r2563
+                      r2616
+        {
                 get_next_bit(bitobj);
                 categorization_control = shl(categorization_control,1);
+                categorization_control = shl_nocheck(categorization_control,1);
                 categorization_control = add(categorization_control,bitobj->next_bit);
+        }
 …
+    {
         get_next_bit(bitobj);
         index = shl(index,1);
+        index = shl_nocheck(index,1);
         index = add(index,bitobj->next_bit);
+    }
 …
+    {
         i = sub(i,1);
         temp = shr(temp,1);
+        temp = shr_nocheck(temp,1);
         max_index = sub(max_index,2);
         temp1 = sub(temp,8);
 …
                     if (bitobj->next_bit == 0)
+                        {
                         temp = shl(index,1);
+                        temp = shl_nocheck(index,1);
                         index = (Word16)*(decoder_table_ptr + temp);
                         move16();
 …
                         else
+                        {
                         temp = shl(index,1);
+                        temp = shl_nocheck(index,1);
                         index = (Word16)*(decoder_table_ptr + temp + 1);
                         move16();
 …
+                        {
                                     get_next_bit(bitobj);
                                 signs_index = shl(signs_index,1);
+                                signs_index = shl_nocheck(signs_index,1);
                                     signs_index = add(signs_index,bitobj->next_bit);
                                     bitobj->number_of_bits_left = sub(bitobj->number_of_bits_left,1);
+                            }
                             temp = sub(num_sign_bits,1);
                         bit = shl(1,(temp));
+                        bit = shl_nocheck(1,(temp));
+                        }
 …
+                    {
                             acca = L_mult0(standard_deviation,mlt_quant_centroid[category][k[j]]);
                         acca = L_shr(acca,12);
+                        acca = L_shr_nocheck(acca,12);
                         decoder_mlt_value = extract_l(acca);
 …
                             if ((signs_index & bit) == 0)
                                         decoder_mlt_value = negate(decoder_mlt_value);
                                     bit = shr(bit,1);
+                                    bit = shr_nocheck(bit,1);
+                            }
                         *decoder_mlt_ptr++ = decoder_mlt_value;
 …
                         *decoder_mlt_ptr = temp1;
                     move16();
                         random_word = shr(random_word,1);
+                        random_word = shr_nocheck(random_word,1);
+                    }
                     /* pointer arithmetic */
 …
                         *decoder_mlt_ptr = temp1;
                     move16();
                         random_word  = shr(random_word,1);
+                        random_word  = shr_nocheck(random_word,1);
+                    }
                     /* pointer arithmetic */
 …
                 *decoder_mlt_ptr++ = temp1;
                 move16();
                 random_word = shr(random_word,1);
+                random_word = shr_nocheck(random_word,1);
+            }
             random_word = get_rand(randobj);
 …
                 *decoder_mlt_ptr++ = temp1;
                 move16();
                 random_word = shr(random_word,1);
+                random_word = shr_nocheck(random_word,1);
+            }
+        }
 …
+    }
     bitobj->code_bit_count = sub(bitobj->code_bit_count,1);
     temp = shr(bitobj->current_word,bitobj->code_bit_count);
+    temp = shr_nocheck(bitobj->current_word,bitobj->code_bit_count);
     logic16();
     bitobj->next_bit = (Word16 )(temp & 1);

pjproject/trunk/third_party/g7221/encode/dct4_a.c

-                      r2601
+                      r2616
         /*    set_span      = 1 << (DCT_LENGTH_LOG - set_count_log); */
         set_span = shr(dct_length,set_count_log);
         set_count     = shl(1,set_count_log);
+        set_span = shr_nocheck(dct_length,set_count_log);
+        set_count     = shl_nocheck(1,set_count_log);
         in_ptr        = in_buffer;
 …
                 in_val_low      = *in_ptr++;
                 in_val_high     = *in_ptr++;
+                acca            = L_add(in_val_low,in_val_high);
+                acca            = L_shr(acca,1);
+                // blp: addition of two 16bits vars, there's no way
+                //      they'll overflow a 32bit var
+                //acca            = L_add(in_val_low,in_val_high);
+                acca = (in_val_low + in_val_high);
+                acca            = L_shr_nocheck(acca,1);
                 out_val_low     = extract_l(acca);
                 acca            = L_sub(in_val_low,in_val_high);
                 acca            = L_shr(acca,1);
+                acca            = L_shr_nocheck(acca,1);
                 out_val_high    = extract_l(acca);
 …
     temp = sub(dct_length_log,1);
     temp = shl(1,temp);
+    temp = shl_nocheck(1,temp);
     for (pairs_left=temp; pairs_left > 0; pairs_left--)
 …
         for ( k=0; k<CORE_SIZE; k++ )
+        {
+#if PJ_HAS_INT64
+            /* blp: danger danger! not really compatible but faster */
+            pj_int64_t sum64=0;
+            move32();
+            for ( i=0; i<CORE_SIZE; i++ )
+            {
+                sum64 += L_mult(pair_ptr[i], dct_core_a[i][k]);
+            }
+            sum = L_saturate(sum64);
+#else
             sum=0L;
             move32();
 …
                 sum = L_mac(sum, pair_ptr[i],dct_core_a[i][k]);
+            }
+#endif
             buffer_swap[k] = itu_round(sum);
+        }
 …
         /*===========================================================*/
         /*    set_span      = 1 << (DCT_LENGTH_LOG - set_count_log); */
         set_span = shr(dct_length,set_count_log);
         set_count     = shl(1,set_count_log);
+        set_span = shr_nocheck(dct_length,set_count_log);
+        set_count     = shl_nocheck(1,set_count_log);
         next_in_base  = in_buffer;
         move16();
 …
             in_ptr_low     = next_in_base;
             move16();
             temp           = shr(set_span,1);
+            temp           = shr_nocheck(set_span,1);
             /* address arithmetic */

pjproject/trunk/third_party/g7221/encode/encoder.c

-                      r2563
+                      r2616
        to be exactly 3.010299957 or 20.0 times log base 10
        of square root of 2. */
     temp = shl(mag_shift,1);
+    temp = shl_nocheck(mag_shift,1);
     mag_shift_offset = add(temp,REGION_POWER_TABLE_NUM_NEGATIVES);
 …
         if (j >= 0)
+        {
             temp = extract_l(L_shr(current_word,j));
+            temp = extract_l(L_shr_nocheck(current_word,j));
             out_word = add(out_word,temp);
 …
+    {
         accb = L_deposit_l(out_word_index);
         accb = L_shl(accb,4);
+        accb = L_shl_nocheck(accb,4);
         accb = L_sub(accb,number_of_bits_per_frame);
         test();
         if(accb < 0)
+        {
             temp = shl(region,2);
+            temp = shl_nocheck(region,2);
             in_word_ptr = &region_mlt_bits[temp];
             region_bit_count = region_mlt_bit_counts[region];
 …
             acca = L_deposit_l(out_word_index);
             acca = L_shl(acca,4);
+            acca = L_shl_nocheck(acca,4);
             acca = L_sub(acca,number_of_bits_per_frame);
 …
+                }
                 acca = L_deposit_l(out_word_index);
                 acca = L_shl(acca,4);
+                acca = L_shl_nocheck(acca,4);
                 acca = L_sub(acca,number_of_bits_per_frame);
+            }
             accb = L_deposit_l(out_word_index);
             accb = L_shl(accb,4);
+            accb = L_shl_nocheck(accb,4);
             accb = L_sub(accb,number_of_bits_per_frame);
+        }
 …
         acca = L_deposit_l(out_word_index);
         acca = L_shl(acca,4);
+        acca = L_shl_nocheck(acca,4);
         acca = L_sub(acca,number_of_bits_per_frame);
+    }
 …
+    {
         n = sub(absolute_region_power_index[region],39);
         n = shr(n,1);
+        n = shr_nocheck(n,1);
         test();
 …
             for (i=0; i<REGION_SIZE; i++)
+            {
                 acca = L_shl(*raw_mlt_ptr,16);
+                acca = L_shl_nocheck(*raw_mlt_ptr,16);
                 acca = L_add(acca,32768L);
                 acca = L_shr(acca,n);
                 acca = L_shr(acca,16);
+                acca = L_shr_nocheck(acca,n);
+                acca = L_shr_nocheck(acca,16);
                 *raw_mlt_ptr++ = extract_l(acca);
+            }
             temp = shl(n,1);
+            temp = shl_nocheck(n,1);
             temp = sub(absolute_region_power_index[region],temp);
             absolute_region_power_index[region] = temp;
 …
+        {
             test();
             long_accumulator = L_shr(long_accumulator,1);
+            long_accumulator = L_shr_nocheck(long_accumulator,1);
             acca = (long_accumulator & 0x7fff0000L);
 …
             logic16();
             long_accumulator = L_shl(long_accumulator,1);
+            long_accumulator = L_shl_nocheck(long_accumulator,1);
             acca = L_sub(long_accumulator,32767);
             power_shift--;
             temp = add(power_shift,15);
+        }
         long_accumulator = L_shr(long_accumulator,1);
+        long_accumulator = L_shr_nocheck(long_accumulator,1);
         /* 28963 corresponds to square root of 2 times REGION_SIZE(20). */
         acca = L_sub(long_accumulator,28963);
 …
         acca = L_deposit_l(mag_shift);
         acca = L_shl(acca,1);
+        acca = L_shl_nocheck(acca,1);
         acca = L_sub(power_shift,acca);
         acca = L_add(35,acca);
 …
     /* Start in the middle of the categorization control range. */
     temp = shr(num_categorization_control_possibilities,1);
+    temp = shr_nocheck(num_categorization_control_possibilities,1);
     temp = sub(temp,1);
     for (*p_categorization_control = 0; *p_categorization_control < temp; (*p_categorization_control)++)
 …
             region_mlt_bit_counts[region] =
             vector_huffman(category, absolute_region_power_index[region],raw_mlt_ptr,
                            &region_mlt_bits[shl(region,2)]);
+                           &region_mlt_bits[shl_nocheck(region,2)]);
+        }
         else
 …
             region_mlt_bit_counts[region] =
                 vector_huffman(category, absolute_region_power_index[region],raw_mlt_ptr,
                            &region_mlt_bits[shl(region,2)]);
+                           &region_mlt_bits[shl_nocheck(region,2)]);
+        }
         else
 …
             region_mlt_bit_counts[region] =
                 vector_huffman(category, absolute_region_power_index[region],raw_mlt_ptr,
                            &region_mlt_bits[shl(region,2)]);
+                           &region_mlt_bits[shl_nocheck(region,2)]);
+        }
         else
 …
     /* compute inverse of step size * standard deviation */
     acca = L_mult(step_size_inverse_table[category],standard_deviation_inverse_table[power_index]);
     acca = L_shr(acca,1);
+    acca = L_shr_nocheck(acca,1);
     acca = L_add(acca,4096);
     acca = L_shr(acca,13);
+    acca = L_shr_nocheck(acca,13);
         /*
 …
         mytemp = acca & 0x3;
     acca = L_shr(acca,2);
+    acca = L_shr_nocheck(acca,2);
     inv_of_step_size_times_std_dev = extract_l(acca);
 …
             acca = L_mult(k,inv_of_step_size_times_std_dev);
             acca = L_shr(acca,1);
+            acca = L_shr_nocheck(acca,1);
                         /*
 …
                         myacca = (Word16)L_mult(k,mytemp);
                         myacca = (Word16)L_shr(myacca,1);
+                        myacca = (Word16)L_shr_nocheck(myacca,1);
                         myacca = (Word16)L_add(myacca,int_dead_zone_low_bits[category]);
                         myacca = (Word16)L_shr(myacca,2);
+                        myacca = (Word16)L_shr_nocheck(myacca,2);
             acca = L_add(acca,int_dead_zone[category]);
 …
                         acca = L_add(acca,myacca);
                         acca = L_shr(acca,13);
+                        acca = L_shr_nocheck(acca,13);
             k = extract_l(acca);
 …
+            {
                 number_of_non_zero = add(number_of_non_zero,1);
                 signs_index = shl(signs_index,1);
+                signs_index = shl_nocheck(signs_index,1);
                 test();
 …
+                }
+            }
             acca = L_shr(L_mult(index,(kmax_plus_one)),1);
+            acca = L_shr_nocheck(L_mult(index,(kmax_plus_one)),1);
             index = extract_l(acca);
             index = add(index,k);
 …
+        {
             j = negate(j);
             acca = L_shr(code_bits,j);
+            acca = L_shr_nocheck(code_bits,j);
             current_word = L_add(current_word,acca);

pjproject/trunk/third_party/g7221/encode/sam2coef.c

r2601	r2616
85	85	Word16 temp5;
86	86
87		half_dct_size = shr(dct_length,1);
	87	half_dct_size = shr_nocheck(dct_length,1);
88	88
89	89	/*++++++++++++++++++++++++++++++++++++++++++++*/
…	…
209	209	}
210	210	accb = L_mult(temp,9587);
211		acca = L_shr(accb,20);
	211	acca = L_shr_nocheck(accb,20);
212	212	temp5 = extract_l(acca);
213	213	temp = norm_s(temp5);
…	…
231	231	}
232	232
233		acca = L_shr(acca,7);
	233	acca = L_shr_nocheck(acca,7);
234	234
235	235	test();
…	…
244	244	for(index=0;index<dct_length;index++)
245	245	{
246		windowed_data[index] = shl(windowed_data[index],mag_shift);
	246	windowed_data[index] = shl_nocheck(windowed_data[index],mag_shift);
247	247	}
248	248	}
…	…
255	255	for(index=0;index<dct_length;index++)
256	256	{
257		windowed_data[index] = shr(windowed_data[index],n);
	257	windowed_data[index] = shr_nocheck(windowed_data[index],n);
258	258	move16();
259	259	}

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 2616 for pjproject/trunk

Legend:

Download in other formats: