Changeset 628

pjproject/trunk/pjmedia/build/Makefile

-                      r584
+                      r628
                 speex/quant_lsp.o speex/sb_celp.o speex/smallft.o \
                 speex/speex.o speex/speex_callbacks.o speex/speex_header.o \
                 speex/stereo.o speex/vbr.o speex/vq.o
 SPEEX_CFLAGS := -DHAVE_CONFIG=1 -I../src/pjmedia-codec
+                speex/stereo.o speex/vbr.o speex/vq.o speex/window.o
+SPEEX_CFLAGS := -DHAVE_CONFIG_H=1 -I../src/pjmedia-codec
 export PJMEDIA_CODEC_SRCDIR = ../src/pjmedia-codec

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/_kiss_fft_guts.h

-                      r516
+                      r628
    typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */
 #include "kiss_fft.h"
-#include <limits.h>
 #define MAXFACTORS 32
 …
  * */
 #ifdef FIXED_POINT
+#include "misc.h"
 # define FRACBITS 15
 # define SAMPPROD int32_t
+# define SAMPPROD spx_int32_t
 #define SAMP_MAX 32767

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/arch.h

-                      r278
+                      r628
 #define MAX16(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum 16-bit value.   */
 #define ABS32(x) ((x) < 0 ? (-(x)) : (x))    /**< Absolute 32-bit value.  */
+#define MAX32(a,b) ((a) > (b) ? (a) : (b))   /**< Maximum 32-bit value.   */
 #ifdef FIXED_POINT
 …
 typedef spx_int16_t spx_word16_t;
 typedef spx_int32_t   spx_word32_t;
-#ifdef _MSC_VER
-typedef __int64      spx_word64_t;
-#elif defined NO_LONGLONG
-typedef double    spx_word64_t;
-#else
-typedef long long    spx_word64_t;
-#endif
 typedef spx_word32_t spx_mem_t;
 typedef spx_word16_t spx_coef_t;
 …
 typedef float spx_word16_t;
 typedef float spx_word32_t;
-typedef float spx_word64_t;
 #define Q15ONE 1.0f
 …
 #define ADD32(a,b) ((a)+(b))
 #define SUB32(a,b) ((a)-(b))
-#define ADD64(a,b) ((a)+(b))
 #define MULT16_16_16(a,b)     ((a)*(b))
 #define MULT16_16(a,b)     ((spx_word32_t)(a)*(spx_word32_t)(b))
 …
 #define MAC16_16_Q11(c,a,b)     ((c)+(a)*(b))
 #define MAC16_16_Q13(c,a,b)     ((c)+(a)*(b))
+#define MAC16_16_P13(c,a,b)     ((c)+(a)*(b))
 #define MULT16_16_Q11_32(a,b)     ((a)*(b))
 #define MULT16_16_Q13(a,b)     ((a)*(b))
 …
 #define MULT16_16_Q15(a,b)     ((a)*(b))
 #define MULT16_16_P15(a,b)     ((a)*(b))
+#define MULT16_16_P13(a,b)     ((a)*(b))
+#define MULT16_16_P14(a,b)     ((a)*(b))
+#define DIV32_16(a,b)     ((a)/(b))
+#define DIV32(a,b)     ((a)/(b))
+#define DIV32_16(a,b)     (((spx_word32_t)(a))/(spx_word16_t)(b))
+#define PDIV32_16(a,b)     (((spx_word32_t)(a))/(spx_word16_t)(b))
+#define DIV32(a,b)     (((spx_word32_t)(a))/(spx_word32_t)(b))
+#define PDIV32(a,b)     (((spx_word32_t)(a))/(spx_word32_t)(b))
 …
 #ifdef CONFIG_TI_C55X
+#if defined (CONFIG_TI_C54X) || defined (CONFIG_TI_C55X)
 /* 2 on TI C5x DSP */

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/bits.c

-                      r278
+                      r628
+{
    int i;
+   if (len > bits->buf_size)
+   int nchars = len / BYTES_PER_CHAR;
+   if (nchars > bits->buf_size)
+   {
       speex_warning_int("Packet is larger than allocated buffer: ", len);
       if (bits->owner)
+      {
          char *tmp = (char*)speex_realloc(bits->chars, len);
+         char *tmp = (char*)speex_realloc(bits->chars, nchars);
          if (tmp)
+         {
             bits->buf_size=len;
+            bits->buf_size=nchars;
             bits->chars=tmp;
          } else {
             len=bits->buf_size;
+            nchars=bits->buf_size;
             speex_warning("Could not resize input buffer: truncating input");
+         }
       } else {
          speex_warning("Do not own input buffer: truncating input");
+         len=bits->buf_size;
+      }
+   }
+   for (i=0;i<len;i++)
+      bits->chars[i]=chars[i];
+   bits->nbBits=len<<3;
+         nchars=bits->buf_size;
+      }
+   }
+#if (BYTES_PER_CHAR==2)
+/* Swap bytes to proper endian order (could be done externally) */
+#define HTOLS(A) ((((A) >> 8)&0xff)|(((A) & 0xff)<<8))
+#else
+#define HTOLS(A) (A)
+#endif
+   for (i=0;i<nchars;i++)
+      bits->chars[i]=HTOLS(chars[i]);
+   bits->nbBits=nchars<<LOG2_BITS_PER_CHAR;
    bits->charPtr=0;
    bits->bitPtr=0;
 …
    pos=bits->nbBits>>LOG2_BITS_PER_CHAR;
    for (i=0;i<nchars;i++)
       bits->chars[pos+i]=chars[i];
+      bits->chars[pos+i]=HTOLS(chars[i]);
    bits->nbBits+=nchars<<LOG2_BITS_PER_CHAR;
+}
 …
    if (max_nchars > ((bits->nbBits+BITS_PER_CHAR-1)>>LOG2_BITS_PER_CHAR))
       max_nchars = ((bits->nbBits+BITS_PER_CHAR-1)>>LOG2_BITS_PER_CHAR);
+#if BYTES_PER_CHAR==1
+#define HTOLS(A) (A)
+#else
+#define HTOLS(A) ((((A) >> 8)&0xff)|(((A) & 0xff)<<8))
+#endif
    for (i=0;i<max_nchars;i++)
       chars[i]=HTOLS(bits->chars[i]);
 …
       max_nchars = ((bits->nbBits)>>LOG2_BITS_PER_CHAR);
    for (i=0;i<max_nchars;i++)
       chars[i]=bits->chars[i];
+      chars[i]=HTOLS(bits->chars[i]);
    if (bits->bitPtr>0)
       bits->chars[0]=bits->chars[max_nchars];

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/cb_search.c

-                      r278
+                      r628
             resj = MAC16_16(resj,shape[k],r[j-k]);
 #ifdef FIXED_POINT
          res16 = EXTRACT16(SHR32(resj, 11));
+         res16 = EXTRACT16(SHR32(resj, 13));
 #else
          res16 = 0.03125f*resj;
 …
+{
    int n;
+   int q=0;
+   for (n=0;n<len;n++,q++)
+      t[n] = SUB32(t[n],MULT16_16_Q11_32(g,r[q]));
+   for (n=0;n<len;n++)
+      t[n] = SUB16(t[n],PSHR32(MULT16_16(g,r[n]),13));
+}
 #endif
 …
 static void split_cb_search_shape_sign_N1(
 spx_sig_t target[],                     /* target vector */
+spx_word16_t target[],                  /* target vector */
 spx_coef_t ak[],                        /* LPCs for this subframe */
 spx_coef_t awk1[],                      /* Weighted LPCs for this subframe */
 …
+{
    int i,j,m,q;
-#ifndef FIXED_POINT
-   int n;
-#endif
    VARDECL(spx_word16_t *resp);
 #ifdef _USE_SSE
 …
    /* FIXME: make that adaptive? */
    for (i=0;i<nsf;i++)
       t[i]=EXTRACT16(PSHR32(target[i],6));
+      t[i]=target[i];
    compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);
 …
 #ifdef FIXED_POINT
          g=sign*shape_cb[rind*subvect_size+m];
+#else
+         g=sign*0.03125*shape_cb[rind*subvect_size+m];
+#endif
          target_update(t+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1));
-#else
-         g=sign*0.03125*shape_cb[rind*subvect_size+m];
-         /*FIXME: I think that one too can be replaced by target_update */
-         for (n=subvect_size*(i+1);n<nsf;n++,q++)
-            t[n] = SUB32(t[n],g*r[q]);
-#endif
+      }
+   }
 …
       syn_percep_zero(e, ak, awk1, awk2, r2, nsf,p, stack);
       for (j=0;j<nsf;j++)
          target[j]=SUB32(target[j],r2[j]);
+         target[j]=SUB16(target[j],EXTRACT16(PSHR32(r2[j],8)));
+   }
+}
 …
 void split_cb_search_shape_sign(
 spx_sig_t target[],                     /* target vector */
+spx_word16_t target[],                  /* target vector */
 spx_coef_t ak[],                        /* LPCs for this subframe */
 spx_coef_t awk1[],                      /* Weighted LPCs for this subframe */
 …
    /* FIXME: make that adaptive? */
    for (i=0;i<nsf;i++)
       t[i]=EXTRACT16(PSHR32(target[i],6));
+      t[i]=target[i];
    for (j=0;j<N;j++)
 …
 #ifdef FIXED_POINT
             g=sign*shape_cb[rind*subvect_size+m];
+#else
+            g=sign*0.03125*shape_cb[rind*subvect_size+m];
+#endif
             target_update(nt[j]+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1));
-#else
-            g=sign*0.03125*shape_cb[rind*subvect_size+m];
-            /*FIXME: I think that one too can be replaced by target_update */
-            for (n=subvect_size*(i+1);n<nsf;n++,q++)
-               nt[j][n] = SUB32(nt[j][n],g*r[q]);
-#endif
+         }
 …
       syn_percep_zero(e, ak, awk1, awk2, r2, nsf,p, stack);
       for (j=0;j<nsf;j++)
          target[j]=SUB32(target[j],r2[j]);
+         target[j]=SUB16(target[j],EXTRACT16(PSHR32(r2[j],8)));
+   }
+}
 …
 void noise_codebook_quant(
 spx_sig_t target[],                     /* target vector */
+spx_word16_t target[],                  /* target vector */
 spx_coef_t ak[],                        /* LPCs for this subframe */
 spx_coef_t awk1[],                      /* Weighted LPCs for this subframe */
 …
    VARDECL(spx_sig_t *tmp);
    ALLOC(tmp, nsf, spx_sig_t);
+   residue_percep_zero(target, ak, awk1, awk2, tmp, nsf, p, stack);
+   for (i=0;i<nsf;i++)
+      tmp[i]=PSHR32(EXTEND32(target[i]),SIG_SHIFT);
+   residue_percep_zero(tmp, ak, awk1, awk2, tmp, nsf, p, stack);
    for (i=0;i<nsf;i++)
 …
    for (i=0;i<nsf;i++)
       target[i]=0;
+}
 …
+)
+{
+   speex_rand_vec(1, exc, nsf);
+   int i;
+   /* FIXME: This is bad, but I don't think the function ever gets called anyway */
+   spx_int32_t seed = 0;
+   for (i=0;i<nsf;i++)
+      exc[i]=SHL32(EXTEND32(speex_rand(1, &seed)),SIG_SHIFT);
+}

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/cb_search.h

-                      r278
+                      r628
 void split_cb_search_shape_sign(
 spx_sig_t target[],             /* target vector */
+spx_word16_t target[],             /* target vector */
 spx_coef_t ak[],                /* LPCs for this subframe */
 spx_coef_t awk1[],              /* Weighted LPCs for this subframe */
 …
 void noise_codebook_quant(
 spx_sig_t target[],             /* target vector */
+spx_word16_t target[],             /* target vector */
 spx_coef_t ak[],                /* LPCs for this subframe */
 spx_coef_t awk1[],              /* Weighted LPCs for this subframe */

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/cb_search_bfin.h

-                      r278
+                      r628
             "LOOP_END inner%=;\n\t"
             "R0 = A0;\n\t"
             "R0 >>>= 11;\n\t"
+            "R0 >>>= 13;\n\t"
             "A1 += R0.L*R0.L (IS);\n\t"
             "W[P3++] = R0;\n\t"
 …
+         :
       : "m" (subvect_size), "m" (shape_cb), "m" (r), "m" (resp), "m" (E)
+      : "A0", "P0", "P1", "P2", "P3", "P4", "R0", "R1", "R2", "I0", "I1", "L0", "L1", "A0", "A1", "memory"
+      : "A0", "P0", "P1", "P2", "P3", "P4", "R0", "R1", "R2", "I0", "I1", "L0",
+        "L1", "A0", "A1", "memory", "LC0", "LC1"
       );
       shape_cb += subvect_size;
 …
 static inline void target_update(spx_word16_t *t, spx_word16_t g, spx_word16_t *r, int len)
+{
+   if (!len)
+      return;
    __asm__ __volatile__
+         (
 …
          "L0 = 0;\n\t"
          "L1 = 0;\n\t"
+         "R2 = 4096;\n\t"
          "LOOP tupdate%= LC0 = %3;\n\t"
          "LOOP_BEGIN tupdate%=;\n\t"
             "R0.L = W[I0] || R1.L = W[I1++];\n\t"
             "R1 = (A1 = R1.L*%2.L) (IS);\n\t"
+            "R1 >>>= 11;\n\t"
+            "R1 = R1 + R2;\n\t"
+            "R1 >>>= 13;\n\t"
             "R0.L = R0.L - R1.L;\n\t"
             "W[I0++] = R0.L;\n\t"
 …
+   :
    : "a" (t), "a" (r), "d" (g), "a" (len)
    : "R0", "R1", "A1", "I0", "I1", "L0", "L1"
+   : "R0", "R1", "R2", "A1", "I0", "I1", "L0", "L1"
          );
+}

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/config.h

-                      r279
+                      r628
+#include <pj/config.h>
+/* Check if we need to use the fixed point version */
+#if !defined(PJ_HAS_FLOATING_POINT) || PJ_HAS_FLOATING_POINT==0
+#   define FIXED_POINT
+#endif
 #define inline __inline
 …
 #include "misc.h"
-#if !defined(PJ_HAS_FLOATING_POINT) || PJ_HAS_FLOATING_POINT==0
-#   define FIXED_POINT
-#endif
 #ifdef _MSC_VER

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/fftwrap.c

-                      r516
+                      r628
 #include "misc.h"
+#define MAX_FFT_SIZE 2048
 #ifdef FIXED_POINT
 …
-int fixed_point = 1;
 #ifdef FIXED_POINT
+#include "smallft.h"
+/*#include "smallft.h"*/
 …
 #else
 #endif
+#ifdef VAR_ARRAYS
    spx_word16_t _in[N];
    spx_word16_t _out[N];
+#else
+   spx_word16_t _in[MAX_FFT_SIZE];
+   spx_word16_t _out[MAX_FFT_SIZE];
+#endif
    for (i=0;i<N;i++)
       _in[i] = (int)floor(.5+in[i]);
 …
    for (i=0;i<N;i++)
       out[i] = _out[i];
+#if 0
    if (!fixed_point)
+   {
 …
       spx_drft_clear(&t);
+   }
+#endif
+}
 …
 #else
 #endif
+#ifdef VAR_ARRAYS
    spx_word16_t _in[N];
    spx_word16_t _out[N];
+#else
+   spx_word16_t _in[MAX_FFT_SIZE];
+   spx_word16_t _out[MAX_FFT_SIZE];
+#endif
    for (i=0;i<N;i++)
       _in[i] = (int)floor(.5+in[i]);
 …
    for (i=0;i<N;i++)
       out[i] = _out[i];
+#if 0
    if (!fixed_point)
+   {
 …
       spx_drft_clear(&t);
+   }
+#endif
+}

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/filters.c

-                      r278
+                      r628
+}
 void signal_div(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
+void signal_div(const spx_word16_t *x, spx_word16_t *y, spx_word32_t scale, int len)
+{
    int i;
 …
       spx_word16_t scale_1;
       scale = PSHR32(scale, SIG_SHIFT);
       scale_1 = EXTRACT16(DIV32_16(SHL32(EXTEND32(SIG_SCALING),7),scale));
+      scale_1 = EXTRACT16(PDIV32_16(SHL32(EXTEND32(SIG_SCALING),7),scale));
       for (i=0;i<len;i++)
+      {
          y[i] = SHR32(MULT16_16(scale_1, EXTRACT16(SHR32(x[i],SIG_SHIFT))),7);
+      }
    } else {
+         y[i] = MULT16_16_P15(scale_1, x[i]);
+      }
+   } else if (scale > SHR32(EXTEND32(SIG_SCALING), 2)) {
       spx_word16_t scale_1;
       scale = PSHR32(scale, SIG_SHIFT-5);
 …
       for (i=0;i<len;i++)
+      {
+         y[i] = MULT16_16(scale_1, EXTRACT16(SHR32(x[i],SIG_SHIFT-2)));
+         y[i] = PSHR32(MULT16_16(scale_1, SHL16(x[i],2)),8);
+      }
+   } else {
+      spx_word16_t scale_1;
+      scale = PSHR32(scale, SIG_SHIFT-7);
+      if (scale < 5)
+         scale = 5;
+      scale_1 = DIV32_16(SHL32(EXTEND32(SIG_SCALING),3),scale);
+      for (i=0;i<len;i++)
+      {
+         y[i] = PSHR32(MULT16_16(scale_1, SHL16(x[i],2)),6);
+      }
+   }
 …
+   }
+   return EXTRACT16(SHR32(SHL32(EXTEND32(spx_sqrt(1+DIV32(sum,len))),(sig_shift+3)),SIG_SHIFT));
+}
+   return EXTRACT16(PSHR32(SHL32(EXTEND32(spx_sqrt(DIV32(sum,len))),(sig_shift+3)),SIG_SHIFT));
+}
+spx_word16_t compute_rms16(const spx_word16_t *x, int len)
+{
+   int i;
+   spx_word16_t max_val=10;
+   for (i=0;i<len;i++)
+   {
+      spx_sig_t tmp = x[i];
+      if (tmp<0)
+         tmp = -tmp;
+      if (tmp > max_val)
+         max_val = tmp;
+   }
+   if (max_val>16383)
+   {
+      spx_word32_t sum=0;
+      for (i=0;i<len;i+=4)
+      {
+         spx_word32_t sum2=0;
+         sum2 = MAC16_16(sum2,PSHR16(x[i],1),PSHR16(x[i],1));
+         sum2 = MAC16_16(sum2,PSHR16(x[i+1],1),PSHR16(x[i+1],1));
+         sum2 = MAC16_16(sum2,PSHR16(x[i+2],1),PSHR16(x[i+2],1));
+         sum2 = MAC16_16(sum2,PSHR16(x[i+3],1),PSHR16(x[i+3],1));
+         sum = ADD32(sum,SHR32(sum2,6));
+      }
+      return SHL16(spx_sqrt(DIV32(sum,len)),4);
+   } else {
+      spx_word32_t sum=0;
+      int sig_shift=0;
+      if (max_val < 8192)
+         sig_shift=1;
+      if (max_val < 4096)
+         sig_shift=2;
+      if (max_val < 2048)
+         sig_shift=3;
+      for (i=0;i<len;i+=4)
+      {
+         spx_word32_t sum2=0;
+         sum2 = MAC16_16(sum2,SHL16(x[i],sig_shift),SHL16(x[i],sig_shift));
+         sum2 = MAC16_16(sum2,SHL16(x[i+1],sig_shift),SHL16(x[i+1],sig_shift));
+         sum2 = MAC16_16(sum2,SHL16(x[i+2],sig_shift),SHL16(x[i+2],sig_shift));
+         sum2 = MAC16_16(sum2,SHL16(x[i+3],sig_shift),SHL16(x[i+3],sig_shift));
+         sum = ADD32(sum,SHR32(sum2,6));
+      }
+      return SHL16(spx_sqrt(DIV32(sum,len)),3-sig_shift);
+   }
+}
 #ifndef OVERRIDE_NORMALIZE16
 …
    return sqrt(.1+sum/len);
+}
+spx_word16_t compute_rms16(const spx_word16_t *x, int len)
+{
+   return compute_rms(x, len);
+}
 #endif
 …
    spx_sig_t xi,yi,nyi;
+   for (i=0;i<ord;i++)
+      mem[i] = SHR32(mem[i],1);
    for (i=0;i<N;i++)
+   {
 …
       y[i] = yi;
+   }
+}
+#endif
+#endif
+   for (i=0;i<ord;i++)
+      mem[i] = SHL32(mem[i],1);
+}
+#endif
+#endif
+#ifdef FIXED_POINT
+#ifndef OVERRIDE_FILTER_MEM16
+void filter_mem16(const spx_word16_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
+{
+   int i,j;
+   spx_word16_t xi,yi,nyi;
+   for (i=0;i<N;i++)
+   {
+      xi= x[i];
+      yi = EXTRACT16(SATURATE(ADD32(EXTEND32(x[i]),PSHR32(mem[0],LPC_SHIFT)),32767));
+      nyi = NEG16(yi);
+      for (j=0;j<ord-1;j++)
+      {
+         mem[j] = MAC16_16(MAC16_16(mem[j+1], num[j],xi), den[j],nyi);
+      }
+      mem[ord-1] = ADD32(MULT16_16(num[ord-1],xi), MULT16_16(den[ord-1],nyi));
+      y[i] = yi;
+   }
+}
+#endif
+#else
+void filter_mem16(const spx_word16_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
+{
+   filter_mem2(x, num, den, y, N, ord, mem);
+}
+#endif
 #ifndef OVERRIDE_IIR_MEM2
 …
    spx_word32_t xi,yi,nyi;
+   for (i=0;i<ord;i++)
+      mem[i] = SHR32(mem[i],1);
    for (i=0;i<N;i++)
+   {
 …
       y[i] = yi;
+   }
+}
+#endif
+#endif
+   for (i=0;i<ord;i++)
+      mem[i] = SHL32(mem[i],1);
+}
+#endif
+#endif
+#ifdef FIXED_POINT
+#ifndef OVERRIDE_IIR_MEM16
+void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
+{
+   int i,j;
+   spx_word16_t yi,nyi;
+   for (i=0;i<N;i++)
+   {
+      yi = EXTRACT16(SATURATE(ADD32(EXTEND32(x[i]),PSHR32(mem[0],LPC_SHIFT)),32767));
+      nyi = NEG16(yi);
+      for (j=0;j<ord-1;j++)
+      {
+         mem[j] = MAC16_16(mem[j+1],den[j],nyi);
+      }
+      mem[ord-1] = MULT16_16(den[ord-1],nyi);
+      y[i] = yi;
+   }
+}
+#endif
+#else
+void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
+{
+   iir_mem2(x, den, y, N, ord, mem);
+}
+#endif
 #ifndef OVERRIDE_FIR_MEM2
 …
    spx_word32_t xi,yi;
+   for (i=0;i<ord;i++)
+      mem[i] = SHR32(mem[i],1);
    for (i=0;i<N;i++)
+   {
 …
       y[i] = SATURATE(yi,805306368);
+   }
+}
+#endif
+#endif
+   for (i=0;i<ord;i++)
+      mem[i] = SHL32(mem[i],1);
+}
+#endif
+#endif
+#ifdef FIXED_POINT
+#ifndef OVERRIDE_FIR_MEM16
+void fir_mem16(const spx_word16_t *x, const spx_coef_t *num, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
+{
+   int i,j;
+   spx_word16_t xi,yi;
+   for (i=0;i<N;i++)
+   {
+      xi=x[i];
+      yi = EXTRACT16(SATURATE(ADD32(EXTEND32(x[i]),PSHR32(mem[0],LPC_SHIFT)),32767));
+      for (j=0;j<ord-1;j++)
+      {
+         mem[j] = MAC16_16(mem[j+1], num[j],xi);
+      }
+      mem[ord-1] = MULT16_16(num[ord-1],xi);
+      y[i] = yi;
+   }
+}
+#endif
+#else
+void fir_mem16(const spx_word16_t *x, const spx_coef_t *num, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
+{
+   fir_mem2(x, num, y, N, ord, mem);
+}
+#endif
 …
    for (;i<N;i++)
       y[i] = VERY_SMALL;
    for (i=0;i<ord;i++)
       mem1[i] = mem2[i] = 0;
 …
       y1 = ADD16(y[i], EXTRACT16(PSHR32(mem1[0],LPC_SHIFT)));
       ny1i = NEG16(y1);
       y[i] = ADD16(SHL16(y1,1), EXTRACT16(PSHR32(mem2[0],LPC_SHIFT)));
+      y[i] = PSHR32(ADD32(SHL32(EXTEND32(y1),LPC_SHIFT+1),mem2[0]),LPC_SHIFT);
       ny2i = NEG16(y[i]);
       for (j=0;j<ord-1;j++)
 …
       for (j=0;j<M2;j++)
+      {
          y1[k]=ADD32(y1[k],SHR(MULT16_16(a[j],ADD16(x[i+j],x2[i-j])),1));
          y2[k]=SUB32(y2[k],SHR(MULT16_16(a[j],SUB16(x[i+j],x2[i-j])),1));
+         y1[k]=ADD32(y1[k],MULT16_16(a[j],ADD16(x[i+j],x2[i-j])));
+         y2[k]=SUB32(y2[k],MULT16_16(a[j],SUB16(x[i+j],x2[i-j])));
          j++;
+         y1[k]=ADD32(y1[k],SHR(MULT16_16(a[j],ADD16(x[i+j],x2[i-j])),1));
+         y2[k]=ADD32(y2[k],SHR(MULT16_16(a[j],SUB16(x[i+j],x2[i-j])),1));
+      }
+         y1[k]=ADD32(y1[k],MULT16_16(a[j],ADD16(x[i+j],x2[i-j])));
+         y2[k]=ADD32(y2[k],MULT16_16(a[j],SUB16(x[i+j],x2[i-j])));
+      }
+      y1[k] = SHR32(y1[k],1);
+      y2[k] = SHR32(y2[k],1);
+   }
    for (i=0;i<M-1;i++)
 …
    for (i = 0; i < N/2; i++)
       xx[2*i] = SHR(x[N/2-1-i],SIG_SHIFT+1);
+      xx[2*i] = PSHR32(x[N/2-1-i],SIG_SHIFT);
    for (i = 0; i < M - 1; i += 2)
       xx[N+i] = mem[i+1];
 …
          x1 = xx[N-2+j-i];
          y0 = ADD32(y0,SHR(MULT16_16(a0, x1),1));
          y1 = ADD32(y1,SHR(MULT16_16(a1, x1),1));
          y2 = ADD32(y2,SHR(MULT16_16(a0, x0),1));
          y3 = ADD32(y3,SHR(MULT16_16(a1, x0),1));
+         y0 = ADD32(y0,SHR(MULT16_16(a0, x1),2));
+         y1 = ADD32(y1,SHR(MULT16_16(a1, x1),2));
+         y2 = ADD32(y2,SHR(MULT16_16(a0, x0),2));
+         y3 = ADD32(y3,SHR(MULT16_16(a1, x0),2));
          a0 = a[j+2];
 …
          x0 = xx[N+j-i];
          y0 = ADD32(y0,SHR(MULT16_16(a0, x0),1));
          y1 = ADD32(y1,SHR(MULT16_16(a1, x0),1));
          y2 = ADD32(y2,SHR(MULT16_16(a0, x1),1));
          y3 = ADD32(y3,SHR(MULT16_16(a1, x1),1));
+         y0 = ADD32(y0,SHR(MULT16_16(a0, x0),2));
+         y1 = ADD32(y1,SHR(MULT16_16(a1, x0),2));
+         y2 = ADD32(y2,SHR(MULT16_16(a0, x1),2));
+         y3 = ADD32(y3,SHR(MULT16_16(a1, x1),2));
+      }
       y[i] = y0;
 …
+}
+void comb_filter_mem_init (CombFilterMem *mem)
+{
+   mem->last_pitch=0;
+   mem->last_pitch_gain[0]=mem->last_pitch_gain[1]=mem->last_pitch_gain[2]=0;
+   mem->smooth_gain=1;
+}
+#ifdef FIXED_POINT
+#define COMB_STEP 32767
+#else
+#define COMB_STEP 1.0
+#endif
+void comb_filter(
+spx_sig_t *exc,          /*decoded excitation*/
+spx_sig_t *new_exc,      /*enhanced excitation*/
+#ifdef FIXED_POINT
+#if 0
+spx_word16_t shift_filt[3][7] = {{-33,    1043,   -4551,   19959,   19959,   -4551,    1043},
+                                 {-98,    1133,   -4425,   29179,    8895,   -2328,     444},
+                                 {444,   -2328,    8895,   29179,   -4425,    1133,     -98}};
+#else
+spx_word16_t shift_filt[3][7] = {{-390,    1540,   -4993,   20123,   20123,   -4993,    1540},
+                                {-1064,    2817,   -6694,   31589,    6837,    -990,    -209},
+                                 {-209,    -990,    6837,   31589,   -6694,    2817,   -1064}};
+#endif
+#else
+#if 0
+float shift_filt[3][7] = {{-9.9369e-04, 3.1831e-02, -1.3889e-01, 6.0910e-01, 6.0910e-01, -1.3889e-01, 3.1831e-02},
+                          {-0.0029937, 0.0345613, -0.1350474, 0.8904793, 0.2714479, -0.0710304, 0.0135403},
+                          {0.0135403, -0.0710304, 0.2714479, 0.8904793, -0.1350474, 0.0345613,  -0.0029937}};
+#else
+float shift_filt[3][7] = {{-0.011915, 0.046995, -0.152373, 0.614108, 0.614108, -0.152373, 0.046995},
+                          {-0.0324855, 0.0859768, -0.2042986, 0.9640297, 0.2086420, -0.0302054, -0.0063646},
+                          {-0.0063646, -0.0302054, 0.2086420, 0.9640297, -0.2042986, 0.0859768, -0.0324855}};
+#endif
+#endif
+int interp_pitch(
+spx_word16_t *exc,          /*decoded excitation*/
+spx_word16_t *interp,          /*decoded excitation*/
+int pitch,               /*pitch period*/
+int len
+)
+{
+   int i,j,k;
+   spx_word32_t corr[4][7];
+   spx_word32_t maxcorr;
+   int maxi, maxj;
+   for (i=0;i<7;i++)
+   {
+      corr[0][i] = inner_prod(exc, exc-pitch-3+i, len);
+   }
+   for (i=0;i<3;i++)
+   {
+      for (j=0;j<7;j++)
+      {
+         int i1, i2;
+         spx_word32_t tmp=0;
+         i1 = 3-j;
+         if (i1<0)
+            i1 = 0;
+         i2 = 10-j;
+         if (i2>7)
+            i2 = 7;
+         for (k=i1;k<i2;k++)
+            tmp += MULT16_32_Q15(shift_filt[i][k],corr[0][j+k-3]);
+         corr[i+1][j] = tmp;
+      }
+   }
+   maxi=maxj=0;
+   maxcorr = corr[0][0];
+   for (i=0;i<4;i++)
+   {
+      for (j=0;j<7;j++)
+      {
+         if (corr[i][j] > maxcorr)
+         {
+            maxcorr = corr[i][j];
+            maxi=i;
+            maxj=j;
+         }
+      }
+   }
+   for (i=0;i<len;i++)
+   {
+      spx_word32_t tmp = 0;
+      if (maxi>0)
+      {
+         for (k=0;k<7;k++)
+         {
+            tmp += MULT16_16(exc[i-(pitch-maxj+3)+k-3],shift_filt[maxi-1][k]);
+         }
+      } else {
+         tmp = SHL32(exc[i-(pitch-maxj+3)],15);
+      }
+      interp[i] = PSHR32(tmp,15);
+   }
+   return pitch-maxj+3;
+}
+void multicomb(
+spx_word16_t *exc,          /*decoded excitation*/
+spx_word16_t *new_exc,      /*enhanced excitation*/
 spx_coef_t *ak,           /*LPC filter coefs*/
 int p,               /*LPC order*/
 int nsf,             /*sub-frame size*/
 int pitch,           /*pitch period*/
+spx_word16_t *pitch_gain,   /*pitch gain (3-tap)*/
+int max_pitch,
 spx_word16_t  comb_gain,    /*gain of comb filter*/
+CombFilterMem *mem
+char *stack
+)
+{
+   int i;
+   spx_word16_t exc_energy=0, new_exc_energy=0;
+   spx_word16_t gain;
+   spx_word16_t step;
+   spx_word16_t fact;
+   /*Compute excitation amplitude prior to enhancement*/
+   exc_energy = compute_rms(exc, nsf);
+   /*for (i=0;i<nsf;i++)
+     exc_energy+=((float)exc[i])*exc[i];*/
+   /*Some gain adjustment if pitch is too high or if unvoiced*/
+#ifdef FIXED_POINT
+   {
+      spx_word16_t g = gain_3tap_to_1tap(pitch_gain)+gain_3tap_to_1tap(mem->last_pitch_gain);
+      if (g > 166)
+         comb_gain = MULT16_16_Q15(DIV32_16(SHL32(EXTEND32(165),15),g), comb_gain);
+      if (g < 64)
+         comb_gain = MULT16_16_Q15(SHL16(g, 9), comb_gain);
+   }
+#else
+   {
+      float g=0;
+      g = GAIN_SCALING_1*.5*(gain_3tap_to_1tap(pitch_gain)+gain_3tap_to_1tap(mem->last_pitch_gain));
+      if (g>1.3)
+         comb_gain*=1.3/g;
+      if (g<.5)
+         comb_gain*=2.*g;
+   }
+#endif
+   step = DIV32(COMB_STEP, nsf);
+   fact=0;
+   /*Apply pitch comb-filter (filter out noise between pitch harmonics)*/
+   int i;
+   VARDECL(spx_word16_t *iexc);
+   spx_word16_t old_ener, new_ener;
+   int corr_pitch;
+   spx_word16_t iexc0_mag, iexc1_mag, exc_mag;
+   spx_word32_t corr0, corr1;
+   spx_word16_t gain0, gain1;
+   spx_word16_t pgain1, pgain2;
+   spx_word16_t c1, c2;
+   spx_word16_t g1, g2;
+   spx_word16_t ngain;
+   spx_word16_t gg1, gg2;
+#if 0 /* Set to 1 to enable full pitch search */
+   int nol_pitch[6];
+   spx_word16_t nol_pitch_coef[6];
+   spx_word16_t ol_pitch_coef;
+   open_loop_nbest_pitch(exc, 20, 120, nsf,
+                         nol_pitch, nol_pitch_coef, 6, stack);
+   corr_pitch=nol_pitch[0];
+   ol_pitch_coef = nol_pitch_coef[0];
+   /*Try to remove pitch multiples*/
+   for (i=1;i<6;i++)
+   {
+#ifdef FIXED_POINT
+      if ((nol_pitch_coef[i]>MULT16_16_Q15(nol_pitch_coef[0],19661)) &&
+#else
+      if ((nol_pitch_coef[i]>.6*nol_pitch_coef[0]) &&
+#endif
+         (ABS(2*nol_pitch[i]-corr_pitch)<=2 || ABS(3*nol_pitch[i]-corr_pitch)<=3 ||
+         ABS(4*nol_pitch[i]-corr_pitch)<=4 || ABS(5*nol_pitch[i]-corr_pitch)<=5))
+      {
+         corr_pitch = nol_pitch[i];
+      }
+   }
+#else
+   corr_pitch = pitch;
+#endif
+   ALLOC(iexc, 2*nsf, spx_word16_t);
+   interp_pitch(exc, iexc, corr_pitch, 80);
+   if (corr_pitch>max_pitch)
+      interp_pitch(exc, iexc+nsf, 2*corr_pitch, 80);
+   else
+      interp_pitch(exc, iexc+nsf, -corr_pitch, 80);
+   /*interp_pitch(exc, iexc+2*nsf, 2*corr_pitch, 80);*/
+   /*printf ("%d %d %f\n", pitch, corr_pitch, max_corr*ener_1);*/
+   iexc0_mag = spx_sqrt(1000+inner_prod(iexc,iexc,nsf));
+   iexc1_mag = spx_sqrt(1000+inner_prod(iexc+nsf,iexc+nsf,nsf));
+   exc_mag = spx_sqrt(1+inner_prod(exc,exc,nsf));
+   corr0  = inner_prod(iexc,exc,nsf);
+   if (corr0<0)
+      corr0=0;
+   corr1 = inner_prod(iexc+nsf,exc,nsf);
+   if (corr1<0)
+      corr1=0;
+#ifdef FIXED_POINT
+   /* Doesn't cost much to limit the ratio and it makes the rest easier */
+   if (SHL32(EXTEND32(iexc0_mag),6) < EXTEND32(exc_mag))
+      iexc0_mag = ADD16(1,PSHR16(exc_mag,6));
+   if (SHL32(EXTEND32(iexc1_mag),6) < EXTEND32(exc_mag))
+      iexc1_mag = ADD16(1,PSHR16(exc_mag,6));
+#endif
+   if (corr0 > MULT16_16(iexc0_mag,exc_mag))
+      pgain1 = QCONST16(1., 14);
+   else
+      pgain1 = PDIV32_16(SHL32(PDIV32(corr0, exc_mag),14),iexc0_mag);
+   if (corr1 > MULT16_16(iexc1_mag,exc_mag))
+      pgain2 = QCONST16(1., 14);
+   else
+      pgain2 = PDIV32_16(SHL32(PDIV32(corr1, exc_mag),14),iexc1_mag);
+   gg1 = PDIV32_16(SHL32(EXTEND32(exc_mag),8), iexc0_mag);
+   gg2 = PDIV32_16(SHL32(EXTEND32(exc_mag),8), iexc1_mag);
+   if (comb_gain>0)
+   {
+#ifdef FIXED_POINT
+      c1 = (MULT16_16_Q15(QCONST16(.4,15),comb_gain)+QCONST16(.07,15));
+      c2 = QCONST16(.5,15)+MULT16_16_Q14(QCONST16(1.72,14),(c1-QCONST16(.07,15)));
+#else
+      c1 = .4*comb_gain+.07;
+      c2 = .5+1.72*(c1-.07);
+#endif
+   } else
+   {
+      c1=c2=0;
+   }
+#ifdef FIXED_POINT
+   g1 = 32767 - MULT16_16_Q13(MULT16_16_Q15(c2, pgain1),pgain1);
+   g2 = 32767 - MULT16_16_Q13(MULT16_16_Q15(c2, pgain2),pgain2);
+#else
+   g1 = 1-c2*pgain1*pgain1;
+   g2 = 1-c2*pgain2*pgain2;
+#endif
+   if (g1<c1)
+      g1 = c1;
+   if (g2<c1)
+      g2 = c1;
+   g1 = (spx_word16_t)PDIV32_16(SHL32(EXTEND32(c1),14),(spx_word16_t)g1);
+   g2 = (spx_word16_t)PDIV32_16(SHL32(EXTEND32(c1),14),(spx_word16_t)g2);
+   if (corr_pitch>max_pitch)
+   {
+      gain0 = MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q14(g1,gg1));
+      gain1 = MULT16_16_Q15(QCONST16(.3,15),MULT16_16_Q14(g2,gg2));
+   } else {
+      gain0 = MULT16_16_Q15(QCONST16(.6,15),MULT16_16_Q14(g1,gg1));
+      gain1 = MULT16_16_Q15(QCONST16(.6,15),MULT16_16_Q14(g2,gg2));
+   }
    for (i=0;i<nsf;i++)
+   {
+      spx_word32_t exc1, exc2;
+      fact = ADD16(fact,step);
+      exc1 = SHL32(MULT16_32_Q15(SHL16(pitch_gain[0],7),exc[i-pitch+1]) +
+                 MULT16_32_Q15(SHL16(pitch_gain[1],7),exc[i-pitch]) +
+                 MULT16_32_Q15(SHL16(pitch_gain[2],7),exc[i-pitch-1]) , 2);
+      exc2 = SHL32(MULT16_32_Q15(SHL16(mem->last_pitch_gain[0],7),exc[i-mem->last_pitch+1]) +
+                 MULT16_32_Q15(SHL16(mem->last_pitch_gain[1],7),exc[i-mem->last_pitch]) +
+                 MULT16_32_Q15(SHL16(mem->last_pitch_gain[2],7),exc[i-mem->last_pitch-1]),2);
+      new_exc[i] = exc[i] + MULT16_32_Q15(comb_gain, ADD32(MULT16_32_Q15(fact,exc1), MULT16_32_Q15(SUB16(COMB_STEP,fact), exc2)));
+   }
+   mem->last_pitch_gain[0] = pitch_gain[0];
+   mem->last_pitch_gain[1] = pitch_gain[1];
+   mem->last_pitch_gain[2] = pitch_gain[2];
+   mem->last_pitch = pitch;
+   /*Amplitude after enhancement*/
+   new_exc_energy = compute_rms(new_exc, nsf);
+   if (exc_energy > new_exc_energy)
+      exc_energy = new_exc_energy;
+   gain = DIV32_16(SHL32(EXTEND32(exc_energy),15),ADD16(1,new_exc_energy));
+#ifdef FIXED_POINT
+   if (gain < 16384)
+      gain = 16384;
+#else
+   if (gain < .5)
+      gain=.5;
+#endif
+#ifdef FIXED_POINT
+      new_exc[i] = ADD16(exc[i], EXTRACT16(PSHR32(ADD32(MULT16_16(gain0,iexc[i]), MULT16_16(gain1,iexc[i+nsf])),8)));
+   /* FIXME: compute_rms16 is currently not quite accurate enough (but close) */
+   new_ener = compute_rms16(new_exc, nsf);
+   old_ener = compute_rms16(exc, nsf);
+   if (old_ener < 1)
+      old_ener = 1;
+   if (new_ener < 1)
+      new_ener = 1;
+   if (old_ener > new_ener)
+      old_ener = new_ener;
+   ngain = PDIV32_16(SHL32(EXTEND32(old_ener),14),new_ener);
    for (i=0;i<nsf;i++)
+   {
+      mem->smooth_gain = ADD16(MULT16_16_Q15(31457,mem->smooth_gain), MULT16_16_Q15(1311,gain));
+      new_exc[i] = MULT16_32_Q15(mem->smooth_gain, new_exc[i]);
+   }
+#else
+   for (i=0;i<nsf;i++)
+   {
+      mem->smooth_gain = .96*mem->smooth_gain + .04*gain;
+      new_exc[i] *= mem->smooth_gain;
+   }
+#endif
+}
+      new_exc[i] = MULT16_16_Q14(ngain, new_exc[i]);
+}

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/filters.h

-                      r278
+                      r628
 spx_word16_t compute_rms(const spx_sig_t *x, int len);
+spx_word16_t compute_rms16(const spx_word16_t *x, int len);
 void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len);
 void signal_div(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len);
+void signal_div(const spx_word16_t *x, spx_word16_t *y, spx_word32_t scale, int len);
 #ifdef FIXED_POINT
 …
 #endif
-/** Combined filter memory. */
-typedef struct {
-   int   last_pitch;
-   spx_word16_t last_pitch_gain[3];
-   spx_word16_t smooth_gain;
-} CombFilterMem;
 …
 void fir_mem2(const spx_sig_t *x, const spx_coef_t *num, spx_sig_t *y, int N, int ord, spx_mem_t *mem);
 void iir_mem2(const spx_sig_t *x, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem);
+void filter_mem16(const spx_word16_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack);
+void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack);
+void fir_mem16(const spx_word16_t *x, const spx_coef_t *num, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack);
 /* Apply bandwidth expansion on LPC coef */
 …
 void compute_impulse_response(const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack);
+void comb_filter_mem_init (CombFilterMem *mem);
+void comb_filter(
+spx_sig_t *exc,          /*decoded excitation*/
+spx_sig_t *new_exc,      /*enhanced excitation*/
+void multicomb(
+spx_word16_t *exc,          /*decoded excitation*/
+spx_word16_t *new_exc,      /*enhanced excitation*/
 spx_coef_t *ak,           /*LPC filter coefs*/
 int p,               /*LPC order*/
 int nsf,             /*sub-frame size*/
 int pitch,           /*pitch period*/
 spx_word16_t *pitch_gain,   /*pitch gain (3-tap)*/
+int max_pitch,   /*maximum pitch (TODO confirm; previous comment duplicated pitch_gain's)*/
 spx_word16_t  comb_gain,    /*gain of comb filter*/
+CombFilterMem *mem
+char *stack
 );
 #endif

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/filters_arm4.h

r278	r628
102	102	spx_sig_t xi,yi,nyi;
103	103
	104	for (i=0;i<ord;i++)
	105	mem[i] = SHR32(mem[i],1);
104	106	for (i=0;i<N;i++)
105	107	{
…	…
253	255
254	256	}
	257	for (i=0;i<ord;i++)
	258	mem[i] = SHL32(mem[i],1);
255	259	}
256	260
…	…
260	264	int i,j;
261	265	spx_sig_t xi,yi,nyi;
	266
	267	for (i=0;i<ord;i++)
	268	mem[i] = SHR32(mem[i],1);
262	269
263	270	for (i=0;i<N;i++)
…	…
377	384
378	385	}
	386	for (i=0;i<ord;i++)
	387	mem[i] = SHL32(mem[i],1);
	388
379	389	}

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/filters_bfin.h

-                      r278
+                      r628
 */
-#include <stdio.h>
 #define OVERRIDE_NORMALIZE16
 int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int len)
 …
    spx_sig_t max_val=1;
    int sig_shift;
    __asm__
+   (
 …
    "I0 = %0;\n\t"
    "L0 = 0;\n\t"
+   "I1 = %1;\n\t"
+   "L1 = 0;\n\t"
+   "P1 = %1;\n\t"
    "R0 = [I0++];\n\t"
    "LOOP norm_shift%= LC0 = %3 >> 1;\n\t"
+   "LOOP norm_shift%= LC0 = %3;\n\t"
    "LOOP_BEGIN norm_shift%=;\n\t"
+      "R1 = ASHIFT R0 by %2.L || R2 = [I0++];\n\t"
+      "R3 = ASHIFT R2 by %2.L || R0 = [I0++];\n\t"
+      "R3 = PACK(R3.L, R1.L);\n\t"
+      "[I1++] = R3;\n\t"
+      "R1 = ASHIFT R0 by %2.L || R0 = [I0++];\n\t"
+      "W[P1++] = R1;\n\t"
    "LOOP_END norm_shift%=;\n\t"
+   : : "a" (x), "a" (y), "d" (-sig_shift), "a" (len)
+   : "I0", "L0", "I1", "L1", "R0", "R1", "R2", "R3", "memory"
+   "R1 = ASHIFT R0 by %2.L;\n\t"
+   "W[P1++] = R1;\n\t"
+   : : "a" (x), "a" (y), "d" (-sig_shift), "a" (len-1)
+   : "I0", "L0", "P1", "R0", "R1", "memory"
    );
    return sig_shift;
 …
    "P0 = %3;\n\t"
    "I0 = P0;\n\t"
    "B0 = P0;\n\t"
+   "B0 = P0;\n\t" /* numden */
    "L0 = 0;\n\t"
    "P2 = %0;\n\t"
+   "P2 = %0;\n\t" /* Fused xy */
    "I2 = P2;\n\t"
    "L2 = 0;\n\t"
    "P4 = %6;\n\t"
    "P0 = %1;\n\t"
    "P1 = %2;\n\t"
+   "P4 = %6;\n\t" /* mem */
+   "P0 = %1;\n\t" /* _x */
+   "P1 = %2;\n\t" /* _y */
    /* First sample */
    "R1 = [P4++];\n\t"
    "R1 <<= 1;\n\t"
    "R2 = [P0++];\n\t"
+   "R1 <<= 1;\n\t" /* shift mem */
+   "R2 = [P0++];\n\t" /* load x[0] */
    "R1 = R1 + R2;\n\t"
    "[P1++] = R1;\n\t"
+   "[P1++] = R1;\n\t" /* store y[0] */
    "R1 <<= 2;\n\t"
    "R2 <<= 2;\n\t"
    "R2 = PACK(R1.H, R2.H);\n\t"
+   "R2 = PACK(R1.H, R2.H);\n\t" /* pack x16 and y16 */
    "[P2] = R2;\n\t"
 …
       "A0 += A1;\n\t"
       "R4 = A0;\n\t"
       "R4 <<= 1;\n\t"
       "R2 = [P0++];\n\t"
+      "R4 <<= 1;\n\t" /* shift mem */
+      "R2 = [P0++];\n\t" /* load x */
       "R4 = R4 + R2;\n\t"
       "[P1++] = R4;\n\t"
+      "[P1++] = R4;\n\t" /* store y */
       "R4 <<= 2;\n\t"
       "R2 <<= 2;\n\t"
       "R2 = PACK(R4.H, R2.H);\n\t"
+      "R2 = PACK(R4.H, R2.H);\n\t" /* pack x16 and y16 */
       "[P2] = R2;\n\t"
 …
    "R0 = %5;\n\t"
    "R0 <<= 1;\n\t"
    "I0 = B0;\n\t"
+   "I0 = B0;\n\t" /* numden */
    "R0 <<= 1;\n\t"
    "L0 = R0;\n\t"
    "R0 = %5;\n\t"
    "R2 = %4;\n\t"
+   "R0 = %5;\n\t" /* ord */
+   "R2 = %4;\n\t" /* N */
    "R2 = R2 - R0;\n\t"
    "R4 = [I0++];\n\t"
+   "R4 = [I0++];\n\t" /* numden */
    "LC0 = R2;\n\t"
    "P3 = R0;\n\t"
 …
    "M0 = R0;\n\t"
    "A1 = A0 = 0;\n\t"
    "R5 = [I2--];\n\t"
+   "R5 = [I2--];\n\t" /* load xy */
    "LOOP filter_mid%= LC0;\n\t"
    "LOOP_BEGIN filter_mid%=;\n\t"
 …
       "LOOP_END filter_mid_inner%=;\n\t"
       "R0 = (A0 += A1) || I2 += M0;\n\t"
       "R0 = R0 << 1 || R5 = [P0++];\n\t"
+      "R0 = R0 << 1 || R5 = [P0++];\n\t" /* load x */
       "R0 = R0 + R5;\n\t"
       "R0 = R0 << 2 || [P1++] = R0;\n\t"
+      "R0 = R0 << 2 || [P1++] = R0;\n\t" /* shift y | store y */
       "R5 = R5 << 2;\n\t"
       "R5 = PACK(R0.H, R5.H);\n\t"
+      "A1 = A0 = 0 || [I2--] = R5\n\t"
+      "LOOP_END filter_mid%=;\n\t"
+   "I2 += 4;\n\t"
+   "P2 = I2;\n\t"
+   /* Update memory */
+   "P4 = %6;\n\t"
+   "R0 = %5;\n\t"
+   "LC0 = R0;\n\t"
+   "P0 = B0;\n\t"
+   "A1 = A0 = 0;\n\t"
+   "LOOP mem_update%= LC0;\n\t"
+   "LOOP_BEGIN mem_update%=;\n\t"
+      "I2 = P2;\n\t"
+      "I0 = P0;\n\t"
+      "P0 += 4;\n\t"
+      "R0 = LC0;\n\t"
+      "LC1 = R0;\n\t"
+      "R5 = [I2--] || R4 = [I0++];\n\t"
+      "LOOP mem_accum%= LC1;\n\t"
+      "LOOP_BEGIN mem_accum%=;\n\t"
+         "A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t"
+      "LOOP_END mem_accum%=;\n\t"
+      "R0 = (A0 += A1);\n\t"
+      "A1 = A0 = 0 || [P4++] = R0;\n\t"
+   "LOOP_END mem_update%=;\n\t"
+   "L0 = 0;\n\t"
+   : : "m" (xy), "m" (_x), "m" (_y), "m" (numden), "m" (N), "m" (ord), "m" (mem)
+   : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B0", "I0", "I2", "L0", "L2", "M0", "memory"
+   );
+}
+#define OVERRIDE_FILTER_MEM16
+void filter_mem16(const spx_word16_t *_x, const spx_coef_t *num, const spx_coef_t *den, spx_word16_t *_y, int N, int ord, spx_mem_t *mem, char *stack)
+{
+   VARDECL(spx_word32_t *xy2);
+   VARDECL(spx_word32_t *numden_a);
+   spx_word32_t *xy;
+   spx_word16_t *numden;
+   int i;
+   ALLOC(xy2, (N+1), spx_word32_t);
+   ALLOC(numden_a, (2*ord+2), spx_word32_t);
+   xy = xy2+1;
+   numden = (spx_word16_t*) numden_a;
+   for (i=0;i<ord;i++)
+   {
+      numden[2*i] = num[i];
+      numden[2*i+1] = den[i];
+   }
+   __asm__ __volatile__
+   (
+   /* Register setup */
+   "R0 = %5;\n\t"      /*ord */
+   "P0 = %3;\n\t"
+   "I0 = P0;\n\t"
+   "B0 = P0;\n\t" /* numden */
+   "L0 = 0;\n\t"
+   "P2 = %0;\n\t" /* Fused xy */
+   "I2 = P2;\n\t"
+   "L2 = 0;\n\t"
+   "P4 = %6;\n\t" /* mem */
+   "P0 = %1;\n\t" /* _x */
+   "P1 = %2;\n\t" /* _y */
+   /* First sample */
+   "R1 = [P4++];\n\t"
+   "R1 <<= 3;\n\t" /* shift mem */
+   "R1.L = R1 (RND);\n\t"
+   "R2 = W[P0++];\n\t" /* load x[0] */
+   "R1.L = R1.L + R2.L;\n\t"
+   "W[P1++] = R1;\n\t" /* store y[0] */
+   "R2 = PACK(R1.L, R2.L);\n\t" /* pack x16 and y16 */
+   "[P2] = R2;\n\t"
+   /* Samples 1 to ord-1 (using memory) */
+   "R0 += -1;\n\t"
+   "R3 = 0;\n\t"
+   "LC0 = R0;\n\t"
+   "LOOP filter_start%= LC0;\n\t"
+   "LOOP_BEGIN filter_start%=;\n\t"
+      "R3 += 1;\n\t"
+      "LC1 = R3;\n\t"
+      "R1 = [P4++];\n\t"
+      "A1 = R1;\n\t"
+      "A0 = 0;\n\t"
+      "I0 = B0;\n\t"
+      "I2 = P2;\n\t"
+      "P2 += 4;\n\t"
+      "R4 = [I0++] || R5 = [I2--];\n\t"
+      "LOOP filter_start_inner%= LC1;\n\t"
+      "LOOP_BEGIN filter_start_inner%=;\n\t"
+         "A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t"
+      "LOOP_END filter_start_inner%=;\n\t"
+      "A0 += A1;\n\t"
+      "R4 = A0;\n\t"
+      "R4 <<= 3;\n\t" /* shift mem */
+      "R4.L = R4 (RND);\n\t"
+      "R2 = W[P0++];\n\t" /* load x */
+      "R4.L = R4.L + R2.L;\n\t"
+      "W[P1++] = R4;\n\t" /* store y */
+      //"R4 <<= 2;\n\t"
+      //"R2 <<= 2;\n\t"
+      "R2 = PACK(R4.L, R2.L);\n\t" /* pack x16 and y16 */
+      "[P2] = R2;\n\t"
+   "LOOP_END filter_start%=;\n\t"
+   /* Samples ord to N*/
+   "R0 = %5;\n\t"
+   "R0 <<= 1;\n\t"
+   "I0 = B0;\n\t" /* numden */
+   "R0 <<= 1;\n\t"
+   "L0 = R0;\n\t"
+   "R0 = %5;\n\t" /* ord */
+   "R2 = %4;\n\t" /* N */
+   "R2 = R2 - R0;\n\t"
+   "R4 = [I0++];\n\t" /* numden */
+   "LC0 = R2;\n\t"
+   "P3 = R0;\n\t"
+   "R0 <<= 2;\n\t"
+   "R0 += 8;\n\t"
+   "I2 = P2;\n\t"
+   "M0 = R0;\n\t"
+   "A1 = A0 = 0;\n\t"
+   "R5 = [I2--];\n\t" /* load xy */
+   "LOOP filter_mid%= LC0;\n\t"
+   "LOOP_BEGIN filter_mid%=;\n\t"
+      "LOOP filter_mid_inner%= LC1=P3;\n\t"
+      "LOOP_BEGIN filter_mid_inner%=;\n\t"
+         "A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t"
+      "LOOP_END filter_mid_inner%=;\n\t"
+      "R0 = (A0 += A1) || I2 += M0;\n\t"
+      "R0 = R0 << 3 || R5 = W[P0++];\n\t" /* load x */
+      "R0.L = R0 (RND);\n\t"
+      "R0.L = R0.L + R5.L;\n\t"
+      "R5 = PACK(R0.L, R5.L) || W[P1++] = R0;\n\t" /* shift y | store y */
       "A1 = A0 = 0 || [I2--] = R5\n\t"
       "LOOP_END filter_mid%=;\n\t"
 …
+}
+#define OVERRIDE_IIR_MEM16
+void iir_mem16(const spx_word16_t *_x, const spx_coef_t *den, spx_word16_t *_y, int N, int ord, spx_mem_t *mem, char *stack)
+{ /* Blackfin inline-assembly implementation of iir_mem16.
+     The asm reads input samples through _x, coefficients through den and filter
+     state through mem; it stores each output sample both to _y and to the scratch
+     array yy, and rewrites mem in the "Update memory" section.
+     stack is not referenced directly here; presumably it is consumed by ALLOC. */
+   VARDECL(spx_word16_t *y);
+   spx_word16_t *yy;
+   ALLOC(y, (N+2), spx_word16_t); /* scratch array, two elements longer than N */
+   yy = y+2; /* the asm stores outputs from here on, leaving two spare elements in front */
+   __asm__ __volatile__
+   (
+   /* Register setup */
+   "R0 = %5;\n\t"      /*ord */
+   "P1 = %3;\n\t" /* den */
+   "I1 = P1;\n\t"
+   "B1 = P1;\n\t"
+   "L1 = 0;\n\t"
+   "P3 = %0;\n\t" /* yy */
+   "I3 = P3;\n\t"
+   "L3 = 0;\n\t"
+   "P4 = %6;\n\t" /* mem */
+   "P0 = %1;\n\t" /* _x */
+   "P1 = %2;\n\t" /* _y */
+   /* First sample */
+   "R1 = [P4++];\n\t" /* load mem[0] */
+   "R1 = R1 << 3 (S);\n\t"
+   "R1.L = R1 (RND);\n\t"
+   "R2 = W[P0++];\n\t" /* load x[0] */
+   "R1 = R1 + R2;\n\t"
+   "W[P1++] = R1;\n\t" /* store y[0] */
+   "W[P3] = R1;\n\t" /* same value kept in yy[0] */
+   /* Samples 1 to ord-1 (using memory) */
+   "R0 += -1;\n\t" /* ord-1 outer iterations */
+   "R3 = 0;\n\t"
+   "LC0 = R0;\n\t"
+   "LOOP filter_start%= LC0;\n\t"
+   "LOOP_BEGIN filter_start%=;\n\t"
+      "R3 += 1;\n\t" /* inner trip count grows by one per output sample */
+      "LC1 = R3;\n\t"
+      "R1 = [P4++];\n\t" /* next mem word seeds the accumulator */
+      "A1 = R1;\n\t"
+      "I1 = B1;\n\t" /* restart at the first coefficient */
+      "I3 = P3;\n\t"
+      "P3 += 2;\n\t"
+      "LOOP filter_start_inner%= LC1;\n\t"
+      "LOOP_BEGIN filter_start_inner%=;\n\t"
+         "R4.L = W[I1++];\n\t" /* coefficient, walking forward */
+         "R5.L = W[I3--];\n\t" /* earlier output from yy, walking backward */
+         "A1 -= R4.L*R5.L (IS);\n\t"
+      "LOOP_END filter_start_inner%=;\n\t"
+      "R1 = A1;\n\t"
+      "R1 <<= 3;\n\t"
+      "R1.L = R1 (RND);\n\t"
+      "R2 = W[P0++];\n\t" /* load x */
+      "R1 = R1 + R2;\n\t"
+      "W[P1++] = R1;\n\t" /* store y */
+      "W[P3] = R1;\n\t" /* and its copy in yy */
+   "LOOP_END filter_start%=;\n\t"
+   /* Samples ord to N*/
+   "R0 = %5;\n\t" /* ord */
+   "R0 <<= 1;\n\t"
+   "I1 = B1;\n\t"
+   "L1 = R0;\n\t" /* L1 = 2*ord */
+   "R0 = %5;\n\t" /* ord */
+   "R2 = %4;\n\t" /* N */
+   "R2 = R2 - R0;\n\t" /* N - ord outer iterations */
+   "R4.L = W[I1++];\n\t"
+   "LC0 = R2;\n\t"
+   "LOOP filter_mid%= LC0;\n\t"
+   "LOOP_BEGIN filter_mid%=;\n\t"
+      "LC1 = R0;\n\t" /* ord inner iterations */
+      "A1 = 0;\n\t"
+      "I3 = P3;\n\t"
+      "P3 += 2;\n\t"
+      "R5.L = W[I3--];\n\t"
+      "LOOP filter_mid_inner%= LC1;\n\t"
+      "LOOP_BEGIN filter_mid_inner%=;\n\t"
+         "A1 -= R4.L*R5.L (IS) || R4.L = W[I1++] || R5.L = W[I3--];\n\t"
+      "LOOP_END filter_mid_inner%=;\n\t"
+      "R1 = A1;\n\t"
+      "R1 = R1 << 3 || R2 = W[P0++];\n\t" /* load x */
+      "R1.L = R1 (RND);\n\t"
+      "R1 = R1 + R2;\n\t"
+      "W[P1++] = R1;\n\t" /* store y */
+      "W[P3] = R1;\n\t" /* and its copy in yy */
+   "LOOP_END filter_mid%=;\n\t"
+   /* Update memory */
+   "P4 = %6;\n\t" /* mem */
+   "R0 = %5;\n\t" /* ord */
+   "LC0 = R0;\n\t"
+   "P1 = B1;\n\t" /* back to the start of den */
+   "LOOP mem_update%= LC0;\n\t"
+   "LOOP_BEGIN mem_update%=;\n\t"
+      "A0 = 0;\n\t"
+      "I3 = P3;\n\t"
+      "I1 = P1;\n\t"
+      "P1 += 2;\n\t" /* each pass starts one coefficient later */
+      "R0 = LC0;\n\t"
+      "LC1=R0;\n\t" /* inner trip count taken from the outer loop counter */
+      "R5.L = W[I3--] || R4.L = W[I1++];\n\t"
+      "LOOP mem_accum%= LC1;\n\t"
+      "LOOP_BEGIN mem_accum%=;\n\t"
+         "A0 -= R4.L*R5.L (IS) || R4.L = W[I1++] || R5.L = W[I3--];\n\t"
+      "LOOP_END mem_accum%=;\n\t"
+      "R0 = A0;\n\t"
+      "[P4++] = R0;\n\t" /* write the next mem word */
+   "LOOP_END mem_update%=;\n\t"
+   "L1 = 0;\n\t"
+   : : "m" (yy), "m" (_x), "m" (_y), "m" (den), "m" (N), "m" (ord), "m" (mem) /* operands %0..%6, in this order */
+   : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B1", "I1", "I3", "L1", "L3", "memory"
+   );
+}
 #define OVERRIDE_FIR_MEM2
 void fir_mem2(const spx_sig_t *x, const spx_coef_t *num, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
 …
       den[i] = 0;
    filter_mem2(x, num, den, y, N, ord, mem);
+}
+#define OVERRIDE_FIR_MEM16
+void fir_mem16(const spx_word16_t *x, const spx_coef_t *num, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
+{ /* FIR filtering implemented by calling filter_mem16() with a zeroed denominator array.
+     Every argument is forwarded unchanged; only den is built locally. */
+   int i;
+   spx_coef_t den2[12]; /* larger than the 10 entries zeroed below, leaving room for the alignment offset */
+   spx_coef_t *den;
+   den = (spx_coef_t*)((((int)den2)+4)&0xfffffffc); /* add 4 to the address of den2, then clear its two low bits (4-byte boundary);
+      NOTE(review): the cast through int assumes a pointer fits in an int */
+   for (i=0;i<10;i++) /* NOTE(review): always zeroes 10 entries, whatever ord is -- presumably ord<=10; confirm */
+      den[i] = 0;
+   filter_mem16(x, num, den, y, N, ord, mem, stack);
+}

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/fixed_bfin.h

-                      r278
+                      r628
 #define FIXED_BFIN_H
+#undef PDIV32_16
+static inline spx_word16_t PDIV32_16(spx_word32_t a, spx_word16_t b)
+{ /* Blackfin asm version of PDIV32_16: a is first increased by b>>1, then divided
+     by b with the DIVS/DIVQ sequence below; the low 16 bits of the result are returned. */
+   spx_word32_t res, bb;
+   bb = b; /* 32-bit copy of b, handed to the asm as a memory operand */
+   a += b>>1; /* add half the divisor before dividing */
+   __asm__  (
+         "P0 = 15;\n\t" /* trip count for the DIVQ loop */
+         "R0 = %1;\n\t" /* a (already increased by b>>1) */
+         "R1 = %2;\n\t" /* bb */
+         //"R0 = R0 + R1;\n\t"
+         "R0 <<= 1;\n\t"
+         "DIVS (R0, R1);\n\t"
+         "LOOP divide%= LC0 = P0;\n\t"
+         "LOOP_BEGIN divide%=;\n\t"
+            "DIVQ (R0, R1);\n\t"
+         "LOOP_END divide%=;\n\t"
+         "R0 = R0.L;\n\t" /* keep only the low half */
+         "%0 = R0;\n\t" /* res */
+   : "=m" (res)
+   : "m" (a), "m" (bb)
+   : "P0", "R0", "R1", "cc");
+   return res;
+}
 #undef DIV32_16
 static inline spx_word16_t DIV32_16(spx_word32_t a, spx_word16_t b)
 …
    spx_word32_t res, bb;
    bb = b;
+   /* Make the rounding consistent with the C version
+      (do we need to do that?)*/
+   if (a<0)
+      a += (b-1);
    __asm__  (
          "P0 = 15;\n\t"
 …
    __asm__
+   (
+         "%1 <<= 1;\n\t"
+         "A1 = %2.L*%1.L (M,IS);\n\t"
+         "A1 = A1 >>> 16;\n\t"
+         "R1 = (A1 += %2.L*%1.H) (IS);\n\t"
+         "%0 = R1;\n\t"
+   : "=&d" (res), "=&d" (b)
+         "A1 = %2.L*%1.L (M);\n\t"
+         "A1 = A1 >>> 15;\n\t"
+         "%0 = (A1 += %2.L*%1.H) ;\n\t"
+   : "=&W" (res), "=&d" (b)
    : "d" (a), "1" (b)
    : "A1", "R1"
+   : "A1"
    );
    return res;
 …
    __asm__
+         (
+         "%1 <<= 1;\n\t"
+         "A1 = %2.L*%1.L (M,IS);\n\t"
+         "A1 = A1 >>> 16;\n\t"
+         "R1 = (A1 += %2.L*%1.H) (IS);\n\t"
+         "%0 = R1 + %4;\n\t"
+   : "=&d" (res), "=&d" (b)
+         "A1 = %2.L*%1.L (M);\n\t"
+         "A1 = A1 >>> 15;\n\t"
+         "%0 = (A1 += %2.L*%1.H);\n\t"
+         "%0 = %0 + %4;\n\t"
+   : "=&W" (res), "=&d" (b)
    : "d" (a), "1" (b), "d" (c)
    : "A1", "R1"
+   : "A1"
          );
    return res;
 …
    __asm__
+         (
+         "%2 <<= 2;\n\t"
+         "A1 = %1.L*%2.L (M,IS);\n\t"
+         "A1 = A1 >>> 16;\n\t"
+         "R1 = (A1 += %1.L*%2.H) (IS);\n\t"
+         "%0 = R1;\n\t"
+   : "=d" (res), "=d" (a), "=d" (b)
+         "%2 <<= 1;\n\t"
+         "A1 = %1.L*%2.L (M);\n\t"
+         "A1 = A1 >>> 15;\n\t"
+         "%0 = (A1 += %1.L*%2.H);\n\t"
+   : "=W" (res), "=d" (a), "=d" (b)
    : "1" (a), "2" (b)
    : "A1", "R1"
+   : "A1"
          );
    return res;
 …
    __asm__
+         (
          "%1 <<= 2;\n\t"
          "A1 = %2.L*%1.L (M,IS);\n\t"
          "A1 = A1 >>> 16;\n\t"
          "R1 = (A1 += %2.L*%1.H) (IS);\n\t"
          "%0 = R1 + %4;\n\t"
    : "=&d" (res), "=&d" (b)
+         "%1 <<= 1;\n\t"
+         "A1 = %2.L*%1.L (M);\n\t"
+         "A1 = A1 >>> 15;\n\t"
+         "%0 = (A1 += %2.L*%1.H);\n\t"
+         "%0 = %0 + %4;\n\t"
+   : "=&W" (res), "=&d" (b)
    : "d" (a), "1" (b), "d" (c)
    : "A1", "R1"
+   : "A1"
          );
    return res;

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/fixed_debug.h

-                      r278
+                      r628
 #define MIPS_INC spx_mips++,
+#define QCONST16(x,bits) ((spx_word16_t)((x)*(1<<(bits))+(1<<((bits)-1))))
+#define QCONST32(x,bits) ((spx_word32_t)((x)*(1<<(bits))+(1<<((bits)-1))))
+#define QCONST16(x,bits) ((spx_word16_t)(.5+(x)*(((spx_word32_t)1)<<(bits))))
+#define QCONST32(x,bits) ((spx_word32_t)(.5+(x)*(((spx_word32_t)1)<<(bits))))
 #define VERIFY_SHORT(x) ((x)<=32767&&(x)>=-32768)
 …
    res = a+b;
    if (!VERIFY_SHORT(res))
       fprintf (stderr, "ADD16: output is not short: %d\n", res);
+      fprintf (stderr, "ADD16: output is not short: %d+%d=%d\n", a,b,res);
    spx_mips++;
    return res;
 …
    res = a+b;
    if (!VERIFY_INT(res))
+   {
       fprintf (stderr, "ADD32: output is not int: %d\n", (int)res);
+   }
    spx_mips++;
    return res;
 …
 #define MAC16_16_Q11(c,a,b)     (ADD16((c),EXTRACT16(SHR32(MULT16_16((a),(b)),11))))
 #define MAC16_16_Q13(c,a,b)     (ADD16((c),EXTRACT16(SHR32(MULT16_16((a),(b)),13))))
+#define MAC16_16_P13(c,a,b)     (ADD32((c),SHR(ADD32(4096,MULT16_16((a),(b))),13)))
 static inline int MULT16_32_QX(int a, long long b, int Q)
 …
    return res;
+}
+#define PDIV32(a,b) DIV32(ADD32((a),(b)>>1),b)
+#define PDIV32_16(a,b) DIV32_16(ADD32((a),(b)>>1),b)
 #endif

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/fixed_generic.h

-                      r278
+                      r628
 #define FIXED_GENERIC_H
 #define QCONST16(x,bits) ((spx_word16_t)(.5+(x)*(1<<(bits))))
 #define QCONST32(x,bits) ((spx_word32_t)(.5+(x)*(1<<(bits))))
+#define QCONST16(x,bits) ((spx_word16_t)(.5+(x)*(((spx_word32_t)1)<<(bits))))
+#define QCONST32(x,bits) ((spx_word32_t)(.5+(x)*(((spx_word32_t)1)<<(bits))))
 #define NEG16(x) (-(x))
 #define NEG32(x) (-(x))
 #define EXTRACT16(x) ((spx_word16_t)x)
 #define EXTEND32(x) ((spx_word32_t)x)
+#define EXTRACT16(x) ((spx_word16_t)(x))
+#define EXTEND32(x) ((spx_word32_t)(x))
 #define SHR16(a,shift) ((a) >> (shift))
 #define SHL16(a,shift) ((a) << (shift))
 …
 #define ADD32(a,b) ((spx_word32_t)(a)+(spx_word32_t)(b))
 #define SUB32(a,b) ((spx_word32_t)(a)-(spx_word32_t)(b))
-#define ADD64(a,b) ((spx_word64_t)(a)+(spx_word64_t)(b))
 …
 #define MAC16_16_Q11(c,a,b)     (ADD32((c),SHR(MULT16_16((a),(b)),11)))
 #define MAC16_16_Q13(c,a,b)     (ADD32((c),SHR(MULT16_16((a),(b)),13)))
+#define MAC16_16_P13(c,a,b)     (ADD32((c),SHR(ADD32(4096,MULT16_16((a),(b))),13)))
 #define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11))
 …
 #define DIV32_16(a,b) ((spx_word16_t)(((spx_word32_t)(a))/((spx_word16_t)(b))))
+#define PDIV32_16(a,b) ((spx_word16_t)(((spx_word32_t)(a)+((spx_word16_t)(b)>>1))/((spx_word16_t)(b))))
 #define DIV32(a,b) (((spx_word32_t)(a))/((spx_word32_t)(b)))
+#define PDIV32(a,b) (((spx_word32_t)(a)+((spx_word16_t)(b)>>1))/((spx_word32_t)(b)))
 #endif

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/gain_table.c

-                      r278
+                      r628
 */
 const signed char gain_cdbk_nb[384] = {
 -32,-32,-32,
 -28,-67,-5,
 -42,-6,-32,
 -57,-10,-54,
 -16,27,-41,
 ,-19,-40,
 -45,24,-21,
 -8,-14,-18,
 ,14,-58,
 -18,-88,-39,
 -38,21,-18,
 -19,20,-43,
 ,17,-48,
 -52,-58,-13,
 -44,-1,-11,
 -12,-11,-34,
 ,0,-46,
 -37,-35,-34,
 -25,44,-30,
 ,-4,-63,
 -31,43,-41,
 -23,30,-43,
 -43,26,-14,
 -33,1,-13,
 -13,18,-37,
 -46,-73,-45,
 -36,24,-25,
 -36,-11,-20,
 -25,12,-18,
 -36,-69,-59,
 -45,6,8,
 -22,-14,-24,
 -1,13,-44,
 -39,-48,-26,
 -32,31,-37,
 -33,15,-46,
 -24,30,-36,
 -41,31,-23,
 -50,22,-4,
 -22,2,-21,
 -17,30,-34,
 -7,-60,-28,
 -38,42,-28,
 -44,-11,21,
 -16,8,-44,
 -39,-55,-43,
 -11,-35,26,
 -9,0,-34,
 -8,121,-81,
 ,-16,-22,
 -37,33,-31,
 -27,-7,-36,
 -34,70,-57,
 -37,-11,-48,
 -40,17,-1,
 -33,6,-6,
 -9,0,-20,
 -21,69,-33,
 -29,33,-31,
 -55,12,-1,
 -33,27,-22,
 -50,-33,-47,
 -50,54,51,
 -1,-5,-44,
 -4,22,-40,
 -39,-66,-25,
 -33,1,-26,
 -24,-23,-25,
 -11,21,-45,
 -25,-45,-19,
 -43,105,-16,
 ,-21,1,
 -16,11,-33,
 -13,-99,-4,
 -37,33,-15,
 -25,37,-63,
 -36,24,-31,
 -53,-56,-38,
 -41,-4,4,
 -33,13,-30,
 ,52,-94,
 -5,-30,-15,
 ,38,-40,
 -23,12,-36,
 -17,40,-47,
 -37,-41,-39,
 -49,34,0,
 -18,-7,-4,
 -16,17,-27,
 ,5,-62,
 ,48,-68,
 -43,11,-11,
 -18,19,-15,
 -23,-62,-39,
 -42,10,-2,
 -21,-13,-13,
 -9,13,-47,
 -23,-62,-24,
 -44,60,-21,
 -18,-3,-52,
 -22,22,-36,
 -75,57,16,
 -19,3,10,
 -29,23,-38,
 -5,-62,-51,
 -51,40,-18,
 -42,13,-24,
 -34,14,-20,
 -56,-75,-26,
 -26,32,15,
 -26,17,-29,
 -7,28,-52,
 -12,-30,5,
 -5,-48,-5,
 ,2,-43,
 ,16,16,
 -25,-45,-32,
 -43,18,-10,
 ,0,-1,
 -1,7,-30,
 ,-48,-4,
 -28,25,-29,
 -22,0,-31,
 -32,17,-10,
 -64,-41,-62,
 -52,15,16,
 -30,-22,-32,
 -7,9,-38};
+const signed char gain_cdbk_nb[512] = {
+-32, -32, -32, 0,
+-28, -67, -5, 33,
+-42, -6, -32, 18,
+-57, -10, -54, 35,
+-16, 27, -41, 42,
+, -19, -40, 36,
+-45, 24, -21, 40,
+-8, -14, -18, 28,
+, 14, -58, 53,
+-18, -88, -39, 39,
+-38, 21, -18, 37,
+-19, 20, -43, 38,
+, 17, -48, 54,
+-52, -58, -13, 33,
+-44, -1, -11, 32,
+-12, -11, -34, 22,
+, 0, -46, 46,
+-37, -35, -34, 5,
+-25, 44, -30, 43,
+, -4, -63, 49,
+-31, 43, -41, 43,
+-23, 30, -43, 41,
+-43, 26, -14, 44,
+-33, 1, -13, 27,
+-13, 18, -37, 37,
+-46, -73, -45, 34,
+-36, 24, -25, 34,
+-36, -11, -20, 19,
+-25, 12, -18, 33,
+-36, -69, -59, 34,
+-45, 6, 8, 46,
+-22, -14, -24, 18,
+-1, 13, -44, 44,
+-39, -48, -26, 15,
+-32, 31, -37, 34,
+-33, 15, -46, 31,
+-24, 30, -36, 37,
+-41, 31, -23, 41,
+-50, 22, -4, 50,
+-22, 2, -21, 28,
+-17, 30, -34, 40,
+-7, -60, -28, 29,
+-38, 42, -28, 42,
+-44, -11, 21, 43,
+-16, 8, -44, 34,
+-39, -55, -43, 21,
+-11, -35, 26, 41,
+-9, 0, -34, 29,
+-8, 121, -81, 113,
+, -16, -22, 33,
+-37, 33, -31, 36,
+-27, -7, -36, 17,
+-34, 70, -57, 65,
+-37, -11, -48, 21,
+-40, 17, -1, 44,
+-33, 6, -6, 33,
+-9, 0, -20, 34,
+-21, 69, -33, 57,
+-29, 33, -31, 35,
+-55, 12, -1, 49,
+-33, 27, -22, 35,
+-50, -33, -47, 17,
+-50, 54, 51, 94,
+-1, -5, -44, 35,
+-4, 22, -40, 45,
+-39, -66, -25, 24,
+-33, 1, -26, 20,
+-24, -23, -25, 12,
+-11, 21, -45, 44,
+-25, -45, -19, 17,
+-43, 105, -16, 82,
+, -21, 1, 41,
+-16, 11, -33, 30,
+-13, -99, -4, 57,
+-37, 33, -15, 44,
+-25, 37, -63, 54,
+-36, 24, -31, 31,
+-53, -56, -38, 26,
+-41, -4, 4, 37,
+-33, 13, -30, 24,
+, 52, -94, 114,
+-5, -30, -15, 23,
+, 38, -40, 56,
+-23, 12, -36, 29,
+-17, 40, -47, 51,
+-37, -41, -39, 11,
+-49, 34, 0, 58,
+-18, -7, -4, 34,
+-16, 17, -27, 35,
+, 5, -62, 65,
+, 48, -68, 76,
+-43, 11, -11, 38,
+-18, 19, -15, 41,
+-23, -62, -39, 23,
+-42, 10, -2, 41,
+-21, -13, -13, 25,
+-9, 13, -47, 42,
+-23, -62, -24, 24,
+-44, 60, -21, 58,
+-18, -3, -52, 32,
+-22, 22, -36, 34,
+-75, 57, 16, 90,
+-19, 3, 10, 45,
+-29, 23, -38, 32,
+-5, -62, -51, 38,
+-51, 40, -18, 53,
+-42, 13, -24, 32,
+-34, 14, -20, 30,
+-56, -75, -26, 37,
+-26, 32, 15, 59,
+-26, 17, -29, 29,
+-7, 28, -52, 53,
+-12, -30, 5, 30,
+-5, -48, -5, 35,
+, 2, -43, 40,
+, 16, 16, 75,
+-25, -45, -32, 10,
+-43, 18, -10, 42,
+, 0, -1, 52,
+-1, 7, -30, 36,
+, -48, -4, 48,
+-28, 25, -29, 32,
+-22, 0, -31, 22,
+-32, 17, -10, 36,
+-64, -41, -62, 36,
+-52, 15, 16, 58,
+-30, -22, -32, 6,
+-7, 9, -38, 36};

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/gain_table_lbr.c

-                      r278
+                      r628
 */
 const signed char gain_cdbk_lbr[96] = {
 -32,-32,-32,
 -31,-58,-16,
 -41,-24,-43,
 -56,-22,-55,
 -13,33,-41,
 -4,-39,-9,
 -41,15,-12,
 -8,-15,-12,
 ,2,-44,
 -22,-66,-42,
 -38,28,-23,
 -21,14,-37,
 ,21,-50,
 -53,-71,-27,
 -37,-1,-19,
 -19,-5,-28,
 ,65,-44,
 -33,-48,-33,
 -40,57,-14,
 -17,4,-45,
 -31,38,-33,
 -23,28,-40,
 -43,29,-12,
 -34,13,-23,
 -16,15,-27,
 -14,-82,-15,
 -31,25,-32,
 -21,5,-5,
 -47,-63,-51,
 -46,12,3,
 -28,-17,-29,
 -10,14,-40};
+const signed char gain_cdbk_lbr[128] = {
+-32, -32, -32, 0,
+-31, -58, -16, 22,
+-41, -24, -43, 14,
+-56, -22, -55, 29,
+-13, 33, -41, 47,
+-4, -39, -9, 29,
+-41, 15, -12, 38,
+-8, -15, -12, 31,
+, 2, -44, 40,
+-22, -66, -42, 27,
+-38, 28, -23, 38,
+-21, 14, -37, 31,
+, 21, -50, 52,
+-53, -71, -27, 33,
+-37, -1, -19, 25,
+-19, -5, -28, 22,
+, 65, -44, 74,
+-33, -48, -33, 9,
+-40, 57, -14, 58,
+-17, 4, -45, 32,
+-31, 38, -33, 36,
+-23, 28, -40, 39,
+-43, 29, -12, 46,
+-34, 13, -23, 28,
+-16, 15, -27, 34,
+-14, -82, -15, 43,
+-31, 25, -32, 29,
+-21, 5, -5, 38,
+-47, -63, -51, 33,
+-46, 12, 3, 47,
+-28, -17, -29, 11,
+-10, 14, -40, 38};

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/jitter.c

-                      r523
+                      r628
 #endif
-#ifndef NULL
-#define NULL 0
-#endif
 #include "misc.h"
 …
 #include <stdio.h>
+#define LATE_BINS 4
+void speex_jitter_init(SpeexJitter *jitter, void *decoder, int sampling_rate)
+#define LATE_BINS 10
+#define MAX_MARGIN 30                     /**< Number of bins in margin histogram */
+#define SPEEX_JITTER_MAX_BUFFER_SIZE 200   /**< Maximum number of packets in jitter buffer */
+#define GT32(a,b) (((spx_int32_t)((a)-(b)))>0)
+#define GE32(a,b) (((spx_int32_t)((a)-(b)))>=0)
+#define LT32(a,b) (((spx_int32_t)((a)-(b)))<0)
+#define LE32(a,b) (((spx_int32_t)((a)-(b)))<=0)
+/** Jitter buffer structure: a fixed table of SPEEX_JITTER_MAX_BUFFER_SIZE packet
+ *  slots (parallel arrays buf/timestamp/span/len) plus timing state and statistics */
+struct JitterBuffer_ {
+   spx_uint32_t pointer_timestamp;                                        /**< Timestamp of what we will *get* next */
+   spx_uint32_t current_timestamp;                                        /**< Timestamp of the local clock (what we will *play* next) */
+   char *buf[SPEEX_JITTER_MAX_BUFFER_SIZE];                               /**< Buffer of packets (NULL if slot is free) */
+   spx_uint32_t timestamp[SPEEX_JITTER_MAX_BUFFER_SIZE];                  /**< Timestamp of packet                 */
+   int span[SPEEX_JITTER_MAX_BUFFER_SIZE];                                /**< Span of packet (added to its timestamp to find where it ends) */
+   int len[SPEEX_JITTER_MAX_BUFFER_SIZE];                                 /**< Number of bytes in packet           */
+   int tick_size;                                                         /**< Output granularity                  */
+   int reset_state;                                                       /**< True if state was just reset        */
+   int buffer_margin;                                                     /**< How many frames we want to keep in the buffer (lower bound) */
+   int lost_count;                                                        /**< Number of consecutive lost packets  */
+   float shortterm_margin[MAX_MARGIN];                                    /**< Short term margin histogram         */
+   float longterm_margin[MAX_MARGIN];                                     /**< Long term margin histogram          */
+   float loss_rate;                                                       /**< Average loss rate                   */
+};
+/** Initialise jitter buffer.
+ *  Allocates a JitterBuffer with speex_alloc(). When that succeeds, every packet
+ *  slot is set to NULL, tick is stored in tick_size, buffer_margin is set to 1
+ *  and jitter_buffer_reset() is called.
+ *  @param tick Value stored in tick_size
+ *  @return The new buffer, or the null result of speex_alloc() if allocation failed
+ */
+JitterBuffer *jitter_buffer_init(int tick)
+{
+   JitterBuffer *jitter = speex_alloc(sizeof(JitterBuffer));
+   if (jitter)
+   {
+      int i;
+      for (i=0;i<SPEEX_JITTER_MAX_BUFFER_SIZE;i++) /* clear the slots before the reset inspects them */
+         jitter->buf[i]=NULL;
+      jitter->tick_size = tick;
+      jitter->buffer_margin = 1;
+      jitter_buffer_reset(jitter);
+   }
+   return jitter;
+}
+/** Reset jitter buffer */
+void jitter_buffer_reset(JitterBuffer *jitter)
+{
    int i;
    for (i=0;i<SPEEX_JITTER_MAX_BUFFER_SIZE;i++)
+   {
+      jitter->len[i]=-1;
+      jitter->timestamp[i]=-1;
+   }
+   jitter->dec = decoder;
+   speex_decoder_ctl(decoder, SPEEX_GET_FRAME_SIZE, &jitter->frame_size);
+   jitter->frame_time = jitter->frame_size;
+   speex_bits_init(&jitter->current_packet);
+   jitter->valid_bits = 0;
+   jitter->buffer_size = 4;
+   jitter->pointer_timestamp = -jitter->frame_time * jitter->buffer_size;
+      if (jitter->buf[i])
+      {
+         speex_free(jitter->buf[i]);
+         jitter->buf[i] = NULL;
+      }
+   }
+   /* Timestamp is actually undefined at this point */
+   jitter->pointer_timestamp = 0;
+   jitter->current_timestamp = 0;
    jitter->reset_state = 1;
    jitter->lost_count = 0;
    jitter->loss_rate = 0;
+}
+void speex_jitter_destroy(SpeexJitter *jitter)
+{
+   speex_bits_destroy(&jitter->current_packet);
+}
+void speex_jitter_put(SpeexJitter *jitter, char *packet, int len, int timestamp)
+   for (i=0;i<MAX_MARGIN;i++)
+   {
+      jitter->shortterm_margin[i] = 0;
+      jitter->longterm_margin[i] = 0;
+   }
+   /*fprintf (stderr, "reset\n");*/
+}
+/** Destroy jitter buffer.
+ *  Calls jitter_buffer_reset() first (which appears to free any stored packets)
+ *  and then releases the JitterBuffer itself with speex_free().
+ *  NOTE(review): jitter is passed straight to the reset, so it presumably must
+ *  not be NULL -- confirm against callers.
+ */
+void jitter_buffer_destroy(JitterBuffer *jitter)
+{
+   jitter_buffer_reset(jitter);
+   speex_free(jitter);
+}
+/** Put one packet into the jitter buffer */
+void jitter_buffer_put(JitterBuffer *jitter, const JitterBufferPacket *packet)
+{
    int i,j;
    int arrival_margin;
+   spx_int32_t arrival_margin;
+   /*fprintf (stderr, "put packet %d %d\n", timestamp, span);*/
    if (jitter->reset_state)
+   {
       jitter->reset_state=0;
+      jitter->pointer_timestamp = timestamp-jitter->frame_time * jitter->buffer_size;
+      for (i=0;i<MAX_MARGIN;i++)
+      {
+         jitter->shortterm_margin[i] = 0;
+         jitter->longterm_margin[i] = 0;
+      }
+      for (i=0;i<SPEEX_JITTER_MAX_BUFFER_SIZE;i++)
+      {
+         jitter->len[i]=-1;
+         jitter->timestamp[i]=-1;
+      }
+      fprintf(stderr, "reset to %d\n", timestamp);
+      jitter->pointer_timestamp = packet->timestamp;
+      jitter->current_timestamp = packet->timestamp;
+      /*fprintf(stderr, "reset to %d\n", timestamp);*/
+   }
 …
    for (i=0;i<SPEEX_JITTER_MAX_BUFFER_SIZE;i++)
+   {
       if (jitter->timestamp[i]<jitter->pointer_timestamp)
+      {
          jitter->len[i]=-1;
          /*if (jitter->timestamp[i] != -1)
             fprintf (stderr, "discarding %d %d\n", jitter->timestamp[i], jitter->pointer_timestamp);*/
+      if (jitter->buf[i] && LE32(jitter->timestamp[i] + jitter->span[i], jitter->pointer_timestamp))
+      {
+         /*fprintf (stderr, "cleaned (not played)\n");*/
+         speex_free(jitter->buf[i]);
+         jitter->buf[i] = NULL;
+      }
+   }
 …
    for (i=0;i<SPEEX_JITTER_MAX_BUFFER_SIZE;i++)
+   {
       if (jitter->len[i]==-1)
+      if (jitter->buf[i]==NULL)
          break;
+   }
    /*fprintf(stderr, "%d %d %f\n", timestamp, jitter->pointer_timestamp, jitter->drift_average);*/
+   /*No place left in the buffer*/
    if (i==SPEEX_JITTER_MAX_BUFFER_SIZE)
+   {
 …
       for (j=1;j<SPEEX_JITTER_MAX_BUFFER_SIZE;j++)
+      {
          if (jitter->timestamp[j]<earliest)
+         if (!jitter->buf[i] || LT32(jitter->timestamp[j],earliest))
+         {
             earliest = jitter->timestamp[j];
 …
+         }
+      }
+      /*fprintf (stderr, "Buffer is full, discarding earliest frame %d (currently at %d)\n", timestamp, jitter->pointer_timestamp);*/
+      /*No place left in the buffer*/
+      /*skip some frame(s) */
+      /*return;*/
+      speex_free(jitter->buf[i]);
+      jitter->buf[i]=NULL;
+      if (jitter->lost_count>20)
+      {
+         jitter_buffer_reset(jitter);
+      }
+      /*fprintf (stderr, "Buffer is full, discarding earliest frame %d (currently at %d)\n", timestamp, jitter->pointer_timestamp);*/
+   }
    /* Copy packet in buffer */
+   if (len>SPEEX_JITTER_MAX_PACKET_SIZE)
+      len=SPEEX_JITTER_MAX_PACKET_SIZE;
+   for (j=0;j<len/BYTES_PER_CHAR;j++)
+      jitter->buf[i][j]=packet[j];
+   jitter->timestamp[i]=timestamp;
+   jitter->len[i]=len;
+   /* Don't count late packets when adjusting the synchro (we're taking care of them elsewhere) */
+   /*if (timestamp <= jitter->pointer_timestamp)
+   {
+      fprintf (stderr, "frame for timestamp %d arrived too late (at time %d)\n", timestamp, jitter->pointer_timestamp);
+   }*/
+   jitter->buf[i]=speex_alloc(packet->len);
+   for (j=0;j<packet->len;j++)
+      jitter->buf[i][j]=packet->data[j];
+   jitter->timestamp[i]=packet->timestamp;
+   jitter->span[i]=packet->span;
+   jitter->len[i]=packet->len;
    /* Adjust the buffer size depending on network conditions */
    arrival_margin = (timestamp - jitter->pointer_timestamp - jitter->frame_time);
    if (arrival_margin >= -LATE_BINS*jitter->frame_time)
+   {
       int int_margin;
+   arrival_margin = (packet->timestamp - jitter->current_timestamp) - jitter->buffer_margin*jitter->tick_size;
+   if (arrival_margin >= -LATE_BINS*jitter->tick_size)
+   {
+      spx_int32_t int_margin;
       for (i=0;i<MAX_MARGIN;i++)
+      {
 …
          jitter->longterm_margin[i] *= .995;
+      }
       int_margin = (arrival_margin + LATE_BINS*jitter->frame_time)/jitter->frame_time;
+      int_margin = LATE_BINS + arrival_margin/jitter->tick_size;
       if (int_margin>MAX_MARGIN-1)
          int_margin = MAX_MARGIN-1;
 …
          jitter->longterm_margin[int_margin] += .005;
+      }
+   }
+   /*fprintf (stderr, "margin : %d %d %f %f %f %f\n", arrival_margin, jitter->buffer_size, 100*jitter->loss_rate, 100*jitter->late_ratio, 100*jitter->ontime_ratio, 100*jitter->early_ratio);*/
+}
+void speex_jitter_get(SpeexJitter *jitter, short *out, int *current_timestamp)
+{
+   int i;
+   int ret;
+   } else {
+      /*fprintf (stderr, "way too late = %d\n", arrival_margin);*/
+      if (jitter->lost_count>20)
+      {
+         jitter_buffer_reset(jitter);
+      }
+   }
+#if 0 /* Enable to check how much is being buffered */
+   if (rand()%1000==0)
+   {
+      int count = 0;
+      for (j=0;j<SPEEX_JITTER_MAX_BUFFER_SIZE;j++)
+      {
+         if (jitter->buf[j])
+            count++;
+      }
+      fprintf (stderr, "buffer_size = %d\n", count);
+   }
+#endif
+}
+/** Get one packet from the jitter buffer */
+int jitter_buffer_get(JitterBuffer *jitter, JitterBufferPacket *packet, spx_uint32_t *start_offset)
+{
+   int i, j;
    float late_ratio_short;
    float late_ratio_long;
 …
    float early_ratio_short;
    float early_ratio_long;
+   int chunk_size;
+   int incomplete = 0;
+   if (LT32(jitter->current_timestamp+jitter->tick_size, jitter->pointer_timestamp))
+   {
+      jitter->current_timestamp = jitter->pointer_timestamp;
+      speex_warning("did you forget to call jitter_buffer_tick() by any chance?");
+   }
+   /*fprintf (stderr, "get packet %d %d\n", jitter->pointer_timestamp, jitter->current_timestamp);*/
+   /* FIXME: This should be only what remains of the current tick */
+   chunk_size = jitter->tick_size;
+   /* Compiling arrival statistics */
    late_ratio_short = 0;
 …
    if (0&&jitter->pointer_timestamp%1000==0)
+   {
       fprintf (stderr, "%f %f %f %f %f %f\n", early_ratio_short, early_ratio_long, ontime_ratio_short, ontime_ratio_long, late_ratio_short, late_ratio_long);
+      /*fprintf (stderr, "%f %f %f %f %f %f\n", early_ratio_short, early_ratio_long, ontime_ratio_short, ontime_ratio_long, late_ratio_short, late_ratio_long);*/
       /*fprintf (stderr, "%f %f\n", early_ratio_short + ontime_ratio_short + late_ratio_short, early_ratio_long + ontime_ratio_long + late_ratio_long);*/
+   }
+   /* Adjusting the buffering */
    if (late_ratio_short > .1 || late_ratio_long > .03)
+   {
+      /* If too many packets are arriving late */
       jitter->shortterm_margin[MAX_MARGIN-1] += jitter->shortterm_margin[MAX_MARGIN-2];
       jitter->longterm_margin[MAX_MARGIN-1] += jitter->longterm_margin[MAX_MARGIN-2];
 …
       jitter->shortterm_margin[0] = 0;
       jitter->longterm_margin[0] = 0;
+      /*fprintf (stderr, "interpolate frame\n");*/
+      speex_decode_int(jitter->dec, NULL, (spx_int16_t*)out);
+      if (current_timestamp)
+         *current_timestamp = jitter->pointer_timestamp;
+      return;
+   }
+   /* Increment timestamp */
+   jitter->pointer_timestamp += jitter->frame_time;
+   if (late_ratio_short + ontime_ratio_short < .005 && late_ratio_long + ontime_ratio_long < .01 && early_ratio_short > .8)
+   {
+      jitter->pointer_timestamp -= jitter->tick_size;
+      jitter->current_timestamp -= jitter->tick_size;
+      /*fprintf (stderr, "i");*/
+      /*fprintf (stderr, "interpolate (getting some slack)\n");*/
+   } else if (late_ratio_short + ontime_ratio_short < .005 && late_ratio_long + ontime_ratio_long < .01 && early_ratio_short > .8)
+   {
+      /* Many frames arriving early */
       jitter->shortterm_margin[0] += jitter->shortterm_margin[1];
       jitter->longterm_margin[0] += jitter->longterm_margin[1];
 …
       jitter->longterm_margin[MAX_MARGIN-1] = 0;
       /*fprintf (stderr, "drop frame\n");*/
+      jitter->pointer_timestamp += jitter->frame_time;
+   }
+   if (current_timestamp)
+      *current_timestamp = jitter->pointer_timestamp;
+   /* Send zeros while we fill in the buffer */
+   if (jitter->pointer_timestamp<0)
+   {
+      for (i=0;i<jitter->frame_size;i++)
+         out[i]=0;
+      return;
+   }
+   /* Search the buffer for a packet with the right timestamp */
+      /*fprintf (stderr, "d");*/
+      jitter->pointer_timestamp += jitter->tick_size;
+      jitter->current_timestamp += jitter->tick_size;
+      /*fprintf (stderr, "dropping packet (getting more aggressive)\n");*/
+   }
+   /* Searching for the packet that fits best */
+   /* Search the buffer for a packet with the right timestamp and spanning the whole current chunk */
    for (i=0;i<SPEEX_JITTER_MAX_BUFFER_SIZE;i++)
+   {
       if (jitter->len[i]!=-1 && jitter->timestamp[i]==jitter->pointer_timestamp)
+      if (jitter->buf[i] && jitter->timestamp[i]==jitter->pointer_timestamp && GE32(jitter->timestamp[i]+jitter->span[i],jitter->pointer_timestamp+chunk_size))
          break;
+   }
+   /* If no match, try for an "older" packet that still spans (fully) the current chunk */
    if (i==SPEEX_JITTER_MAX_BUFFER_SIZE)
+   {
+      for (i=0;i<SPEEX_JITTER_MAX_BUFFER_SIZE;i++)
+      {
+         if (jitter->buf[i] && jitter->timestamp[i]<=jitter->pointer_timestamp && GE32(jitter->timestamp[i]+jitter->span[i],jitter->pointer_timestamp+chunk_size))
+            break;
+      }
+   }
+   /* If still no match, try for an "older" packet that spans part of the current chunk */
+   if (i==SPEEX_JITTER_MAX_BUFFER_SIZE)
+   {
+      for (i=0;i<SPEEX_JITTER_MAX_BUFFER_SIZE;i++)
+      {
+         if (jitter->buf[i] && jitter->timestamp[i]<=jitter->pointer_timestamp && GT32(jitter->timestamp[i]+jitter->span[i],jitter->pointer_timestamp))
+            break;
+      }
+   }
+   /* If still no match, try for earliest packet possible */
+   if (i==SPEEX_JITTER_MAX_BUFFER_SIZE)
+   {
+      int found = 0;
+      spx_uint32_t best_time=0;
+      int best_span=0;
+      int besti=0;
+      for (i=0;i<SPEEX_JITTER_MAX_BUFFER_SIZE;i++)
+      {
+         /* check if packet starts within current chunk */
+         if (jitter->buf[i] && LT32(jitter->timestamp[i],jitter->pointer_timestamp+chunk_size) && GE32(jitter->timestamp[i],jitter->pointer_timestamp))
+         {
+            if (!found || LT32(jitter->timestamp[i],best_time) || (jitter->timestamp[i]==best_time && GT32(jitter->span[i],best_span)))
+            {
+               best_time = jitter->timestamp[i];
+               best_span = jitter->span[i];
+               besti = i;
+               found = 1;
+            }
+         }
+      }
+      if (found)
+      {
+         i=besti;
+         incomplete = 1;
+         /*fprintf (stderr, "incomplete: %d %d %d %d\n", jitter->timestamp[i], jitter->pointer_timestamp, chunk_size, jitter->span[i]);*/
+      }
+   }
+   /* If we find something */
+   if (i!=SPEEX_JITTER_MAX_BUFFER_SIZE)
+   {
+      /* We (obviously) haven't lost this packet */
+      jitter->lost_count = 0;
+      jitter->loss_rate = .999*jitter->loss_rate;
+      /* Check for potential overflow */
+      packet->len = jitter->len[i];
+      /* Copy packet */
+      for (j=0;j<packet->len;j++)
+         packet->data[j] = jitter->buf[i][j];
+      /* Remove packet */
+      speex_free(jitter->buf[i]);
+      jitter->buf[i] = NULL;
+      /* Set timestamp and span (if requested) */
+      if (start_offset)
+         *start_offset = jitter->timestamp[i]-jitter->pointer_timestamp;
+      packet->timestamp = jitter->timestamp[i];
+      packet->span = jitter->span[i];
+      /* Point at the end of the current packet */
+      jitter->pointer_timestamp = jitter->timestamp[i]+jitter->span[i];
+      if (incomplete)
+         return JITTER_BUFFER_INCOMPLETE;
+      else
+         return JITTER_BUFFER_OK;
+   }
+   /* If we haven't found anything worth returning */
+   /*fprintf (stderr, "not found\n");*/
+   jitter->lost_count++;
+   /*fprintf (stderr, "m");*/
+   /*fprintf (stderr, "lost_count = %d\n", jitter->lost_count);*/
+   jitter->loss_rate = .999*jitter->loss_rate + .001;
+   if (start_offset)
+      *start_offset = 0;
+   packet->timestamp = jitter->pointer_timestamp;
+   packet->span = jitter->tick_size;
+   jitter->pointer_timestamp += chunk_size;
+   packet->len = 0;
+   return JITTER_BUFFER_MISSING;
+}
+/** Get pointer timestamp of jitter buffer.
+ *  @param jitter  Jitter buffer state to query (read only; nothing is modified)
+ *  @return        The current value of jitter->pointer_timestamp
+ */
+int jitter_buffer_get_pointer_timestamp(JitterBuffer *jitter)
+{
+   return jitter->pointer_timestamp;
+}
+void jitter_buffer_tick(JitterBuffer *jitter)
+{  /* Advance the buffer clock: add one tick_size to current_timestamp.
+      No other field is modified and nothing is returned. */
+   jitter->current_timestamp += jitter->tick_size;
+}
+void speex_jitter_init(SpeexJitter *jitter, void *decoder, int sampling_rate)
+{  /* Initialise a SpeexJitter wrapper around an existing decoder.
+      jitter:        state to initialise
+      decoder:       decoder handle; stored in jitter->dec and queried for its frame size
+      sampling_rate: not referenced anywhere in this body */
+   jitter->dec = decoder;
+   speex_decoder_ctl(decoder, SPEEX_GET_FRAME_SIZE, &jitter->frame_size); /* fills jitter->frame_size */
+   jitter->packets = jitter_buffer_init(jitter->frame_size); /* NOTE(review): frame size presumably becomes the buffer's tick size -- confirm in jitter_buffer_init */
+   speex_bits_init(&jitter->current_packet);
+   jitter->valid_bits = 0; /* speex_jitter_get() only retries current_packet when this is non-zero */
+}
+void speex_jitter_destroy(SpeexJitter *jitter)
+{  /* Release the two members created in speex_jitter_init(): the packet buffer
+      and the bit-stream state. jitter->dec is not touched here. */
+   jitter_buffer_destroy(jitter->packets);
+   speex_bits_destroy(&jitter->current_packet);
+}
+void speex_jitter_put(SpeexJitter *jitter, char *packet, int len, int timestamp)
+{  /* Queue one encoded packet: wrap the raw arguments in a JitterBufferPacket
+      and hand it to jitter_buffer_put().
+      packet/len: encoded bytes and their count
+      timestamp:  timestamp recorded for the packet */
+   JitterBufferPacket p; /* stack temporary; only its address is passed on */
+   p.data = packet;
+   p.len = len;
+   p.timestamp = timestamp;
+   p.span = jitter->frame_size; /* every packet is declared to span exactly one decoder frame */
+   jitter_buffer_put(jitter->packets, &p);
+}
+void speex_jitter_get(SpeexJitter *jitter, short *out, int *current_timestamp)
+{
+   int i;
+   int ret;
+   char data[2048];
+   JitterBufferPacket packet;
+   packet.data = data;
+   if (jitter->valid_bits)
+   {
+      /* Try decoding last received packet */
+      ret = speex_decode_int(jitter->dec, &jitter->current_packet, out);
+      if (ret == 0)
+      {
+         jitter_buffer_tick(jitter->packets);
+         return;
+      } else {
+         jitter->valid_bits = 0;
+      }
+   }
+   ret = jitter_buffer_get(jitter->packets, &packet, NULL);
+   if (ret != JITTER_BUFFER_OK)
+   {
       /* No packet found */
+      if (jitter->valid_bits)
+      {
+         /* Try decoding last received packet */
+         ret = speex_decode_int(jitter->dec, &jitter->current_packet, (spx_int16_t*)out);
+         if (ret == 0)
+         {
+            jitter->lost_count = 0;
+            return;
+         } else {
+            jitter->valid_bits = 0;
+         }
+      }
+      /*fprintf (stderr, "lost/late frame %d\n", jitter->pointer_timestamp);*/
+      /*fprintf (stderr, "lost/late frame\n");*/
       /*Packet is late or lost*/
+      speex_decode_int(jitter->dec, NULL, (spx_int16_t*)out);
+      jitter->lost_count++;
+      if (jitter->lost_count>=25)
+      {
+         jitter->lost_count = 0;
+         jitter->reset_state = 1;
+         speex_decoder_ctl(jitter->dec, SPEEX_RESET_STATE, NULL);
+      }
+      jitter->loss_rate = .999*jitter->loss_rate + .001;
+      speex_decode_int(jitter->dec, NULL, out);
    } else {
+      jitter->lost_count = 0;
+      /* Found the right packet */
+      speex_bits_read_from(&jitter->current_packet, jitter->buf[i], jitter->len[i]);
+      jitter->len[i]=-1;
+      speex_bits_read_from(&jitter->current_packet, packet.data, packet.len);
       /* Decode packet */
       ret = speex_decode_int(jitter->dec, &jitter->current_packet, (spx_int16_t*)out);
+      ret = speex_decode_int(jitter->dec, &jitter->current_packet, out);
       if (ret == 0)
+      {
 …
             out[i]=0;
+      }
+      jitter->loss_rate = .999*jitter->loss_rate;
+   }
+   }
+   jitter_buffer_tick(jitter->packets);
+}
 int speex_jitter_get_pointer_timestamp(SpeexJitter *jitter)
+{
    return jitter->pointer_timestamp;
+}
+   return jitter_buffer_get_pointer_timestamp(jitter->packets);
+}

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/kiss_fft.c

-                      r523
+                      r628
     do { \
         if ( nbuf < (size_t)(n) ) {\
             free(buf); \
+            speex_free(buf); \
             buf = (kiss_fft_cpx*)KISS_FFT_MALLOC(sizeof(kiss_fft_cpx)*(n)); \
             nbuf = (size_t)(n); \
 …
        int i;
        kiss_fft_cpx *x=Fout;
        for (i=0;i<(int)(4*m);i++)
+       for (i=0;i<4*m;i++)
+       {
           x[i].r = PSHR16(x[i].r,2);
 …
         CHECKBUF(tmpbuf,ntmpbuf,st->nfft);
         kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
         memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
+        speex_move(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
     }else{
         kf_work( fout, fin, 1,in_stride, st->factors,st );
 …
 void kiss_fft_cleanup(void)
+{
     free(scratchbuf);
+    speex_free(scratchbuf);
     scratchbuf = NULL;
     nscratchbuf=0;
     free(tmpbuf);
+    speex_free(tmpbuf);
     tmpbuf=NULL;
     ntmpbuf=0;

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/kiss_fft.h

-                      r529
+                      r628
 #include <stdlib.h>
-#include <stdio.h>
 #include <math.h>
+#include <memory.h>
+//Not available in gcc MacOS X (bennylp)
+//#include <malloc.h>
+#include "misc.h"
 #ifdef __cplusplus
 …
 #define KISS_FFT_MALLOC(nbytes) memalign(16,nbytes)
 #else
 #define KISS_FFT_MALLOC malloc
+#define KISS_FFT_MALLOC speex_alloc
 #endif
 #ifdef FIXED_POINT
 #include <sys/types.h>
 #  define kiss_fft_scalar int16_t
+#include "misc.h"
+#  define kiss_fft_scalar spx_int16_t
 #else
 # ifndef kiss_fft_scalar
 …
 /* If kiss_fft_alloc allocated a buffer, it is one contiguous
    buffer and can be simply free()d when no longer needed*/
 #define kiss_fft_free free
+#define kiss_fft_free speex_free
 /*

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/kiss_fftr.c

-                      r516
+                      r628
     if (nfft & 1) {
         fprintf(stderr,"Real FFT optimization must be even.\n");
+        speex_warning("Real FFT optimization must be even.\n");
         return NULL;
+    }
 …
     if ( st->substate->inverse) {
         fprintf(stderr,"kiss fft usage error: improper alloc\n");
+        speex_warning("kiss fft usage error: improper alloc\n");
         exit(1);
+    }
 …
     if (st->substate->inverse == 0) {
         fprintf (stderr, "kiss fft usage error: improper alloc\n");
+        speex_warning ("kiss fft usage error: improper alloc\n");
         exit (1);
+    }

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/kiss_fftr.h

r516	r628
39	39	*/
40	40
41		#define kiss_fftr_free free
	41	#define kiss_fftr_free speex_free
42	42
43	43	#ifdef __cplusplus

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/lpc.c

-                      r516
+                      r628
          rr = SUB32(rr,MULT16_16(lpc[j],ac[i - j]));
 #ifdef FIXED_POINT
       r = DIV32_16(rr,ADD16(error,16));
+      r = DIV32_16(rr+PSHR32(error,1),ADD16(error,8));
 #else
       r = rr/(error+.003*ac[0]);
 …
+      {
          spx_word16_t tmp  = lpc[j];
          lpc[j]     = MAC16_16_Q13(lpc[j],r,lpc[i-1-j]);
          lpc[i-1-j] = MAC16_16_Q13(lpc[i-1-j],r,tmp);
+         lpc[j]     = MAC16_16_P13(lpc[j],r,lpc[i-1-j]);
+         lpc[i-1-j] = MAC16_16_P13(lpc[i-1-j],r,tmp);
+      }
       if (i & 1)
          lpc[j] = MAC16_16_Q13(lpc[j],lpc[j],r);
+         lpc[j] = MAC16_16_P13(lpc[j],lpc[j],r);
       error = SUB16(error,MULT16_16_Q13(r,MULT16_16_Q13(error,r)));

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/lsp.c

-                      r278
+                      r628
 /*---------------------------------------------------------------------------*\
 Original copyright
+        FILE........: AKSLSPD.C
+        TYPE........: Turbo C
+        COMPANY.....: Voicetronix
+        FILE........: lsp.c
         AUTHOR......: David Rowe
         DATE CREATED: 24/2/93
 …
 */
+/*---------------------------------------------------------------------------*\
+  Introduction to Line Spectrum Pairs (LSPs)
+  ------------------------------------------
+  LSPs are used to encode the LPC filter coefficients {ak} for
+  transmission over the channel.  LSPs have several properties (like
+  less sensitivity to quantisation noise) that make them superior to
+  direct quantisation of {ak}.
+  A(z) is a polynomial of order lpcrdr with {ak} as the coefficients.
+  A(z) is transformed to P(z) and Q(z) (using a substitution and some
+  algebra), to obtain something like:
+    A(z) = 0.5[P(z)(z+z^-1) + Q(z)(z-z^-1)]  (1)
+  As you can imagine A(z) has complex zeros all over the z-plane. P(z)
+  and Q(z) have the very neat property of only having zeros _on_ the
+  unit circle.  So to find them we take a test point z=exp(jw) and
+  evaluate P (exp(jw)) and Q(exp(jw)) using a grid of points between 0
+  and pi.
+  The zeros (roots) of P(z) also happen to alternate, which is why we
+  swap coefficients as we find roots.  So the process of finding the
+  LSP frequencies is basically finding the roots of 5th order
+  polynomials.
+  The roots of P(z) and Q(z) occur in symmetrical pairs at +/-w, hence
+  the name Line Spectrum Pairs (LSPs).
+  To convert back to ak we just evaluate (1), "clocking" an impulse
+  thru it lpcrdr times gives us the impulse response of A(z) which is
+  {ak}.
+\*---------------------------------------------------------------------------*/
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 …
 #ifdef FIXED_POINT
 #define FREQ_SCALE 16384
 …
 /*#define X2ANGLE(x) (acos(.00006103515625*(x))*LSP_SCALING)*/
 #define X2ANGLE(x) (spx_acos(x))
+#ifdef BFIN_ASM
+#include "lsp_bfin.h"
+#endif
 #else
 …
 /*---------------------------------------------------------------------------*\
         FUNCTION....: cheb_poly_eva()
         AUTHOR......: David Rowe
         DATE CREATED: 24/2/93
     This function evaluates a series of Chebyshev polynomials
+   FUNCTION....: cheb_poly_eva()
+   AUTHOR......: David Rowe
+   DATE CREATED: 24/2/93
+   This function evaluates a series of Chebyshev polynomials
 \*---------------------------------------------------------------------------*/
 …
 #ifdef FIXED_POINT
+static inline spx_word32_t cheb_poly_eva(spx_word32_t *coef,spx_word16_t x,int m,char *stack)
+/*  float coef[]        coefficients of the polynomial to be evaluated  */
+/*  float x             the point where polynomial is to be evaluated   */
+/*  int m               order of the polynomial                         */
+#ifndef OVERRIDE_CHEB_POLY_EVA
+static inline spx_word32_t cheb_poly_eva(
+  spx_word16_t *coef, /* P or Q coefs in Q13 format               */
+  spx_word16_t     x, /* cos of freq (-1.0 to 1.0) in Q14 format  */
+  int              m, /* LPC order/2                              */
+  char         *stack
+)
+{
     int i;
     VARDECL(spx_word16_t *T);
+    spx_word16_t b0, b1;
     spx_word32_t sum;
-    int m2=m>>1;
-    VARDECL(spx_word16_t *coefn);
     /*Prevents overflows*/
 …
        x = -16383;
+    /* Allocate memory for Chebyshev series formulation */
+    ALLOC(T, m2+1, spx_word16_t);
+    ALLOC(coefn, m2+1, spx_word16_t);
+    for (i=0;i<m2+1;i++)
+    /* Initialise values */
+    b1=16384;
+    b0=x;
+    /* Evaluate Chebyshev series formulation using iterative approach  */
+    sum = ADD32(EXTEND32(coef[m]), EXTEND32(MULT16_16_P14(coef[m-1],x)));
+    for(i=2;i<=m;i++)
+    {
+       coefn[i] = coef[i];
+       /*printf ("%f ", coef[i]);*/
+    }
+    /*printf ("\n");*/
+    /* Initialise values */
+    T[0]=16384;
+    T[1]=x;
+    /* Evaluate Chebyshev series formulation using iterative approach  */
+    /* Evaluate polynomial and return value also free memory space */
+    sum = ADD32(EXTEND32(coefn[m2]), EXTEND32(MULT16_16_P14(coefn[m2-1],x)));
+    /*x *= 2;*/
+    for(i=2;i<=m2;i++)
+    {
+       T[i] = SUB16(MULT16_16_Q13(x,T[i-1]), T[i-2]);
+       sum = ADD32(sum, EXTEND32(MULT16_16_P14(coefn[m2-i],T[i])));
+       /*printf ("%f ", sum);*/
+    }
+    /*printf ("\n");*/
+    return sum;
+}
+#else
+static float cheb_poly_eva(spx_word32_t *coef,float x,int m,char *stack)
+/*  float coef[]        coefficients of the polynomial to be evaluated  */
+/*  float x             the point where polynomial is to be evaluated   */
+/*  int m               order of the polynomial                         */
+{
+    int i;
+    VARDECL(float *T);
+    float sum;
+    int m2=m>>1;
+    /* Allocate memory for Chebyshev series formulation */
+    ALLOC(T, m2+1, float);
+    /* Initialise values */
+    T[0]=1;
+    T[1]=x;
+    /* Evaluate Chebyshev series formulation using iterative approach  */
+    /* Evaluate polynomial and return value also free memory space */
+    sum = coef[m2] + coef[m2-1]*x;
+    x *= 2;
+    for(i=2;i<=m2;i++)
+    {
+       T[i] = x*T[i-1] - T[i-2];
+       sum += coef[m2-i] * T[i];
+       spx_word16_t tmp=b0;
+       b0 = SUB16(MULT16_16_Q13(x,b0), b1);
+       b1 = tmp;
+       sum = ADD32(sum, EXTEND32(MULT16_16_P14(coef[m-i],b0)));
+    }
 …
 #endif
+#else
+static float cheb_poly_eva(spx_word32_t *coef, spx_word16_t x, int m, char *stack)
+{
+   int k;
+   float b0, b1, tmp;
+   /* Initial conditions */
+   b0=0; /* b_(m+1) */
+   b1=0; /* b_(m+2) */
+   x*=2;
+   /* Calculate the b_(k) */
+   for(k=m;k>0;k--)
+   {
+      tmp=b0;                           /* tmp holds the previous value of b0 */
+      b0=x*b0-b1+coef[m-k];    /* b0 holds its new value based on b0 and b1 */
+      b1=tmp;                           /* b1 holds the previous value of b0 */
+   }
+   return(-b1+.5*x*b0+coef[m]);
+}
+#endif
 /*---------------------------------------------------------------------------*\
         FUNCTION....: lpc_to_lsp()
         AUTHOR......: David Rowe
         DATE CREATED: 24/2/93
+    FUNCTION....: lpc_to_lsp()
+    AUTHOR......: David Rowe
+    DATE CREATED: 24/2/93
     This function converts LPC coefficients to LSP
 …
     VARDECL(spx_word32_t *Q);                   /* ptrs for memory allocation           */
     VARDECL(spx_word32_t *P);
+    VARDECL(spx_word16_t *Q16);         /* ptrs for memory allocation           */
+    VARDECL(spx_word16_t *P16);
     spx_word32_t *px;                   /* ptrs of respective P'(z) & Q'(z)     */
     spx_word32_t *qx;
     spx_word32_t *p;
     spx_word32_t *q;
     spx_word32_t *pt;                   /* ptr used for cheb_poly_eval()
+    spx_word16_t *pt;                   /* ptr used for cheb_poly_eval()
                                 whether P' or Q'                        */
     int roots=0;                /* DR 8/2/94: number of roots found     */
 …
     qx = Q;
+    /* now that we have computed P and Q convert to 16 bits to
+       speed up cheb_poly_eval */
+    ALLOC(P16, m+1, spx_word16_t);
+    ALLOC(Q16, m+1, spx_word16_t);
+    for (i=0;i<m+1;i++)
+    {
+       P16[i] = P[i];
+       Q16[i] = Q[i];
+    }
     /* Search for a zero in P'(z) polynomial first and then alternate to Q'(z).
     Keep alternating between the two polynomials as each zero is found  */
 …
     xl = FREQ_SCALE;                    /* start at point xl = 1                */
     for(j=0;j<lpcrdr;j++){
         if(j&1)                 /* determines whether P' or Q' is eval. */
             pt = qx;
+            pt = Q16;
         else
             pt = px;
         psuml = cheb_poly_eva(pt,xl,lpcrdr,stack);      /* evals poly. at xl    */
+            pt = P16;
+        psuml = cheb_poly_eva(pt,xl,m,stack);   /* evals poly. at xl    */
         flag = 1;
         while(flag && (xr >= -FREQ_SCALE)){
 …
 #endif
            xr = SUB16(xl, dd);                          /* interval spacing     */
             psumr = cheb_poly_eva(pt,xr,lpcrdr,stack);/* poly(xl-delta_x)       */
+            psumr = cheb_poly_eva(pt,xr,m,stack);/* poly(xl-delta_x)    */
             temp_psumr = psumr;
             temp_xr = xr;
 …
                     xm = .5*(xl+xr);            /* bisect the interval  */
 #endif
                     psumm=cheb_poly_eva(pt,xm,lpcrdr,stack);
+                    psumm=cheb_poly_eva(pt,xm,m,stack);
                     /*if(psumm*psuml>0.)*/
                     if(!SIGN_CHANGE(psumm,psuml))
 …
+}
 /*---------------------------------------------------------------------------*\
 …
         DATE CREATED: 24/2/93
+    lsp_to_lpc: This function converts LSP coefficients to LPC
+    coefficients.
+        Converts LSP coefficients to LPC coefficients.
 \*---------------------------------------------------------------------------*/
 …
 /*  float *ak           array of LPC coefficients                       */
 /*  int lpcrdr          order of LPC coefficients                       */
+{
     int i,j;
+    spx_word32_t xout1,xout2,xin1,xin2;
+    VARDECL(spx_word32_t *Wp);
+    spx_word32_t *pw,*n1,*n2,*n3,*n4=NULL;
+    spx_word32_t xout1,xout2,xin;
+    spx_word32_t mult, a;
     VARDECL(spx_word16_t *freqn);
+    VARDECL(spx_word32_t **xp);
+    VARDECL(spx_word32_t *xpmem);
+    VARDECL(spx_word32_t **xq);
+    VARDECL(spx_word32_t *xqmem);
     int m = lpcrdr>>1;
+    /*
+       Reconstruct P(z) and Q(z) by cascading second order polynomials
+       in form 1 - 2cos(w)z(-1) + z(-2), where w is the LSP frequency.
+       In the time domain this is:
+       y(n) = x(n) - 2cos(w)x(n-1) + x(n-2)
+       This is what the ALLOCS below are trying to do:
+         int xp[m+1][lpcrdr+1+2]; // P matrix in QIMP
+         int xq[m+1][lpcrdr+1+2]; // Q matrix in QIMP
+       These matrices store the output of each stage on each row.  The
+       final (m-th) row has the output of the final (m-th) cascaded
+       2nd order filter.  The first row is the impulse input to the
+       system (not written as it is known).
+       The version below takes advantage of the fact that a lot of the
+       outputs are zero or known, for example if we put an impulse
+       into the first section then "clock" it 10 times only the first 3
+       outputs samples are non-zero (it's an FIR filter).
+    */
+    ALLOC(xp, (m+1), spx_word32_t*);
+    ALLOC(xpmem, (m+1)*(lpcrdr+1+2), spx_word32_t);
+    ALLOC(xq, (m+1), spx_word32_t*);
+    ALLOC(xqmem, (m+1)*(lpcrdr+1+2), spx_word32_t);
+    for(i=0; i<=m; i++) {
+      xp[i] = xpmem + i*(lpcrdr+1+2);
+      xq[i] = xqmem + i*(lpcrdr+1+2);
+    }
+    /* work out 2cos terms in Q14 */
     ALLOC(freqn, lpcrdr, spx_word16_t);
     for (i=0;i<lpcrdr;i++)
+    for (i=0;i<lpcrdr;i++)
        freqn[i] = ANGLE2X(freq[i]);
+    ALLOC(Wp, 4*m+2, spx_word32_t);
+    pw = Wp;
+    /* initialise contents of array */
+    for(i=0;i<=4*m+1;i++){              /* set contents of buffer to 0 */
+        *pw++ = 0;
+    }
+    /* Set pointers up */
+    pw = Wp;
+    xin1 = 1048576;
+    xin2 = 1048576;
+    /* reconstruct P(z) and Q(z) by  cascading second order
+      polynomials in form 1 - 2xz(-1) +z(-2), where x is the
+      LSP coefficient */
+    for(j=0;j<=lpcrdr;j++){
+       spx_word16_t *fr=freqn;
+        for(i=0;i<m;i++){
+            n1 = pw+(i<<2);
+            n2 = n1 + 1;
+            n3 = n2 + 1;
+            n4 = n3 + 1;
+            xout1 = ADD32(SUB32(xin1, MULT16_32_Q14(*fr,*n1)), *n2);
+            fr++;
+            xout2 = ADD32(SUB32(xin2, MULT16_32_Q14(*fr,*n3)), *n4);
+            fr++;
+            *n2 = *n1;
+            *n4 = *n3;
+            *n1 = xin1;
+            *n3 = xin2;
+            xin1 = xout1;
+            xin2 = xout2;
+        }
+        xout1 = xin1 + *(n4+1);
+        xout2 = xin2 - *(n4+2);
+        /* FIXME: perhaps apply bandwidth expansion in case of overflow? */
+        if (j>0)
+        {
+        if (xout1 + xout2>SHL32(EXTEND32(32766),8))
+           ak[j-1] = 32767;
+        else if (xout1 + xout2 < -SHL32(EXTEND32(32766),8))
+           ak[j-1] = -32767;
+        else
+           ak[j-1] = EXTRACT16(PSHR32(ADD32(xout1,xout2),8));
+        } else {/*speex_warning_int("ak[0] = ", EXTRACT16(PSHR32(ADD32(xout1,xout2),8)));*/}
+        *(n4+1) = xin1;
+        *(n4+2) = xin2;
+        xin1 = 0;
+        xin2 = 0;
+    }
+}
+    #define QIMP  21   /* scaling for impulse */
+    xin = SHL32(EXTEND32(1), (QIMP-1)); /* 0.5 in QIMP format */
+    /* first col and last non-zero values of each row are trivial */
+    for(i=0;i<=m;i++) {
+     xp[i][1] = 0;
+     xp[i][2] = xin;
+     xp[i][2+2*i] = xin;
+     xq[i][1] = 0;
+     xq[i][2] = xin;
+     xq[i][2+2*i] = xin;
+    }
+    /* 2nd row (first output row) is trivial */
+    xp[1][3] = -MULT16_32_Q14(freqn[0],xp[0][2]);
+    xq[1][3] = -MULT16_32_Q14(freqn[1],xq[0][2]);
+    xout1 = xout2 = 0;
+    /* now generate remaining rows */
+    for(i=1;i<m;i++) {
+      for(j=1;j<2*(i+1)-1;j++) {
+        mult = MULT16_32_Q14(freqn[2*i],xp[i][j+1]);
+        xp[i+1][j+2] = ADD32(SUB32(xp[i][j+2], mult), xp[i][j]);
+        mult = MULT16_32_Q14(freqn[2*i+1],xq[i][j+1]);
+        xq[i+1][j+2] = ADD32(SUB32(xq[i][j+2], mult), xq[i][j]);
+      }
+      /* for last col xp[i][j+2] = xq[i][j+2] = 0 */
+      mult = MULT16_32_Q14(freqn[2*i],xp[i][j+1]);
+      xp[i+1][j+2] = SUB32(xp[i][j], mult);
+      mult = MULT16_32_Q14(freqn[2*i+1],xq[i][j+1]);
+      xq[i+1][j+2] = SUB32(xq[i][j], mult);
+    }
+    /* process last row to extract a{k} */
+    for(j=1;j<=lpcrdr;j++) {
+      int shift = QIMP-13;
+      /* final filter sections: combine column j+2 of the last P and Q rows
+         with the values kept from the previous column (xout1/xout2) */
+      a = PSHR32(xp[m][j+2] + xout1 + xq[m][j+2] - xout2, shift);
+      xout1 = xp[m][j+2];
+      xout2 = xq[m][j+2];
+      /* hard limit ak's to +/- 32767: each side saturates to its own bound
+         (the negative side used to be clamped to +32767, which flipped the
+         sign of an overflowing coefficient) */
+      if (a < -32767) a = -32767;
+      if (a > 32767) a = 32767;
+      ak[j-1] = (short)a;
+    }
+}
 #else

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/ltp.c

-                      r516
+                      r628
 #ifndef OVERRIDE_INNER_PROD
 static spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
+spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
+{
    spx_word32_t sum=0;
 …
 #ifndef OVERRIDE_PITCH_XCORR
 #if 0 /* HINT: Enable this for machines with enough registers (i.e. not x86) */
 static void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
+void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
+{
    int i,j;
 …
+}
 #else
 static void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
+void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
+{
    int i;
 …
 #ifndef OVERRIDE_COMPUTE_PITCH_ERROR
 static inline spx_word32_t compute_pitch_error(spx_word32_t *C, spx_word16_t *g, spx_word16_t pitch_control)
+static inline spx_word32_t compute_pitch_error(spx_word16_t *C, spx_word16_t *g, spx_word16_t pitch_control)
+{
    spx_word32_t sum = 0;
    sum = ADD32(sum,MULT16_32_Q15(MULT16_16_16(g[0],pitch_control),C[0]));
    sum = ADD32(sum,MULT16_32_Q15(MULT16_16_16(g[1],pitch_control),C[1]));
    sum = ADD32(sum,MULT16_32_Q15(MULT16_16_16(g[2],pitch_control),C[2]));
    sum = SUB32(sum,MULT16_32_Q15(MULT16_16_16(g[0],g[1]),C[3]));
    sum = SUB32(sum,MULT16_32_Q15(MULT16_16_16(g[2],g[1]),C[4]));
    sum = SUB32(sum,MULT16_32_Q15(MULT16_16_16(g[2],g[0]),C[5]));
    sum = SUB32(sum,MULT16_32_Q15(MULT16_16_16(g[0],g[0]),C[6]));
    sum = SUB32(sum,MULT16_32_Q15(MULT16_16_16(g[1],g[1]),C[7]));
    sum = SUB32(sum,MULT16_32_Q15(MULT16_16_16(g[2],g[2]),C[8]));
+   sum = ADD32(sum,MULT16_16(MULT16_16_16(g[0],pitch_control),C[0]));
+   sum = ADD32(sum,MULT16_16(MULT16_16_16(g[1],pitch_control),C[1]));
+   sum = ADD32(sum,MULT16_16(MULT16_16_16(g[2],pitch_control),C[2]));
+   sum = SUB32(sum,MULT16_16(MULT16_16_16(g[0],g[1]),C[3]));
+   sum = SUB32(sum,MULT16_16(MULT16_16_16(g[2],g[1]),C[4]));
+   sum = SUB32(sum,MULT16_16(MULT16_16_16(g[2],g[0]),C[5]));
+   sum = SUB32(sum,MULT16_16(MULT16_16_16(g[0],g[0]),C[6]));
+   sum = SUB32(sum,MULT16_16(MULT16_16_16(g[1],g[1]),C[7]));
+   sum = SUB32(sum,MULT16_16(MULT16_16_16(g[2],g[2]),C[8]));
    return sum;
+}
 #endif
+void open_loop_nbest_pitch(spx_sig_t *sw, int start, int end, int len, int *pitch, spx_word16_t *gain, int N, char *stack)
+#ifndef OVERRIDE_OPEN_LOOP_NBEST_PITCH
+void open_loop_nbest_pitch(spx_word16_t *sw, int start, int end, int len, int *pitch, spx_word16_t *gain, int N, char *stack)
+{
    int i,j,k;
    VARDECL(spx_word32_t *best_score);
+   VARDECL(spx_word32_t *best_ener);
    spx_word32_t e0;
    VARDECL(spx_word32_t *corr);
    VARDECL(spx_word32_t *energy);
-   VARDECL(spx_word32_t *score);
-   VARDECL(spx_word16_t *swn2);
-   spx_word16_t *swn;
    ALLOC(best_score, N, spx_word32_t);
+   ALLOC(best_ener, N, spx_word32_t);
    ALLOC(corr, end-start+1, spx_word32_t);
    ALLOC(energy, end-start+2, spx_word32_t);
-   ALLOC(score, end-start+1, spx_word32_t);
-#ifdef FIXED_POINT
-   ALLOC(swn2, end+len, spx_word16_t);
-   normalize16(sw-end, swn2, 16384, end+len);
-   swn = swn2 + end;
-#else
-   swn = sw;
-#endif
    for (i=0;i<N;i++)
+   {
         best_score[i]=-1;
+        best_ener[i]=0;
         pitch[i]=start;
+   }
+   energy[0]=inner_prod(swn-start, swn-start, len);
+   e0=inner_prod(swn, swn, len);
+   for (i=start;i<=end;i++)
+   energy[0]=inner_prod(sw-start, sw-start, len);
+   e0=inner_prod(sw, sw, len);
+   for (i=start;i<end;i++)
+   {
       /* Update energy for next pitch*/
       energy[i-start+1] = SUB32(ADD32(energy[i-start],SHR32(MULT16_16(swn[-i-1],swn[-i-1]),6)), SHR32(MULT16_16(swn[-i+len-1],swn[-i+len-1]),6));
+      energy[i-start+1] = SUB32(ADD32(energy[i-start],SHR32(MULT16_16(sw[-i-1],sw[-i-1]),6)), SHR32(MULT16_16(sw[-i+len-1],sw[-i+len-1]),6));
       if (energy[i-start+1] < 0)
          energy[i-start+1] = 0;
+   }
+   pitch_xcorr(swn, swn-end, corr, len, end-start+1, stack);
+   pitch_xcorr(sw, sw-end, corr, len, end-start+1, stack);
+   /* FIXME: Fixed-point and floating-point code should be merged */
 #ifdef FIXED_POINT
+   {
 …
       ALLOC(corr16, end-start+1, spx_word16_t);
       ALLOC(ener16, end-start+1, spx_word16_t);
+      normalize16(corr, corr16, 16384, end-start+1);
+      normalize16(energy, ener16, 16384, end-start+1);
+      /* Normalize to 180 so we can square it and it still fits in 16 bits */
+      normalize16(corr, corr16, 180, end-start+1);
+      normalize16(energy, ener16, 180, end-start+1);
       for (i=start;i<=end;i++)
+      {
+         spx_word16_t g;
+         spx_word32_t tmp;
+         tmp = corr16[i-start];
+         if (tmp>0)
+         spx_word16_t tmp = MULT16_16_16(corr16[i-start],corr16[i-start]);
+         /* Instead of dividing the tmp by the energy, we multiply on the other side */
+         if (MULT16_16(tmp,best_ener[N-1])>MULT16_16(best_score[N-1],ADD16(1,ener16[i-start])))
+         {
+            if (SHR16(corr16[i-start],4)>ener16[i-start])
+               tmp = SHL32(EXTEND32(ener16[i-start]),14);
+            else if (-SHR16(corr16[i-start],4)>ener16[i-start])
+               tmp = -SHL32(EXTEND32(ener16[i-start]),14);
+            else
+               tmp = SHL32(tmp,10);
+            g = DIV32_16(tmp, 8+ener16[i-start]);
+            score[i-start] = MULT16_16(corr16[i-start],g);
+         } else
+         {
+            score[i-start] = 1;
+            /* We can safely put it last and then check */
+            best_score[N-1]=tmp;
+            best_ener[N-1]=ener16[i-start]+1;
+            pitch[N-1]=i;
+            /* Check if it comes in front of others */
+            for (j=0;j<N-1;j++)
+            {
+               if (MULT16_16(tmp,best_ener[j])>MULT16_16(best_score[j],ADD16(1,ener16[i-start])))
+               {
+                  for (k=N-1;k>j;k--)
+                  {
+                     best_score[k]=best_score[k-1];
+                     best_ener[k]=best_ener[k-1];
+                     pitch[k]=pitch[k-1];
+                  }
+                  best_score[j]=tmp;
+                  best_ener[j]=ener16[i-start]+1;
+                  pitch[j]=i;
+                  break;
+               }
+            }
+         }
+      }
 …
    for (i=start;i<=end;i++)
+   {
+      float g = corr[i-start]/(1+energy[i-start]);
+      if (g>16)
+         g = 16;
+      else if (g<-16)
+         g = -16;
+      score[i-start] = g*corr[i-start];
+   }
+#endif
+   /* Extract best scores */
+   for (i=start;i<=end;i++)
+   {
+      if (score[i-start]>best_score[N-1])
+      float tmp = corr[i-start]*corr[i-start];
+      if (tmp*best_ener[N-1]>best_score[N-1]*(1+energy[i-start]))
+      {
          for (j=0;j<N;j++)
+         {
             if (score[i-start] > best_score[j])
+            if (tmp*best_ener[j]>best_score[j]*(1+energy[i-start]))
+            {
                for (k=N-1;k>j;k--)
+               {
                   best_score[k]=best_score[k-1];
+                  best_ener[k]=best_ener[k-1];
                   pitch[k]=pitch[k-1];
+               }
+               best_score[j]=score[i-start];
+               best_score[j]=tmp;
+               best_ener[j]=energy[i-start]+1;
                pitch[j]=i;
                break;
 …
+      }
+   }
+#endif
    /* Compute open-loop gain */
 …
+   }
+}
+#endif
+#ifndef OVERRIDE_PITCH_GAIN_SEARCH_3TAP_VQ /* compiled only when no override macro is defined */
+static int pitch_gain_search_3tap_vq( /* Scan the gain codebook and return the index of the entry with the largest compute_pitch_error() value whose stored gain sum does not exceed max_gain. */
+  const signed char *gain_cdbk, /* codebook, read as 4 signed bytes per entry: three tap gains followed by a gain-sum value */
+  int                gain_cdbk_size, /* number of entries scanned */
+  spx_word16_t      *C16, /* passed unchanged as the first argument of compute_pitch_error() */
+  spx_word16_t       max_gain /* upper bound applied to each entry's fourth byte */
+)
+{
+  const signed char *ptr=gain_cdbk;
+  int                best_cdbk=0; /* returned as-is (index 0) if no entry passes the test below */
+  spx_word32_t       best_sum=-VERY_LARGE32; /* start below any expected value so the first eligible entry wins */
+  spx_word32_t       sum=0;
+  spx_word16_t       g[3];
+  spx_word16_t       pitch_control=64; /* constant for every entry in this routine */
+  spx_word16_t       gain_sum;
+  int                i;
+  for (i=0;i<gain_cdbk_size;i++) {
+    ptr = gain_cdbk+4*i; /* stride of 4 bytes per codebook entry */
+    g[0]=ADD16((spx_word16_t)ptr[0],32); /* the three tap gains are stored with a -32 offset; ADD16 with 32 undoes it */
+    g[1]=ADD16((spx_word16_t)ptr[1],32);
+    g[2]=ADD16((spx_word16_t)ptr[2],32);
+    gain_sum = (spx_word16_t)ptr[3]; /* fourth byte is used only for the max_gain test */
+    sum = compute_pitch_error(C16, g, pitch_control);
+    if (sum>best_sum && gain_sum<=max_gain) { /* larger value is treated as better; strict '>' keeps the earliest entry on ties */
+      best_sum=sum;
+      best_cdbk=i;
+    }
+  }
+  return best_cdbk;
+}
+#endif
 /** Finds the best quantized 3-tap pitch predictor by analysis by synthesis */
 static spx_word64_t pitch_gain_search_3tap(
 const spx_sig_t target[],       /* Target vector */
+static spx_word32_t pitch_gain_search_3tap(
+const spx_word16_t target[],       /* Target vector */
 const spx_coef_t ak[],          /* LPCs for this subframe */
 const spx_coef_t awk1[],        /* Weighted LPCs #1 for this subframe */
 const spx_coef_t awk2[],        /* Weighted LPCs #2 for this subframe */
 spx_sig_t exc[],                /* Excitation */
+const void *par,
+const signed char *gain_cdbk,
+int gain_cdbk_size,
 int   pitch,                    /* Pitch value */
 int   p,                        /* Number of LPC coeffs */
 …
 SpeexBits *bits,
 char *stack,
 const spx_sig_t *exc2,
+const spx_word16_t *exc2,
 const spx_word16_t *r,
 spx_sig_t *new_target,
+spx_word16_t *new_target,
 int  *cdbk_index,
 int cdbk_offset,
+int plc_tuning
+int plc_tuning,
+spx_word32_t cumul_gain
+)
+{
    int i,j;
+   VARDECL(spx_sig_t *tmp1);
+   VARDECL(spx_sig_t *tmp2);
+   spx_sig_t *x[3];
+   spx_sig_t *e[3];
+   VARDECL(spx_word16_t *tmp1);
+   VARDECL(spx_word16_t *e);
+   spx_word16_t *x[3];
    spx_word32_t corr[3];
    spx_word32_t A[3][3];
-   int   gain_cdbk_size;
-   const signed char *gain_cdbk;
    spx_word16_t gain[3];
+   spx_word64_t err;
+   const ltp_params *params;
+   params = (const ltp_params*) par;
+   gain_cdbk_size = 1<<params->gain_bits;
+   gain_cdbk = params->gain_cdbk + 3*gain_cdbk_size*cdbk_offset;
+   ALLOC(tmp1, 3*nsf, spx_sig_t);
+   ALLOC(tmp2, 3*nsf, spx_sig_t);
+   spx_word32_t err;
+   spx_word16_t max_gain=128;
+   int          best_cdbk=0;
+   ALLOC(tmp1, 3*nsf, spx_word16_t);
+   ALLOC(e, nsf, spx_word16_t);
+   if (cumul_gain > 262144)
+      max_gain = 31;
    x[0]=tmp1;
    x[1]=tmp1+nsf;
    x[2]=tmp1+2*nsf;
+   e[0]=tmp2;
+   e[1]=tmp2+nsf;
+   e[2]=tmp2+2*nsf;
+   for (i=2;i>=0;i--)
+   {
+      int pp=pitch+1-i;
+   {
+      VARDECL(spx_mem_t *mm);
+      int pp=pitch-1;
+      ALLOC(mm, p, spx_mem_t);
       for (j=0;j<nsf;j++)
+      {
          if (j-pp<0)
             e[i][j]=exc2[j-pp];
+            e[j]=exc2[j-pp];
          else if (j-pp-pitch<0)
             e[i][j]=exc2[j-pp-pitch];
+            e[j]=exc2[j-pp-pitch];
          else
             e[i][j]=0;
+            e[j]=0;
+      }
+      if (i==2)
+         syn_percep_zero(e[i], ak, awk1, awk2, x[i], nsf, p, stack);
+      else {
+         for (j=0;j<nsf-1;j++)
+            x[i][j+1]=x[i+1][j];
+         x[i][0]=0;
+         for (j=0;j<nsf;j++)
+         {
+            x[i][j]=ADD32(x[i][j],SHL32(MULT16_32_Q15(r[j], e[i][0]),1));
+         }
+      }
+   }
+#ifdef FIXED_POINT
+   {
+      /* If using fixed-point, we need to normalize the signals first */
+      spx_word16_t *y[3];
+      VARDECL(spx_word16_t *ytmp);
+      VARDECL(spx_word16_t *t);
+      spx_sig_t max_val=1;
+      int sig_shift;
+      ALLOC(ytmp, 3*nsf, spx_word16_t);
+#if 0
+      ALLOC(y[0], nsf, spx_word16_t);
+      ALLOC(y[1], nsf, spx_word16_t);
+      ALLOC(y[2], nsf, spx_word16_t);
+#else
+      y[0] = ytmp;
+      y[1] = ytmp+nsf;
+      y[2] = ytmp+2*nsf;
+#endif
+      ALLOC(t, nsf, spx_word16_t);
+      for (j=0;j<3;j++)
+      {
+         for (i=0;i<nsf;i++)
+         {
+            spx_sig_t tmp = x[j][i];
+            if (tmp<0)
+               tmp = -tmp;
+            if (tmp > max_val)
+               max_val = tmp;
+         }
+      }
+      for (i=0;i<nsf;i++)
+      {
+         spx_sig_t tmp = target[i];
+         if (tmp<0)
+            tmp = -tmp;
+         if (tmp > max_val)
+            max_val = tmp;
+      }
+      sig_shift=0;
+      while (max_val>16384)
+      {
+         sig_shift++;
+         max_val >>= 1;
+      }
+      for (j=0;j<3;j++)
+      {
+         for (i=0;i<nsf;i++)
+         {
+            y[j][i] = EXTRACT16(SHR32(x[j][i],sig_shift));
+         }
+      }
+      for (i=0;i<nsf;i++)
+      {
+         t[i] = EXTRACT16(SHR32(target[i],sig_shift));
+      }
+      for (i=0;i<3;i++)
+         corr[i]=inner_prod(y[i],t,nsf);
+      for (i=0;i<3;i++)
+         for (j=0;j<=i;j++)
+            A[i][j]=A[j][i]=inner_prod(y[i],y[j],nsf);
+   }
+#else
+   {
+      for (i=0;i<3;i++)
+         corr[i]=inner_prod(x[i],target,nsf);
+      for (i=0;i<3;i++)
+         for (j=0;j<=i;j++)
+            A[i][j]=A[j][i]=inner_prod(x[i],x[j],nsf);
+   }
+#endif
+      for (j=0;j<p;j++)
+         mm[j] = 0;
+      iir_mem16(e, ak, e, nsf, p, mm, stack);
+      for (j=0;j<p;j++)
+         mm[j] = 0;
+      filter_mem16(e, awk1, awk2, e, nsf, p, mm, stack);
+      for (j=0;j<nsf;j++)
+         x[2][j] = e[j];
+   }
+   for (i=1;i>=0;i--)
+   {
+      spx_word16_t e0=exc2[-pitch-1+i];
+      x[i][0]=MULT16_16_Q14(r[0], e0);
+      for (j=0;j<nsf-1;j++)
+         x[i][j+1]=ADD32(x[i+1][j],MULT16_16_P14(r[j+1], e0));
+   }
+   for (i=0;i<3;i++)
+      corr[i]=inner_prod(x[i],target,nsf);
+   for (i=0;i<3;i++)
+      for (j=0;j<=i;j++)
+         A[i][j]=A[j][i]=inner_prod(x[i],x[j],nsf);
+   {
       spx_word32_t C[9];
+      const signed char *ptr=gain_cdbk;
+      int best_cdbk=0;
+      spx_word32_t best_sum=0;
+#ifdef FIXED_POINT
+      spx_word16_t C16[9];
+#else
+      spx_word16_t *C16=C;
+#endif
       C[0]=corr[2];
       C[1]=corr[1];
 …
       if (plc_tuning<2)
          plc_tuning=2;
+#ifdef FIXED_POINT
+      C[0] = MAC16_32_Q15(C[0],MULT16_16_16(plc_tuning,-327),C[0]);
+      C[1] = MAC16_32_Q15(C[1],MULT16_16_16(plc_tuning,-327),C[1]);
+      C[2] = MAC16_32_Q15(C[2],MULT16_16_16(plc_tuning,-327),C[2]);
+      if (plc_tuning>30)
+         plc_tuning=30;
+#ifdef FIXED_POINT
       C[0] = SHL32(C[0],1);
       C[1] = SHL32(C[1],1);
 …
       C[4] = SHL32(C[4],1);
       C[5] = SHL32(C[5],1);
+#else
+      C[0]*=1-.01*plc_tuning;
+      C[1]*=1-.01*plc_tuning;
+      C[2]*=1-.01*plc_tuning;
+      C[6]*=.5*(1+.01*plc_tuning);
+      C[7]*=.5*(1+.01*plc_tuning);
+      C[8]*=.5*(1+.01*plc_tuning);
+#endif
+      for (i=0;i<gain_cdbk_size;i++)
+      {
+         spx_word32_t sum=0;
+         spx_word16_t g[3];
+         spx_word16_t pitch_control=64;
+         spx_word16_t gain_sum;
+         ptr = gain_cdbk+3*i;
+         g[0]=ADD16((spx_word16_t)ptr[0],32);
+         g[1]=ADD16((spx_word16_t)ptr[1],32);
+         g[2]=ADD16((spx_word16_t)ptr[2],32);
+         /* We favor "safe" pitch values to handle packet loss better */
+         gain_sum = ADD16(ADD16(g[1],MAX16(g[0], 0)),MAX16(g[2], 0));
+         if (gain_sum > 64)
+         {
+            gain_sum = SUB16(gain_sum, 64);
+            if (gain_sum > 127)
+               gain_sum = 127;
+#ifdef FIXED_POINT
+            pitch_control =  SUB16(64,EXTRACT16(PSHR32(MULT16_16(64,MULT16_16_16(plc_tuning, gain_sum)),10)));
+#else
+            pitch_control = 64*(1.-.001*plc_tuning*gain_sum);
+#endif
+            if (pitch_control < 0)
+               pitch_control = 0;
+         }
+         sum = compute_pitch_error(C, g, pitch_control);
+         if (sum>best_sum || i==0)
+         {
+            best_sum=sum;
+            best_cdbk=i;
+         }
+      }
+#ifdef FIXED_POINT
+      gain[0] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*3]);
+      gain[1] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*3+1]);
+      gain[2] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*3+2]);
+      C[6] = MAC16_32_Q15(C[6],MULT16_16_16(plc_tuning,655),C[6]);
+      C[7] = MAC16_32_Q15(C[7],MULT16_16_16(plc_tuning,655),C[7]);
+      C[8] = MAC16_32_Q15(C[8],MULT16_16_16(plc_tuning,655),C[8]);
+      normalize16(C, C16, 32767, 9);
+#else
+      C[6]*=.5*(1+.02*plc_tuning);
+      C[7]*=.5*(1+.02*plc_tuning);
+      C[8]*=.5*(1+.02*plc_tuning);
+#endif
+      best_cdbk = pitch_gain_search_3tap_vq(gain_cdbk, gain_cdbk_size, C16, max_gain);
+#ifdef FIXED_POINT
+      gain[0] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*4]);
+      gain[1] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*4+1]);
+      gain[2] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*4+2]);
       /*printf ("%d %d %d %d\n",gain[0],gain[1],gain[2], best_cdbk);*/
 #else
       gain[0] = 0.015625*gain_cdbk[best_cdbk*3]  + .5;
       gain[1] = 0.015625*gain_cdbk[best_cdbk*3+1]+ .5;
       gain[2] = 0.015625*gain_cdbk[best_cdbk*3+2]+ .5;
+      gain[0] = 0.015625*gain_cdbk[best_cdbk*4]  + .5;
+      gain[1] = 0.015625*gain_cdbk[best_cdbk*4+1]+ .5;
+      gain[2] = 0.015625*gain_cdbk[best_cdbk*4+2]+ .5;
 #endif
       *cdbk_index=best_cdbk;
+   }
-#ifdef FIXED_POINT
    for (i=0;i<nsf;i++)
+     exc[i]=SHL32(ADD32(ADD32(MULT16_32_Q15(SHL16(gain[0],7),e[2][i]), MULT16_32_Q15(SHL16(gain[1],7),e[1][i])),
+                        MULT16_32_Q15(SHL16(gain[2],7),e[0][i])), 2);
+   err=0;
+      exc[i]=0;
+   for (i=0;i<3;i++)
+   {
+      int j;
+      int tmp1, tmp3;
+      int pp=pitch+1-i;
+      tmp1=nsf;
+      if (tmp1>pp)
+         tmp1=pp;
+      for (j=0;j<tmp1;j++)
+         exc[j]=MAC16_16(exc[j],SHL16(gain[2-i],7),exc2[j-pp]);
+      tmp3=nsf;
+      if (tmp3>pp+pitch)
+         tmp3=pp+pitch;
+      for (j=tmp1;j<tmp3;j++)
+         exc[j]=MAC16_16(exc[j],SHL16(gain[2-i],7),exc2[j-pp-pitch]);
+   }
    for (i=0;i<nsf;i++)
+   {
+      spx_word16_t perr2;
+      spx_sig_t tmp = SHL32(ADD32(ADD32(MULT16_32_Q15(SHL16(gain[0],7),x[2][i]),MULT16_32_Q15(SHL16(gain[1],7),x[1][i])),
+                                  MULT16_32_Q15(SHL16(gain[2],7),x[0][i])),2);
+      spx_sig_t perr=SUB32(target[i],tmp);
+      new_target[i] = SUB32(target[i], tmp);
+      perr2 = EXTRACT16(PSHR32(perr,15));
+      err = ADD64(err,MULT16_16(perr2,perr2));
+   }
+#else
+   for (i=0;i<nsf;i++)
+      exc[i]=gain[0]*e[2][i]+gain[1]*e[1][i]+gain[2]*e[0][i];
+   err=0;
+   for (i=0;i<nsf;i++)
+   {
+      spx_sig_t tmp = gain[2]*x[0][i]+gain[1]*x[1][i]+gain[0]*x[2][i];
+      new_target[i] = target[i] - tmp;
+      err+=new_target[i]*new_target[i];
+   }
+#endif
+      spx_word32_t tmp = ADD32(ADD32(MULT16_16(gain[0],x[2][i]),MULT16_16(gain[1],x[1][i])),
+                            MULT16_16(gain[2],x[0][i]));
+      new_target[i] = SUB16(target[i], EXTRACT16(PSHR32(tmp,6)));
+   }
+   err = inner_prod(new_target, new_target, nsf);
    return err;
+}
 /** Finds the best quantized 3-tap pitch predictor by analysis by synthesis */
 int pitch_search_3tap(
 spx_sig_t target[],                 /* Target vector */
 spx_sig_t *sw,
+spx_word16_t target[],                 /* Target vector */
+spx_word16_t *sw,
 spx_coef_t ak[],                     /* LPCs for this subframe */
 spx_coef_t awk1[],                   /* Weighted LPCs #1 for this subframe */
 …
 SpeexBits *bits,
 char *stack,
 spx_sig_t *exc2,
+spx_word16_t *exc2,
 spx_word16_t *r,
 int complexity,
 int cdbk_offset,
+int plc_tuning
+int plc_tuning,
+spx_word32_t *cumul_gain
+)
+{
 …
    int cdbk_index, pitch=0, best_gain_index=0;
    VARDECL(spx_sig_t *best_exc);
    VARDECL(spx_sig_t *new_target);
    VARDECL(spx_sig_t *best_target);
+   VARDECL(spx_word16_t *new_target);
+   VARDECL(spx_word16_t *best_target);
    int best_pitch=0;
    spx_word64_t err, best_err=-1;
+   spx_word32_t err, best_err=-1;
    int N;
    const ltp_params *params;
+   const signed char *gain_cdbk;
+   int   gain_cdbk_size;
    VARDECL(int *nbest);
+   params = (const ltp_params*) par;
+   gain_cdbk_size = 1<<params->gain_bits;
+   gain_cdbk = params->gain_cdbk + 4*gain_cdbk_size*cdbk_offset;
    N=complexity;
    if (N>10)
 …
       return start;
+   }
-   ALLOC(best_exc, nsf, spx_sig_t);
-   ALLOC(new_target, nsf, spx_sig_t);
-   ALLOC(best_target, nsf, spx_sig_t);
    if (N>end-start+1)
 …
    else
       nbest[0] = start;
+   ALLOC(best_exc, nsf, spx_sig_t);
+   ALLOC(new_target, nsf, spx_word16_t);
+   ALLOC(best_target, nsf, spx_word16_t);
    for (i=0;i<N;i++)
+   {
 …
       for (j=0;j<nsf;j++)
          exc[j]=0;
       err=pitch_gain_search_3tap(target, ak, awk1, awk2, exc, par, pitch, p, nsf,
                                  bits, stack, exc2, r, new_target, &cdbk_index, cdbk_offset, plc_tuning);
+      err=pitch_gain_search_3tap(target, ak, awk1, awk2, exc, gain_cdbk, gain_cdbk_size, pitch, p, nsf,
+                                 bits, stack, exc2, r, new_target, &cdbk_index, plc_tuning, *cumul_gain);
       if (err<best_err || best_err<0)
+      {
 …
+      }
+   }
    /*printf ("pitch: %d %d\n", best_pitch, best_gain_index);*/
    speex_bits_pack(bits, best_pitch-start, params->pitch_bits);
    speex_bits_pack(bits, best_gain_index, params->gain_bits);
+#ifdef FIXED_POINT
+   *cumul_gain = MULT16_32_Q13(SHL16(params->gain_cdbk[4*best_gain_index+3],8), MAX32(1024,*cumul_gain));
+#else
+   *cumul_gain = 0.03125*MAX32(1024,*cumul_gain)*params->gain_cdbk[4*best_gain_index+3];
+#endif
+   /*printf ("%f\n", cumul_gain);*/
    /*printf ("encode pitch: %d %d\n", best_pitch, best_gain_index);*/
    for (i=0;i<nsf;i++)
 …
 void pitch_unquant_3tap(
+spx_sig_t exc[],                    /* Excitation */
+spx_word16_t exc[],             /* Input excitation */
+spx_word32_t exc_out[],         /* Output excitation */
 int   start,                    /* Smallest pitch value allowed */
 int   end,                      /* Largest pitch value allowed */
 spx_word16_t pitch_coef,               /* Voicing (pitch) coefficient */
+spx_word16_t pitch_coef,        /* Voicing (pitch) coefficient */
 const void *par,
 int   nsf,                      /* Number of samples in subframe */
 …
    params = (const ltp_params*) par;
    gain_cdbk_size = 1<<params->gain_bits;
    gain_cdbk = params->gain_cdbk + 3*gain_cdbk_size*cdbk_offset;
+   gain_cdbk = params->gain_cdbk + 4*gain_cdbk_size*cdbk_offset;
    pitch = speex_bits_unpack_unsigned(bits, params->pitch_bits);
 …
    /*printf ("decode pitch: %d %d\n", pitch, gain_index);*/
 #ifdef FIXED_POINT
    gain[0] = ADD16(32,(spx_word16_t)gain_cdbk[gain_index*3]);
    gain[1] = ADD16(32,(spx_word16_t)gain_cdbk[gain_index*3+1]);
    gain[2] = ADD16(32,(spx_word16_t)gain_cdbk[gain_index*3+2]);
 #else
    gain[0] = 0.015625*gain_cdbk[gain_index*3]+.5;
    gain[1] = 0.015625*gain_cdbk[gain_index*3+1]+.5;
    gain[2] = 0.015625*gain_cdbk[gain_index*3+2]+.5;
+   gain[0] = ADD16(32,(spx_word16_t)gain_cdbk[gain_index*4]);
+   gain[1] = ADD16(32,(spx_word16_t)gain_cdbk[gain_index*4+1]);
+   gain[2] = ADD16(32,(spx_word16_t)gain_cdbk[gain_index*4+2]);
+#else
+   gain[0] = 0.015625*gain_cdbk[gain_index*4]+.5;
+   gain[1] = 0.015625*gain_cdbk[gain_index*4+1]+.5;
+   gain[2] = 0.015625*gain_cdbk[gain_index*4+2]+.5;
 #endif
 …
    gain_val[1]=gain[1];
    gain_val[2]=gain[2];
+   {
+      spx_sig_t *e[3];
+      VARDECL(spx_sig_t *tmp2);
+      ALLOC(tmp2, 3*nsf, spx_sig_t);
+      e[0]=tmp2;
+      e[1]=tmp2+nsf;
+      e[2]=tmp2+2*nsf;
+      for (i=0;i<3;i++)
+      {
+         int j;
+         int pp=pitch+1-i;
+#if 0
+         for (j=0;j<nsf;j++)
+         {
+            if (j-pp<0)
+               e[i][j]=exc[j-pp];
+            else if (j-pp-pitch<0)
+               e[i][j]=exc[j-pp-pitch];
+            else
+               e[i][j]=0;
+         }
+#else
+         {
+            int tmp1, tmp3;
+            tmp1=nsf;
+            if (tmp1>pp)
+               tmp1=pp;
+            for (j=0;j<tmp1;j++)
+               e[i][j]=exc[j-pp];
+            tmp3=nsf;
+            if (tmp3>pp+pitch)
+               tmp3=pp+pitch;
+            for (j=tmp1;j<tmp3;j++)
+               e[i][j]=exc[j-pp-pitch];
+            for (j=tmp3;j<nsf;j++)
+               e[i][j]=0;
+         }
+#endif
+      }
+#ifdef FIXED_POINT
+      {
+         for (i=0;i<nsf;i++)
+            exc[i]=SHL32(ADD32(ADD32(MULT16_32_Q15(SHL16(gain[0],7),e[2][i]), MULT16_32_Q15(SHL16(gain[1],7),e[1][i])),
+                               MULT16_32_Q15(SHL16(gain[2],7),e[0][i])), 2);
+      }
+#else
+      for (i=0;i<nsf;i++)
+         exc[i]=VERY_SMALL+gain[0]*e[2][i]+gain[1]*e[1][i]+gain[2]*e[0][i];
+#endif
+   }
+   gain[0] = SHL16(gain[0],7);
+   gain[1] = SHL16(gain[1],7);
+   gain[2] = SHL16(gain[2],7);
+   for (i=0;i<nsf;i++)
+      exc_out[i]=0;
+   for (i=0;i<3;i++)
+   {
+      int j;
+      int tmp1, tmp3;
+      int pp=pitch+1-i;
+      tmp1=nsf;
+      if (tmp1>pp)
+         tmp1=pp;
+      for (j=0;j<tmp1;j++)
+         exc_out[j]=MAC16_16(exc_out[j],gain[2-i],exc[j-pp]);
+      tmp3=nsf;
+      if (tmp3>pp+pitch)
+         tmp3=pp+pitch;
+      for (j=tmp1;j<tmp3;j++)
+         exc_out[j]=MAC16_16(exc_out[j],gain[2-i],exc[j-pp-pitch]);
+   }
+   /*for (i=0;i<nsf;i++)
+   exc[i]=PSHR32(exc32[i],13);*/
+}
 …
 /** Forced pitch delay and gain */
 int forced_pitch_quant(
 spx_sig_t target[],                 /* Target vector */
 spx_sig_t *sw,
+spx_word16_t target[],                 /* Target vector */
+spx_word16_t *sw,
 spx_coef_t ak[],                     /* LPCs for this subframe */
 spx_coef_t awk1[],                   /* Weighted LPCs #1 for this subframe */
 …
 SpeexBits *bits,
 char *stack,
 spx_sig_t *exc2,
+spx_word16_t *exc2,
 spx_word16_t *r,
 int complexity,
 int cdbk_offset,
+int plc_tuning
+int plc_tuning,
+spx_word32_t *cumul_gain
+)
+{
    int i;
+   float coef = GAIN_SCALING_1*pitch_coef;
+   if (coef>.99)
+      coef=.99;
+   VARDECL(spx_sig_t *res);
+   ALLOC(res, nsf, spx_sig_t);
+#ifdef FIXED_POINT
+   if (pitch_coef>63)
+      pitch_coef=63;
+#else
+   if (pitch_coef>.99)
+      pitch_coef=.99;
+#endif
+   for (i=0;i<nsf&&i<start;i++)
+   {
+      exc[i]=MULT16_16(SHL16(pitch_coef, 7),exc2[i-start]);
+   }
+   for (;i<nsf;i++)
+   {
+      exc[i]=MULT16_32_Q15(SHL16(pitch_coef, 9),exc[i-start]);
+   }
+   syn_percep_zero(exc, ak, awk1, awk2, res, nsf, p, stack);
    for (i=0;i<nsf;i++)
+   {
+      exc[i]=exc[i-start]*coef;
+   }
+      target[i]=EXTRACT16(SATURATE(SUB32(EXTEND32(target[i]),PSHR32(res[i],SIG_SHIFT-1)),32700));
    return start;
+}
 …
 /** Unquantize forced pitch delay and gain */
 void forced_pitch_unquant(
+spx_sig_t exc[],                    /* Excitation */
+spx_word16_t exc[],             /* Input excitation */
+spx_word32_t exc_out[],         /* Output excitation */
 int   start,                    /* Smallest pitch value allowed */
 int   end,                      /* Largest pitch value allowed */
 spx_word16_t pitch_coef,               /* Voicing (pitch) coefficient */
+spx_word16_t pitch_coef,        /* Voicing (pitch) coefficient */
 const void *par,
 int   nsf,                      /* Number of samples in subframe */
 …
+{
    int i;
+   float coef = GAIN_SCALING_1*pitch_coef;
+   if (coef>.99)
+      coef=.99;
+#ifdef FIXED_POINT
+   if (pitch_coef>63)
+      pitch_coef=63;
+#else
+   if (pitch_coef>.99)
+      pitch_coef=.99;
+#endif
    for (i=0;i<nsf;i++)
+   {
+      exc[i]=exc[i-start]*coef;
+      exc_out[i]=MULT16_16(exc[i-start],SHL16(pitch_coef,7));
+      exc[i] = PSHR(exc_out[i],13);
+   }
    *pitch_val = start;

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/ltp.h

-                      r278
+                      r628
 #endif
+void open_loop_nbest_pitch(spx_sig_t *sw, int start, int end, int len, int *pitch, spx_word16_t *gain, int N, char *stack);
+spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len);
+void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack);
+void open_loop_nbest_pitch(spx_word16_t *sw, int start, int end, int len, int *pitch, spx_word16_t *gain, int N, char *stack);
 /** Finds the best quantized 3-tap pitch predictor by analysis by synthesis */
 int pitch_search_3tap(
 spx_sig_t target[],                 /* Target vector */
 spx_sig_t *sw,
+spx_word16_t target[],                 /* Target vector */
+spx_word16_t *sw,
 spx_coef_t ak[],                     /* LPCs for this subframe */
 spx_coef_t awk1[],                   /* Weighted LPCs #1 for this subframe */
 …
 SpeexBits *bits,
 char *stack,
 spx_sig_t *exc2,
+spx_word16_t *exc2,
 spx_word16_t *r,
 int   complexity,
 int   cdbk_offset,
+int plc_tuning
+int plc_tuning,
+spx_word32_t *cumul_gain
 );
 /*Unquantize adaptive codebook and update pitch contribution*/
 void pitch_unquant_3tap(
+spx_sig_t exc[],                    /* Excitation */
+spx_word16_t exc[],             /* Input excitation */
+spx_word32_t exc_out[],         /* Output excitation */
 int   start,                    /* Smallest pitch value allowed */
 int   end,                      /* Largest pitch value allowed */
 spx_word16_t pitch_coef,               /* Voicing (pitch) coefficient */
+spx_word16_t pitch_coef,        /* Voicing (pitch) coefficient */
 const void *par,
 int   nsf,                      /* Number of samples in subframe */
 …
 /** Forced pitch delay and gain */
 int forced_pitch_quant(
 spx_sig_t target[],                 /* Target vector */
 spx_sig_t *sw,
+spx_word16_t target[],                 /* Target vector */
+spx_word16_t *sw,
 spx_coef_t ak[],                     /* LPCs for this subframe */
 spx_coef_t awk1[],                   /* Weighted LPCs #1 for this subframe */
 …
 SpeexBits *bits,
 char *stack,
 spx_sig_t *exc2,
+spx_word16_t *exc2,
 spx_word16_t *r,
 int complexity,
 int cdbk_offset,
+int plc_tuning
+int plc_tuning,
+spx_word32_t *cumul_gain
 );
 /** Unquantize forced pitch delay and gain */
 void forced_pitch_unquant(
+spx_sig_t exc[],                    /* Excitation */
+spx_word16_t exc[],             /* Input excitation */
+spx_word32_t exc_out[],         /* Output excitation */
 int   start,                    /* Smallest pitch value allowed */
 int   end,                      /* Largest pitch value allowed */
 spx_word16_t pitch_coef,               /* Voicing (pitch) coefficient */
+spx_word16_t pitch_coef,        /* Voicing (pitch) coefficient */
 const void *par,
 int   nsf,                      /* Number of samples in subframe */

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/ltp_arm4.h

-                      r278
+                      r628
 #define OVERRIDE_INNER_PROD
 static spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
+spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
+{
    spx_word32_t sum1=0,sum2=0;
 …
 #define OVERRIDE_PITCH_XCORR
 static void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
+void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
+{
    int i,j;

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/ltp_bfin.h

-                      r278
+                      r628
 #define OVERRIDE_INNER_PROD
 static spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
+spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
+{
    spx_word32_t sum=0;
 …
 #define OVERRIDE_PITCH_XCORR
 static void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
+void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
+{
    corr += nb_pitch - 1;
 …
 #define OVERRIDE_COMPUTE_PITCH_ERROR
 static inline spx_word32_t compute_pitch_error(spx_word32_t *C, spx_word16_t *g, spx_word16_t pitch_control)
+static inline spx_word32_t compute_pitch_error(spx_word16_t *C, spx_word16_t *g, spx_word16_t pitch_control)
+{
    spx_word32_t sum;
    __asm__ __volatile__
+         (
          "A1 = A0 = 0;\n\t"
          "R0 = [%1++];\n\t"
+         "A0 = 0;\n\t"
+         "R0 = W[%1++];\n\t"
          "R1.L = %2.L*%5.L (IS);\n\t"
+         "R0 <<= 1;\n\t"
+         "A1 += R1.L*R0.L (M), A0 += R1.L*R0.H (IS) || R0 = [%1++];\n\t"
+         "A0 += R1.L*R0.L (IS) || R0 = W[%1++];\n\t"
          "R1.L = %3.L*%5.L (IS);\n\t"
+         "R0 <<= 1;\n\t"
+         "A1 += R1.L*R0.L (M), A0 += R1.L*R0.H (IS) || R0 = [%1++];\n\t"
+         "A0 += R1.L*R0.L (IS) || R0 = W[%1++];\n\t"
          "R1.L = %4.L*%5.L (IS);\n\t"
+         "R0 <<= 1;\n\t"
+         "A1 += R1.L*R0.L (M), A0 += R1.L*R0.H (IS) || R0 = [%1++];\n\t"
+         "A0 += R1.L*R0.L (IS) || R0 = W[%1++];\n\t"
          "R1.L = %2.L*%3.L (IS);\n\t"
+         "R0 <<= 1;\n\t"
+         "A1 -= R1.L*R0.L (M), A0 -= R1.L*R0.H (IS) || R0 = [%1++];\n\t"
+         "A0 -= R1.L*R0.L (IS) || R0 = W[%1++];\n\t"
          "R1.L = %4.L*%3.L (IS);\n\t"
+         "R0 <<= 1;\n\t"
+         "A1 -= R1.L*R0.L (M), A0 -= R1.L*R0.H (IS) || R0 = [%1++];\n\t"
+         "A0 -= R1.L*R0.L (IS) || R0 = W[%1++];\n\t"
          "R1.L = %4.L*%2.L (IS);\n\t"
+         "R0 <<= 1;\n\t"
+         "A1 -= R1.L*R0.L (M), A0 -= R1.L*R0.H (IS) || R0 = [%1++];\n\t"
+         "A0 -= R1.L*R0.L (IS) || R0 = W[%1++];\n\t"
          "R1.L = %2.L*%2.L (IS);\n\t"
+         "R0 <<= 1;\n\t"
+         "A1 -= R1.L*R0.L (M), A0 -= R1.L*R0.H (IS) || R0 = [%1++];\n\t"
+         "A0 -= R1.L*R0.L (IS) || R0 = W[%1++];\n\t"
          "R1.L = %3.L*%3.L (IS);\n\t"
+         "R0 <<= 1;\n\t"
+         "A1 -= R1.L*R0.L (M), A0 -= R1.L*R0.H (IS) || R0 = [%1++];\n\t"
+         "A0 -= R1.L*R0.L (IS) || R0 = W[%1++];\n\t"
          "R1.L = %4.L*%4.L (IS);\n\t"
+         "R0 <<= 1;\n\t"
+         "A1 -= R1.L*R0.L (M), A0 -= R1.L*R0.H (IS);\n\t"
+         "A1 = A1 >>> 16;\n\t"
+         "A0 += A1;\n\t"
+         "A0 -= R1.L*R0.L (IS);\n\t"
          "%0 = A0;\n\t"
    : "=&D" (sum), "=a" (C)
 …
+}
+#define OVERRIDE_OPEN_LOOP_NBEST_PITCH
+#ifdef OVERRIDE_OPEN_LOOP_NBEST_PITCH
+void open_loop_nbest_pitch(spx_word16_t *sw, int start, int end, int len, int *pitch, spx_word16_t *gain, int N, char *stack)
+{ /* Blackfin build of the open-loop pitch search: fills pitch[0..N-1] with the best-scoring lags in [start,end] and, when gain is non-NULL, gain[0..N-1] */
+   int i,j,k;
+   VARDECL(spx_word32_t *best_score); /* per kept candidate: squared normalized correlation (C path only) */
+   VARDECL(spx_word32_t *best_ener); /* per kept candidate: normalized energy + 1 (C path only) */
+   spx_word32_t e0; /* inner_prod(sw, sw, len); only used for the gain below */
+   VARDECL(spx_word32_t *corr); /* end-start+1 entries, written by pitch_xcorr() */
+   VARDECL(spx_word32_t *energy); /* energy[0] from inner_prod(), later entries from the asm recurrence */
+   ALLOC(best_score, N, spx_word32_t);
+   ALLOC(best_ener, N, spx_word32_t);
+   ALLOC(corr, end-start+1, spx_word32_t);
+   ALLOC(energy, end-start+2, spx_word32_t);
+   for (i=0;i<N;i++)
+   {
+        best_score[i]=-1; /* negative start value so the first candidate tested can win */
+        best_ener[i]=0;
+        pitch[i]=start; /* default lag if nothing better is found */
+   }
+   energy[0]=inner_prod(sw-start, sw-start, len);
+   e0=inner_prod(sw, sw, len);
+   /* energy update -------------------------------------*/
+      __asm__ __volatile__ /* energy[k+1] = max(0, energy[k] + (a*a>>6) - (b*b>>6)), a read via I1 and b via I2 */
+      (
+"        P0 = %0;\n\t" /* P0: walks energy[] */
+"        I1 = %1;\n\t" /* I1: &sw[-start-1], samples whose square is added */
+"        L1 = 0;\n\t" /* NOTE(review): L1/L2 are written here but absent from the clobber list below */
+"        I2 = %2;\n\t" /* I2: &sw[-start+len-1], samples whose square is subtracted */
+"        L2 = 0;\n\t"
+"        R2 = [P0++];\n\t" /* R2: running energy, seeded from energy[0] */
+"        R3 = 0;\n\t" /* R3: lower clamp */
+"        LSETUP (eu1, eu2) LC1 = %3;\n\t" /* end-start iterations */
+"eu1:      R1.L = W [I1--] || R0.L = W [I2--] ;\n\t" /* one 16-bit sample from each pointer, both stepping backwards */
+"          R1 = R1.L * R1.L (IS);\n\t"
+"          R0 = R0.L * R0.L (IS);\n\t"
+"          R1 >>>= 6;\n\t"
+"          R1 = R1 + R2;\n\t"
+"          R0 >>>= 6;\n\t"
+"          R1 = R1 - R0;\n\t"
+"          R2 = MAX(R1,R3);\n\t" /* clamp the updated energy at zero */
+"eu2:      [P0++] = R2;\n\t" /* store as the next energy[] entry */
+       : : "d" (energy), "d" (&sw[-start-1]), "d" (&sw[-start+len-1]),
+           "a" (end-start)
+       : "P0", "I1", "I2", "R0", "R1", "R2", "R3"
+#if (__GNUC__ == 4)
+         , "LC1"
+#endif
+       );
+   pitch_xcorr(sw, sw-end, corr, len, end-start+1, stack);
+   /* FIXME: Fixed-point and floating-point code should be merged */
+   {
+      VARDECL(spx_word16_t *corr16); /* corr[] rescaled by normalize16() */
+      VARDECL(spx_word16_t *ener16); /* energy[] rescaled by normalize16() */
+      ALLOC(corr16, end-start+1, spx_word16_t);
+      ALLOC(ener16, end-start+1, spx_word16_t);
+      /* Normalize to 180 so we can square it and it still fits in 16 bits */
+      normalize16(corr, corr16, 180, end-start+1);
+      normalize16(energy, ener16, 180, end-start+1);
+      if (N == 1) {
+        /* optimised asm to handle N==1 case */
+      __asm__ __volatile__ /* same test as the C loop below, tracking a single best candidate in registers */
+      (
+"        I0 = %1;\n\t"                     /* I0: corr16[]    */
+"        L0 = 0;\n\t" /* NOTE(review): L0/L1 are written here but absent from the clobber list below */
+"        I1 = %2;\n\t"                     /* I1: ener16[]    */
+"        L1 = 0;\n\t"
+"        R2 = -1;\n\t"                     /* R2: best score  */
+"        R3 = 0;\n\t"                      /* R3: best energy */
+"        P0 = %4;\n\t"                     /* P0: best pitch, initially start */
+"        P1 = %4;\n\t"                     /* P1: counter, the lag under test */
+"        LSETUP (sl1, sl2) LC1 = %3;\n\t" /* end+1-start iterations */
+"sl1:      R0.L = W [I0++] || R1.L = W [I1++];\n\t"
+"          R0 = R0.L * R0.L (IS);\n\t" /* R0: squared correlation (tmp in the C path) */
+"          R1   += 1;\n\t" /* energy + 1, as ADD16(1,ener16[..]) in the C path */
+"          R4   = R0.L * R3.L;\n\t" /* candidate score * best energy; NOTE(review): no (IS) suffix on these two products */
+"          R5   = R2.L * R1.L;\n\t" /* best score * candidate energy */
+"          cc   = R5 < R4;\n\t" /* candidate wins when its cross-product is larger */
+"          if cc R2 = R0;\n\t"
+"          if cc R3 = R1;\n\t"
+"          if cc P0 = P1;\n\t"
+"sl2:      P1 += 1;\n\t"
+"        %0 = P0;\n\t" /* result goes to pitch[0] */
+       : "=&d" (pitch[0])
+       : "a" (corr16), "a" (ener16), "a" (end+1-start), "d" (start)
+       : "P0", "P1", "I0", "I1", "R0", "R1", "R2", "R3", "R4", "R5"
+#if (__GNUC__ == 4)
+         , "LC1"
+#endif
+       );
+      }
+      else {
+        for (i=start;i<=end;i++) /* generic path: keep the N best candidates ordered best-first */
+          {
+            spx_word16_t tmp = MULT16_16_16(corr16[i-start],corr16[i-start]);
+            /* Instead of dividing the tmp by the energy, we multiply on the other side */
+            if (MULT16_16(tmp,best_ener[N-1])>MULT16_16(best_score[N-1],ADD16(1,ener16[i-start])))
+              {
+                /* We can safely put it last and then check */
+                best_score[N-1]=tmp;
+                best_ener[N-1]=ener16[i-start]+1;
+                pitch[N-1]=i;
+                /* Check if it comes in front of others */
+                for (j=0;j<N-1;j++)
+                  {
+                    if (MULT16_16(tmp,best_ener[j])>MULT16_16(best_score[j],ADD16(1,ener16[i-start])))
+                      {
+                        for (k=N-1;k>j;k--) /* shift entries j..N-2 down one slot; the last one is dropped */
+                          {
+                            best_score[k]=best_score[k-1];
+                            best_ener[k]=best_ener[k-1];
+                            pitch[k]=pitch[k-1];
+                          }
+                        best_score[j]=tmp;
+                        best_ener[j]=ener16[i-start]+1;
+                        pitch[j]=i;
+                        break;
+                      }
+                  }
+              }
+          }
+      }
+   }
+   /* Compute open-loop gain */
+   if (gain)
+   {
+       for (j=0;j<N;j++)
+       {
+          spx_word16_t g;
+          i=pitch[j];
+          g = DIV32(corr[i-start], 10+SHR32(MULT16_16(spx_sqrt(e0),spx_sqrt(energy[i-start])),6)); /* corr over the product of the two square-rooted energies; the +10 keeps the divisor non-zero */
+          /* FIXME: g = max(g,corr/energy) */
+                   if (g<0)
+                   g = 0; /* negative gains are clamped to zero */
+             gain[j]=g;
+       }
+   }
+}
+#endif
+#define OVERRIDE_PITCH_GAIN_SEARCH_3TAP_VQ
+#ifdef OVERRIDE_PITCH_GAIN_SEARCH_3TAP_VQ
+static int pitch_gain_search_3tap_vq(
+  const signed char *gain_cdbk, /* codebook, read four bytes per entry: three gains, then gain_sum */
+  int                gain_cdbk_size, /* number of entries; used as the hardware loop count */
+  spx_word16_t      *C16, /* nine 16-bit terms, read in order by the error computation */
+  spx_word16_t       max_gain /* entries with max_gain <= gain_sum get their score replaced by -VERY_LARGE32 */
+)
+{ /* Blackfin asm search of the 3-tap pitch gain codebook; returns the index of the selected entry */
+  const signed char *ptr=gain_cdbk; /* NOTE(review): ptr, sum, g, pitch_control, gain_sum and i are not operands of the asm below and appear unused */
+  int                best_cdbk=0;
+  spx_word32_t       best_sum=-VERY_LARGE32; /* NOTE(review): output-only operand; the asm resets it to 0, so this initializer has no effect */
+  spx_word32_t       sum=0;
+  spx_word16_t       g[3];
+  spx_word16_t       pitch_control=64;
+  spx_word16_t       gain_sum;
+  int                i;
+      /* fast asm version of VQ codebook search */
+      __asm__ __volatile__
+      (
+"        P0 = %2;\n\t"                     /* P0: ptr to gain_cdbk */
+"        L1 = 0;\n\t"                      /* no circ addr for L1  */
+"        %0 = 0;\n\t"                      /* %0: best_sum         */
+"        %1 = 0;\n\t"                      /* %1: best_cdbk        */
+"        P1 = 0;\n\t"                      /* P1: loop counter     */
+"        R5 = 64;\n\t"                     /* R5: pitch_control    */
+"        LSETUP (pgs1, pgs2) LC1 = %4;\n\t" /* one iteration per codebook entry */
+"pgs1:     R2  = B [P0++] (X);\n\t"        /* R2: g[0]             */
+"          R3  = B [P0++] (X);\n\t"        /* R3: g[1]             */
+"          R4  = B [P0++] (X);\n\t"        /* R4: g[2]             */
+"          R2 += 32;\n\t" /* each stored gain is offset by +32 */
+"          R3 += 32;\n\t"
+"          R4 += 32;\n\t"
+"          R0  = B [P0++] (X);\n\t"
+"          B0  = R0;\n\t"                  /* B0: gain_sum         */
+           /* compute_pitch_error() -------------------------------*/
+"          I1 = %3;\n\t"                   /* I1: ptr to C         */
+"          A0 = 0;\n\t" /* A0: accumulates the score for this entry */
+"          R0.L = W[I1++];\n\t"
+"          R1.L = R2.L*R5.L (IS);\n\t" /* first three terms are added: g[k]*pitch_control times C[0..2] */
+"          A0 += R1.L*R0.L (IS) || R0.L = W[I1++];\n\t"
+"          R1.L = R3.L*R5.L (IS);\n\t"
+"          A0 += R1.L*R0.L (IS) || R0.L = W[I1++];\n\t"
+"          R1.L = R4.L*R5.L (IS);\n\t"
+"          A0 += R1.L*R0.L (IS) || R0.L = W[I1++];\n\t"
+"          R1.L = R2.L*R3.L (IS);\n\t" /* remaining six terms are subtracted: gain products times C[3..8] */
+"          A0 -= R1.L*R0.L (IS) || R0.L = W[I1++];\n\t"
+"          R1.L = R4.L*R3.L (IS);\n\t"
+"          A0 -= R1.L*R0.L (IS) || R0.L = W[I1++];\n\t"
+"          R1.L = R4.L*R2.L (IS);\n\t"
+"          A0 -= R1.L*R0.L (IS) || R0.L = W[I1++];\n\t"
+"          R1.L = R2.L*R2.L (IS);\n\t"
+"          A0 -= R1.L*R0.L (IS) || R0.L = W[I1++];\n\t"
+"          R1.L = R3.L*R3.L (IS);\n\t"
+"          A0 -= R1.L*R0.L (IS) || R0.L = W[I1++];\n\t"
+"          R1.L = R4.L*R4.L (IS);\n\t"
+"          R0 = (A0 -= R1.L*R0.L) (IS);\n\t" /* R0: sum for this entry */
+/*
+    Re-arrange the if-then to code efficiently on the Blackfin:
+      if (sum>best_sum && gain_sum<=max_gain)   ------ (1)
+      if (sum>best_sum && !(gain_sum>max_gain)) ------ (2)
+      if (max_gain<=gain_sum) {                 ------ (3)
+      sum = -VERY_LARGE32;
+      }
+      if (best_sum<=sum)
+    The Blackfin cc instructions are all of the form:
+      cc = x < y (or cc = x <= y)
+    NOTE(review): (3) also rejects gain_sum == max_gain and accepts
+    sum == best_sum, which (1) and (2) do not -- confirm this is intended.
+*/
+"          R1 = B0\n\t" /* NOTE(review): unlike the other instructions, these three strings have no ';' -- confirm the assembler accepts it */
+"          R2 = %5\n\t" /* R2: max_gain */
+"          R3 = %6\n\t" /* R3: -VERY_LARGE32 */
+"          cc = R2 <= R1;\n\t" /* max_gain <= gain_sum ? */
+"          if cc R0 = R3;\n\t" /* then replace sum with -VERY_LARGE32 */
+"          cc = %0 <= R0;\n\t" /* best_sum <= sum ? */
+"          if cc %0 = R0;\n\t"
+"          if cc %1 = P1;\n\t" /* remember the index of this entry */
+"pgs2:     P1 += 1;\n\t"
+       : "=&d" (best_sum), "=&d" (best_cdbk)
+       : "a" (gain_cdbk), "a" (C16), "a" (gain_cdbk_size), "a" (max_gain),
+         "b" (-VERY_LARGE32)
+       : "R0", "R1", "R2", "R3", "R4", "R5", "P0",
+         "P1", "I1", "L1", "A0", "B0"
+#if (__GNUC__ == 4)
+         , "LC1"
+#endif
+       );
+  return best_cdbk;
+}
+#endif

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/ltp_sse.h

-                      r278
+                      r628
 #define OVERRIDE_INNER_PROD
 static float inner_prod(const float *a, const float *b, int len)
+float inner_prod(const float *a, const float *b, int len)
+{
    int i;
 …
 #define OVERRIDE_PITCH_XCORR
 static void pitch_xcorr(const float *_x, const float *_y, float *corr, int len, int nb_pitch, char *stack)
+void pitch_xcorr(const float *_x, const float *_y, float *corr, int len, int nb_pitch, char *stack)
+{
    int i, offset;

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/mdf.c

-                      r523
+                      r628
 #ifdef FIXED_POINT
 static const spx_float_t MIN_LEAK = ((spx_float_t){16777, -24});
+static const spx_float_t MIN_LEAK = {16777, -24};
 #define TOP16(x) ((x)>>16)
 #else
 …
    spx_word16_t notch_radius;
    spx_mem_t notch_mem[2];
+   /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */
+   spx_int16_t *play_buf;
+   int play_buf_pos;
 };
 static inline void filter_dc_notch16(spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem)
+static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem)
+{
    int i;
 …
+}
 static inline spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
+static inline spx_word32_t mdf_inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
+{
    spx_word32_t sum=0;
    len >>= 2;
+   len >>= 1;
    while(len--)
+   {
       spx_word32_t part=0;
-      part = MAC16_16(part,*x++,*y++);
-      part = MAC16_16(part,*x++,*y++);
       part = MAC16_16(part,*x++,*y++);
       part = MAC16_16(part,*x++,*y++);
 …
 /** Compute power spectrum of a half-complex (packed) vector */
 static inline void power_spectrum(spx_word16_t *X, spx_word32_t *ps, int N)
+static inline void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N)
+{
    int i, j;
 …
 /** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */
 #ifdef FIXED_POINT
 static inline void spectral_mul_accum(spx_word16_t *X, spx_word32_t *Y, spx_word16_t *acc, int N, int M)
+static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
+{
    int i,j;
 …
+}
 #else
 static inline void spectral_mul_accum(spx_word16_t *X, spx_word32_t *Y, spx_word16_t *acc, int N, int M)
+static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
+{
    int i,j;
 …
 /** Compute weighted cross-power spectrum of a half-complex (packed) vector with conjugate */
 static inline void weighted_spectral_mul_conj(spx_float_t *w, spx_word16_t *X, spx_word16_t *Y, spx_word32_t *prod, int N)
+static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N)
+{
    int i, j;
 …
    /* FIXME: Make that an init option (new API call?) */
    st->sampling_rate = 8000;
    st->spec_average = DIV32_16(SHL32(st->frame_size, 15), st->sampling_rate);
 #ifdef FIXED_POINT
    st->beta0 = DIV32_16(SHL32(st->frame_size, 16), st->sampling_rate);
    st->beta_max = DIV32_16(SHL32(st->frame_size, 14), st->sampling_rate);
+   st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
+#ifdef FIXED_POINT
+   st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
+   st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
 #else
    st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
 …
    st->adapted = 0;
    st->Pey = st->Pyy = FLOAT_ONE;
+   st->play_buf = (spx_int16_t*)speex_alloc(2*st->frame_size*sizeof(spx_int16_t));
+   st->play_buf_pos = 0;
    return st;
+}
 …
    speex_free(st->wtmp2);
 #endif
+   speex_free(st->play_buf);
    speex_free(st);
+}
+extern int fixed_point;
+/**
+ * Handle one captured frame using the playback data queued in st->play_buf.
+ *
+ * When at least one full frame is queued, rec and the oldest queued frame
+ * are handed to speex_echo_cancel() (out and Yout are passed through), the
+ * fill level drops by one frame and the second frame slot is copied down
+ * over the first.  Otherwise a warning is issued, a non-zero partial fill
+ * level is reset to zero, and rec is copied to out unchanged.
+ */
+void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out, spx_int32_t *Yout)
+{
+   int n;
+
+   if (st->play_buf_pos < st->frame_size)
+   {
+      /* Nothing usable is queued: pass the capture through untouched */
+      speex_warning("no playback frame available");
+      if (st->play_buf_pos != 0)
+      {
+         speex_warning("internal playback buffer corruption?");
+         st->play_buf_pos = 0;
+      }
+      n = 0;
+      while (n < st->frame_size)
+      {
+         out[n] = rec[n];
+         n++;
+      }
+      return;
+   }
+
+   /* A full playback frame is available: cancel against it, then dequeue it */
+   speex_echo_cancel(st, rec, st->play_buf, out, Yout);
+   st->play_buf_pos -= st->frame_size;
+   n = 0;
+   while (n < st->frame_size)
+   {
+      st->play_buf[n] = st->play_buf[n + st->frame_size];
+      n++;
+   }
+}
+/**
+ * Queue one playback frame for a later speex_echo_capture() call.
+ *
+ * The frame is appended at the current fill level of st->play_buf and the
+ * fill level grows by one frame.  If the fill level already exceeds one
+ * frame, the new frame is dropped and a warning is issued instead.
+ */
+void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play)
+{
+   int n;
+
+   if (st->play_buf_pos > st->frame_size)
+   {
+      speex_warning("had to discard a playback frame");
+      return;
+   }
+
+   n = 0;
+   while (n < st->frame_size)
+   {
+      st->play_buf[st->play_buf_pos + n] = play[n];
+      n++;
+   }
+   st->play_buf_pos += st->frame_size;
+}
 /** Performs echo cancellation on a frame */
 void speex_echo_cancel(SpeexEchoState *st, short *ref, short *echo, short *out, spx_int32_t *Yout)
+void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int16_t *echo, spx_int16_t *out, spx_int32_t *Yout)
+{
    int i,j;
 …
    spx_word32_t tmp32;
    spx_word16_t M_1;
+   int saturated=0;
    N = st->window_size;
 …
 #endif
    filter_dc_notch16((spx_int16_t*)ref, st->notch_radius, st->d, st->frame_size, st->notch_mem);
+   filter_dc_notch16(ref, st->notch_radius, st->d, st->frame_size, st->notch_mem);
    /* Copy input data to buffer */
    for (i=0;i<st->frame_size;i++)
+   {
       spx_word16_t tmp;
+      spx_word32_t tmp32;
       st->x[i] = st->x[i+st->frame_size];
+      st->x[i+st->frame_size] = SUB16(echo[i], MULT16_16_P15(st->preemph, st->memX));
+      tmp32 = SUB32(EXTEND32(echo[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memX)));
+#ifdef FIXED_POINT
+      /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */
+      if (tmp32 > 32767)
+      {
+         tmp32 = 32767;
+         saturated = 1;
+      }
+      if (tmp32 < -32767)
+      {
+         tmp32 = -32767;
+         saturated = 1;
+      }
+#endif
+      st->x[i+st->frame_size] = EXTRACT16(tmp32);
       st->memX = echo[i];
       tmp = st->d[i];
       st->d[i] = st->d[i+st->frame_size];
+      st->d[i+st->frame_size] = SUB16(tmp, MULT16_16_P15(st->preemph, st->memD));
+      tmp32 = SUB32(EXTEND32(tmp), EXTEND32(MULT16_16_P15(st->preemph, st->memD)));
+#ifdef FIXED_POINT
+      if (tmp32 > 32767)
+      {
+         tmp32 = 32767;
+         saturated = 1;
+      }
+      if (tmp32 < -32767)
+      {
+         tmp32 = -32767;
+         saturated = 1;
+      }
+#endif
+      st->d[i+st->frame_size] = tmp32;
       st->memD = tmp;
+   }
 …
          tmp_out = -32768;
       tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE)));
+      /* This is an arbitrary test for saturation */
+      if (ref[i] <= -32000 || ref[i] >= 32000)
+      {
+         tmp_out = 0;
+         saturated = 1;
+      }
       out[i] = tmp_out;
       st->memE = tmp_out;
 …
    /* Compute a bunch of correlations */
    See = inner_prod(st->e+st->frame_size, st->e+st->frame_size, st->frame_size);
    See = ADD32(See, SHR32(10000,6));
    Syy = inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size);
+   See = mdf_inner_prod(st->e+st->frame_size, st->e+st->frame_size, st->frame_size);
+   See = ADD32(See, SHR32(EXTEND32(10000),6));
+   Syy = mdf_inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size);
    /* Convert error to frequency domain */
 …
    if (FLOAT_GT(st->Pey, st->Pyy))
       st->Pey = st->Pyy;
    /* leak_estimate is the limear regression result */
+   /* leak_estimate is the linear regression result */
    leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14));
+   /* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */
    if (leak_estimate > 16383)
       leak_estimate = 32767;
 …
       spx_word16_t adapt_rate=0;
       Sxx = inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size);
+      Sxx = mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size);
       /* Temporary adaption rate if filter is not adapted correctly */
 …
+   }
+   /* Gradient descent */
+   for (i=0;i<M*N;i++)
+   {
+      st->W[i] += st->PHI[i];
+      /* Old value of W in PHI */
+      st->PHI[i] = st->W[i] - st->PHI[i];
+   if (!saturated)
+   {
+      /* Gradient descent */
+      for (i=0;i<M*N;i++)
+      {
+         st->W[i] += st->PHI[i];
+         /* Old value of W in PHI */
+         st->PHI[i] = st->W[i] - st->PHI[i];
+      }
+   }
 …
 #ifdef FIXED_POINT
          for (i=0;i<N;i++)
             st->wtmp2[i] = PSHR32(st->W[j*N+i],NORMALIZE_SCALEDOWN+16);
+            st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],NORMALIZE_SCALEDOWN+16));
          spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
          for (i=0;i<st->frame_size;i++)
 …
          for (i=st->frame_size;i<N;i++)
+         {
             st->wtmp[i]=SHL(st->wtmp[i],NORMALIZE_SCALEUP);
+            st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP);
+         }
          spx_fft(st->fft_table, st->wtmp, st->wtmp2);
          /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */
          for (i=0;i<N;i++)
             st->W[j*N+i] -= SHL32(st->wtmp2[i],16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
+            st->W[j*N+i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
 #else
          spx_ifft(st->fft_table, &st->W[j*N], st->wtmp);
 …
       case SPEEX_ECHO_SET_SAMPLING_RATE:
          st->sampling_rate = (*(int*)ptr);
          st->spec_average = DIV32_16(SHL32(st->frame_size, 15), st->sampling_rate);
 #ifdef FIXED_POINT
          st->beta0 = DIV32_16(SHL32(st->frame_size, 16), st->sampling_rate);
          st->beta_max = DIV32_16(SHL32(st->frame_size, 14), st->sampling_rate);
+         st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
+#ifdef FIXED_POINT
+         st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
+         st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
 #else
          st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/misc.c

-                      r278
+                      r628
 #ifdef FIXED_POINT
 spx_word32_t speex_rand(spx_word16_t std, spx_int32_t *seed)
+spx_word16_t speex_rand(spx_word16_t std, spx_int32_t *seed)
+{
    spx_word32_t res;
    *seed = 1664525 * *seed + 1013904223;
    res = MULT16_16(EXTRACT16(SHR32(*seed,16)),std);
    return SUB32(res, SHR(res, 3));
+   return PSHR32(SUB32(res, SHR(res, 3)),14);
+}
 #else
 …
 #endif
-void speex_rand_vec(float std, spx_sig_t *data, int len)
+{
-   int i;
-   for (i=0;i<len;i++)
-      data[i]+=SIG_SCALING*3*std*((((float)rand())/RAND_MAX)-.5);
+}
-/*float speex_rand(float std)
+{
-   return 3*std*((((float)rand())/RAND_MAX)-.5);
-}*/
 #ifndef OVERRIDE_SPEEX_PUTC
 void _speex_putc(int ch, void *file)

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/misc.h

-                      r278
+                      r628
 #define SPEEX_MAJOR_VERSION 1         /**< Major Speex version. */
 #define SPEEX_MINOR_VERSION 1         /**< Minor Speex version. */
 #define SPEEX_MICRO_VERSION 12        /**< Micro Speex version. */
+#define SPEEX_MICRO_VERSION 13        /**< Micro Speex version. */
 #define SPEEX_EXTRA_VERSION ""        /**< Extra Speex version. */
+#define SPEEX_VERSION "speex-1.1.12"  /**< Speex version string. */
+#define SPEEX_VERSION "speex-1.1.13"  /**< Speex version string. */
+#endif
+/* A couple of tests to catch stupid option combinations */
+#ifdef FIXED_POINT
+/* Fixed-point build: reject options that only make sense with floats */
+#ifdef _USE_SSE
+#error SSE is only for floating-point
+#endif
+/* Only one CPU-specific assembly back end may be selected: either ARM variant
+   together with Blackfin, or both ARM variants at once, is contradictory */
+#if ((defined (ARM4_ASM)||defined (ARM5E_ASM)) && defined(BFIN_ASM)) || (defined (ARM4_ASM)&&defined(ARM5E_ASM))
+#error Make up your mind. What CPU do you have?
+#endif
+#ifdef VORBIS_PSYCHO
+#error Vorbis-psy model currently not implemented in fixed-point
+#endif
+#else
+/* Floating-point build: reject options that require fixed-point */
+#if defined (ARM4_ASM) || defined(ARM5E_ASM) || defined(BFIN_ASM)
+#error I suppose you can have a [ARM4/ARM5E/Blackfin] that has float instructions?
+#endif
+#ifdef FIXED_POINT_DEBUG
+#error Don't you think enabling fixed-point is a good thing to do if you want to debug that?
+#endif
 #endif
 …
 void speex_warning_int(const char *str, int val);
-/** Generate a vector of random numbers */
-void speex_rand_vec(float std, spx_sig_t *data, int len);
 /** Generate a random number */
 spx_word32_t speex_rand(spx_word16_t std, spx_int32_t *seed);
+spx_word16_t speex_rand(spx_word16_t std, spx_int32_t *seed);
 /** Speex wrapper for putc */

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/modes.c

-                      r278
+                      r628
    noise_codebook_unquant,
    NULL,
+#ifdef FIXED_POINT
+, 22938, 0, -1,
+#else
+   .7, .7, 0, -1,
+#endif
+   -1,
 };
 …
    split_cb_shape_sign_unquant,
    &split_cb_nb_ulbr,
+#ifdef FIXED_POINT
+, 16384, 11796, 21299,
+#else
+.7, 0.5, .36, .65,
+#endif
+   QCONST16(.65,15),
 };
 …
    split_cb_shape_sign_unquant,
    &split_cb_nb_vlbr,
+#ifdef FIXED_POINT
+, 16384, 11796, 18022,
+#else
+.7, 0.5, .36, .55,
+#endif
+   QCONST16(.55,15),
 };
 …
    split_cb_shape_sign_unquant,
    &split_cb_nb_lbr,
+#ifdef FIXED_POINT
+, 18022, 9830, 14746,
+#else
+.7, 0.55, .30, .45,
+#endif
+   QCONST16(.45,15),
 };
 …
    split_cb_shape_sign_unquant,
    &split_cb_nb_med,
+#ifdef FIXED_POINT
+, 20644, 5243, 11469,
+#else
+.7, 0.63, .16, .35,
+#endif
+   QCONST16(.35,15),
 };
 …
    split_cb_shape_sign_unquant,
    &split_cb_nb,
+#ifdef FIXED_POINT
+, 21299, 3932, 8192,
+#else
+.7, 0.65, .12, .25,
+#endif
+   QCONST16(.2,15),
 };
 …
    split_cb_shape_sign_unquant,
    &split_cb_sb,
+#ifdef FIXED_POINT
+, 21299, 2294, 3277,
+#else
+.68, 0.65, .07, .1,
+#endif
+   QCONST16(.1,15),
 };
 …
    split_cb_shape_sign_unquant,
    &split_cb_nb,
+#ifdef FIXED_POINT
+, 21299, 0, -1,
+#else
+.65, 0.65, .0, -1,
+#endif
+   -1,
 };
 …
    NULL,
    NULL,
+#ifdef FIXED_POINT
+, 24576, 0, -1,
+#else
+   .75, .75, .0, -1,
+#endif
+   -1,
 };
 …
    &split_cb_high_lbr,
 #endif
+#ifdef FIXED_POINT
+, 19661, 8192, -1,
+#else
+   .85, .6, .25, -1,
+#endif
+   -1,
 };
 …
    &split_cb_high,
 #endif
+#ifdef FIXED_POINT
+, 22938, 1638, -1,
+#else
+   .75, .7, .05, -1,
+#endif
+   -1,
 };
 …
    &split_cb_high,
 #endif
+#ifdef FIXED_POINT
+, 24576, 0, -1,
+#else
+   .75, .75, .0, -1,
+#endif
+   -1,
 };
 …
 .9, 0.6, /* gamma1, gamma2 */
 #endif
    .001,   /*lag_factor*/
    QCONST16(.0001,15), /*lpc_floor*/
+   .012,   /*lag_factor*/
+   QCONST16(.0002,15), /*lpc_floor*/
 .9,
    {NULL, &wb_submode1, &wb_submode2, &wb_submode3, &wb_submode4, NULL, NULL, NULL},
 …
 .9, 0.6, /* gamma1, gamma2 */
 #endif
    .002,   /*lag_factor*/
    QCONST16(.0001,15), /*lpc_floor*/
+   .012,   /*lag_factor*/
+   QCONST16(.0002,15), /*lpc_floor*/
 .7,
    {NULL, &wb_submode1, NULL, NULL, NULL, NULL, NULL, NULL},

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/modes.h

-                      r278
+                      r628
 /** Long-term predictor quantization */
 typedef int (*ltp_quant_func)(spx_sig_t *, spx_sig_t *, spx_coef_t *, spx_coef_t *,
+typedef int (*ltp_quant_func)(spx_word16_t *, spx_word16_t *, spx_coef_t *, spx_coef_t *,
                               spx_coef_t *, spx_sig_t *, const void *, int, int, spx_word16_t,
                               int, int, SpeexBits*, char *, spx_sig_t *, spx_word16_t *, int, int, int);
+                              int, int, SpeexBits*, char *, spx_word16_t *, spx_word16_t *, int, int, int, spx_word32_t *);
 /** Long-term un-quantize */
 typedef void (*ltp_unquant_func)(spx_sig_t *, int, int, spx_word16_t, const void *, int, int *,
+typedef void (*ltp_unquant_func)(spx_word16_t *, spx_word32_t *, int, int, spx_word16_t, const void *, int, int *,
                                  spx_word16_t *, SpeexBits*, char*, int, int, spx_word16_t, int);
 /** Innovation quantization function */
 typedef void (*innovation_quant_func)(spx_sig_t *, spx_coef_t *, spx_coef_t *, spx_coef_t *, const void *, int, int,
+typedef void (*innovation_quant_func)(spx_word16_t *, spx_coef_t *, spx_coef_t *, spx_coef_t *, const void *, int, int,
                                       spx_sig_t *, spx_word16_t *, SpeexBits *, char *, int, int);
 …
    ltp_quant_func    ltp_quant; /**< Long-term predictor (pitch) quantizer */
    ltp_unquant_func  ltp_unquant; /**< Long-term predictor (pitch) un-quantizer */
    const void             *ltp_params; /**< Pitch parameters (options) */
+   const void       *ltp_params; /**< Pitch parameters (options) */
    /*Quantization of innovation*/
 …
    const void             *innovation_params; /**< Innovation quantization parameters*/
-   /*Synthesis filter enhancement*/
-   spx_word16_t      lpc_enh_k1; /**< Enhancer constant */
-   spx_word16_t      lpc_enh_k2; /**< Enhancer constant */
-   spx_word16_t      lpc_enh_k3; /**< Enhancer constant */
    spx_word16_t      comb_gain;  /**< Gain of enhancer comb filter */

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/nb_celp.c

-                      r278
+                      r628
 #include "vbr.h"
 #include "misc.h"
+#include "math_approx.h"
 #include <speex/speex_callbacks.h>
 …
 #define sqr(x) ((x)*(x))
+extern const spx_word16_t lpc_window[];
 void *nb_encoder_init(const SpeexMode *m)
 …
    st->frameSize = mode->frameSize;
-   st->windowSize = st->frameSize*3/2;
    st->nbSubframes=mode->frameSize/mode->subframeSize;
    st->subframeSize=mode->subframeSize;
+   st->windowSize = st->frameSize+st->subframeSize;
    st->lpcSize = mode->lpcSize;
    st->gamma1=mode->gamma1;
 …
    st->curve = speex_alloc(128*sizeof(float));
    st->old_curve = speex_alloc(128*sizeof(float));
+#endif
+   st->psy_window = speex_alloc(256*sizeof(float));
+#endif
+   st->cumul_gain = 1024;
    /* Allocating input buffer */
+   st->inBuf = speex_alloc((st->windowSize+EXTRA_BUFFER)*sizeof(spx_sig_t));
+   st->frame = st->inBuf+EXTRA_BUFFER;
+   st->winBuf = speex_alloc((st->windowSize-st->frameSize)*sizeof(spx_word16_t));
    /* Allocating excitation buffer */
+   st->excBuf = speex_alloc((mode->frameSize+mode->pitchEnd+1)*sizeof(spx_sig_t));
+   st->exc = st->excBuf + mode->pitchEnd + 1;
+   st->swBuf = speex_alloc((mode->frameSize+mode->pitchEnd+1)*sizeof(spx_sig_t));
+   st->sw = st->swBuf + mode->pitchEnd + 1;
+   st->innov = speex_alloc((st->frameSize)*sizeof(spx_sig_t));
+   /* Asymmetric "pseudo-Hamming" window */
+   {
+      int part1, part2;
+      part1=st->frameSize - (st->subframeSize>>1);
+      part2=(st->frameSize>>1) + (st->subframeSize>>1);
+      st->window = speex_alloc((st->windowSize)*sizeof(spx_word16_t));
+      for (i=0;i<part1;i++)
+         st->window[i]=(spx_word16_t)(SIG_SCALING*(.54-.46*cos(M_PI*i/part1)));
+      for (i=0;i<part2;i++)
+         st->window[part1+i]=(spx_word16_t)(SIG_SCALING*(.54+.46*cos(M_PI*i/part2)));
+   }
+   st->excBuf = speex_alloc((mode->frameSize+mode->pitchEnd+2)*sizeof(spx_word16_t));
+   st->exc = st->excBuf + mode->pitchEnd + 2;
+   st->swBuf = speex_alloc((mode->frameSize+mode->pitchEnd+2)*sizeof(spx_word16_t));
+   st->sw = st->swBuf + mode->pitchEnd + 2;
+   st->window= lpc_window;
    /* Create the window for autocorrelation (lag-windowing) */
    st->lagWindow = speex_alloc((st->lpcSize+1)*sizeof(spx_word16_t));
 …
       st->lagWindow[i]=16384*exp(-.5*sqr(2*M_PI*st->lag_factor*i));
-   st->autocorr = speex_alloc((st->lpcSize+1)*sizeof(spx_word16_t));
-   st->lpc = speex_alloc((st->lpcSize)*sizeof(spx_coef_t));
-   st->interp_lpc = speex_alloc((st->lpcSize)*sizeof(spx_coef_t));
-   st->interp_qlpc = speex_alloc((st->lpcSize)*sizeof(spx_coef_t));
-   st->bw_lpc1 = speex_alloc((st->lpcSize)*sizeof(spx_coef_t));
-   st->bw_lpc2 = speex_alloc((st->lpcSize)*sizeof(spx_coef_t));
-   st->lsp = speex_alloc((st->lpcSize)*sizeof(spx_lsp_t));
-   st->qlsp = speex_alloc((st->lpcSize)*sizeof(spx_lsp_t));
    st->old_lsp = speex_alloc((st->lpcSize)*sizeof(spx_lsp_t));
    st->old_qlsp = speex_alloc((st->lpcSize)*sizeof(spx_lsp_t));
-   st->interp_lsp = speex_alloc((st->lpcSize)*sizeof(spx_lsp_t));
-   st->interp_qlsp = speex_alloc((st->lpcSize)*sizeof(spx_lsp_t));
    st->first = 1;
    for (i=0;i<st->lpcSize;i++)
+   {
       st->lsp[i]=LSP_SCALING*(M_PI*((float)(i+1)))/(st->lpcSize+1);
+      st->old_lsp[i]=LSP_SCALING*(M_PI*((float)(i+1)))/(st->lpcSize+1);
+   }
 …
    st->mem_sw_whole = speex_alloc((st->lpcSize)*sizeof(spx_mem_t));
    st->mem_exc = speex_alloc((st->lpcSize)*sizeof(spx_mem_t));
+   st->mem_exc2 = speex_alloc((st->lpcSize)*sizeof(spx_mem_t));
    st->pi_gain = speex_alloc((st->nbSubframes)*sizeof(spx_word32_t));
+   st->innov_save = NULL;
    st->pitch = speex_alloc((st->nbSubframes)*sizeof(int));
 …
    st->vbr_quality = 8;
    st->vbr_enabled = 0;
+   st->vbr_max = 0;
    st->vad_enabled = 0;
    st->dtx_enabled = 0;
 …
 #endif
    speex_free (st->inBuf);
+   speex_free (st->winBuf);
    speex_free (st->excBuf);
-   speex_free (st->innov);
-   speex_free (st->interp_qlpc);
-   speex_free (st->qlsp);
    speex_free (st->old_qlsp);
-   speex_free (st->interp_qlsp);
    speex_free (st->swBuf);
-   speex_free (st->window);
    speex_free (st->lagWindow);
+   speex_free (st->autocorr);
+   speex_free (st->lpc);
+   speex_free (st->lsp);
+   speex_free (st->interp_lpc);
+   speex_free (st->bw_lpc1);
+   speex_free (st->bw_lpc2);
    speex_free (st->old_lsp);
-   speex_free (st->interp_lsp);
    speex_free (st->mem_sp);
    speex_free (st->mem_sw);
    speex_free (st->mem_sw_whole);
    speex_free (st->mem_exc);
+   speex_free (st->mem_exc2);
    speex_free (st->pi_gain);
    speex_free (st->pitch);
 …
    speex_free (st->curve);
    speex_free (st->old_curve);
+   speex_free (st->psy_window);
 #endif
 …
    spx_word16_t ol_pitch_coef;
    spx_word32_t ol_gain;
+   VARDECL(spx_sig_t *res);
+   VARDECL(spx_sig_t *target);
+   VARDECL(spx_word16_t *ringing);
+   VARDECL(spx_word16_t *target);
+   VARDECL(spx_sig_t *innov);
+   VARDECL(spx_word32_t *exc32);
    VARDECL(spx_mem_t *mem);
+   VARDECL(spx_coef_t *bw_lpc1);
+   VARDECL(spx_coef_t *bw_lpc2);
+   VARDECL(spx_coef_t *lpc);
+   VARDECL(spx_lsp_t *lsp);
+   VARDECL(spx_lsp_t *qlsp);
+   VARDECL(spx_lsp_t *interp_lsp);
+   VARDECL(spx_lsp_t *interp_qlsp);
+   VARDECL(spx_coef_t *interp_lpc);
+   VARDECL(spx_coef_t *interp_qlpc);
    char *stack;
    VARDECL(spx_word16_t *syn_resp);
    VARDECL(spx_sig_t *real_exc);
+   VARDECL(spx_word16_t *real_exc);
 #ifdef EPIC_48K
    int pitch_half[2];
 …
    stack=st->stack;
+   /* Copy new data in input buffer */
+   speex_move(st->inBuf, st->inBuf+st->frameSize, (EXTRA_BUFFER+st->windowSize-st->frameSize)*sizeof(spx_sig_t));
+   for (i=0;i<st->frameSize;i++)
+      st->inBuf[st->windowSize-st->frameSize+i+EXTRA_BUFFER] = SHL32(EXTEND32(in[i]), SIG_SHIFT);
+   ALLOC(lpc, st->lpcSize, spx_coef_t);
+   ALLOC(bw_lpc1, st->lpcSize, spx_coef_t);
+   ALLOC(bw_lpc2, st->lpcSize, spx_coef_t);
+   ALLOC(lsp, st->lpcSize, spx_lsp_t);
+   ALLOC(qlsp, st->lpcSize, spx_lsp_t);
+   ALLOC(interp_lsp, st->lpcSize, spx_lsp_t);
+   ALLOC(interp_qlsp, st->lpcSize, spx_lsp_t);
+   ALLOC(interp_lpc, st->lpcSize, spx_coef_t);
+   ALLOC(interp_qlpc, st->lpcSize, spx_coef_t);
    /* Move signals 1 frame towards the past */
    speex_move(st->excBuf, st->excBuf+st->frameSize, (st->max_pitch+1)*sizeof(spx_sig_t));
    speex_move(st->swBuf, st->swBuf+st->frameSize, (st->max_pitch+1)*sizeof(spx_sig_t));
+   speex_move(st->excBuf, st->excBuf+st->frameSize, (st->max_pitch+2)*sizeof(spx_word16_t));
+   speex_move(st->swBuf, st->swBuf+st->frameSize, (st->max_pitch+2)*sizeof(spx_word16_t));
+   {
       VARDECL(spx_word16_t *w_sig);
+      VARDECL(spx_word16_t *autocorr);
       ALLOC(w_sig, st->windowSize, spx_word16_t);
+      ALLOC(autocorr, st->lpcSize+1, spx_word16_t);
       /* Window for analysis */
+      for (i=0;i<st->windowSize;i++)
+         w_sig[i] = EXTRACT16(SHR32(MULT16_16(EXTRACT16(SHR32(st->frame[i],SIG_SHIFT)),st->window[i]),SIG_SHIFT));
+      for (i=0;i<st->windowSize-st->frameSize;i++)
+         w_sig[i] = EXTRACT16(SHR32(MULT16_16(st->winBuf[i],st->window[i]),SIG_SHIFT));
+      for (;i<st->windowSize;i++)
+         w_sig[i] = EXTRACT16(SHR32(MULT16_16(in[i-st->windowSize+st->frameSize],st->window[i]),SIG_SHIFT));
       /* Compute auto-correlation */
+      _spx_autocorr(w_sig, st->autocorr, st->lpcSize+1, st->windowSize);
+   }
+   st->autocorr[0] = ADD16(st->autocorr[0],MULT16_16_Q15(st->autocorr[0],st->lpc_floor)); /* Noise floor in auto-correlation domain */
+   /* Lag windowing: equivalent to filtering in the power-spectrum domain */
+   for (i=0;i<st->lpcSize+1;i++)
+      st->autocorr[i] = MULT16_16_Q14(st->autocorr[i],st->lagWindow[i]);
+   /* Levinson-Durbin */
+   _spx_lpc(st->lpc, st->autocorr, st->lpcSize);
+   /* LPC to LSPs (x-domain) transform */
+   roots=lpc_to_lsp (st->lpc, st->lpcSize, st->lsp, 15, LSP_DELTA1, stack);
+   /* Check if we found all the roots */
+   if (roots!=st->lpcSize)
+   {
+      /* Search again if we can afford it */
+      if (st->complexity>1)
+         roots = lpc_to_lsp (st->lpc, st->lpcSize, st->lsp, 11, LSP_DELTA2, stack);
+      if (roots!=st->lpcSize)
+      _spx_autocorr(w_sig, autocorr, st->lpcSize+1, st->windowSize);
+      autocorr[0] = ADD16(autocorr[0],MULT16_16_Q15(autocorr[0],st->lpc_floor)); /* Noise floor in auto-correlation domain */
+      /* Lag windowing: equivalent to filtering in the power-spectrum domain */
+      for (i=0;i<st->lpcSize+1;i++)
+         autocorr[i] = MULT16_16_Q14(autocorr[i],st->lagWindow[i]);
+      /* Levinson-Durbin */
+      _spx_lpc(lpc, autocorr, st->lpcSize);
+      /* LPC to LSPs (x-domain) transform */
+      roots=lpc_to_lsp (lpc, st->lpcSize, lsp, 10, LSP_DELTA1, stack);
+      /* Check if we found all the roots */
+      if (roots!=st->lpcSize)
+      {
          /*If we can't find all LSP's, do some damage control and use previous filter*/
          for (i=0;i<st->lpcSize;i++)
+         {
+            st->lsp[i]=st->old_lsp[i];
+         }
+      }
+   }
+            lsp[i]=st->old_lsp[i];
+         }
+      }
+   }
 …
       if (st->first)
          for (i=0;i<st->lpcSize;i++)
             st->interp_lsp[i] = st->lsp[i];
+            interp_lsp[i] = lsp[i];
       else
          lsp_interpolate(st->old_lsp, st->lsp, st->interp_lsp, st->lpcSize, st->nbSubframes, st->nbSubframes<<1);
       lsp_enforce_margin(st->interp_lsp, st->lpcSize, LSP_MARGIN);
+         lsp_interpolate(st->old_lsp, lsp, interp_lsp, st->lpcSize, st->nbSubframes, st->nbSubframes<<1);
+      lsp_enforce_margin(interp_lsp, st->lpcSize, LSP_MARGIN);
       /* Compute interpolated LPCs (unquantized) for whole frame*/
       lsp_to_lpc(st->interp_lsp, st->interp_lpc, st->lpcSize,stack);
+      lsp_to_lpc(interp_lsp, interp_lpc, st->lpcSize,stack);
       /*Open-loop pitch*/
       if (!st->submodes[st->submodeID] || st->vbr_enabled || st->vad_enabled || SUBMODE(forced_pitch_gain) ||
+      if (st->complexity>2 || !st->submodes[st->submodeID] || st->vbr_enabled || st->vad_enabled || SUBMODE(forced_pitch_gain) ||
           SUBMODE(lbr_pitch) != -1)
+      {
 …
          spx_word16_t nol_pitch_coef[6];
          bw_lpc(st->gamma1, st->interp_lpc, st->bw_lpc1, st->lpcSize);
          bw_lpc(st->gamma2, st->interp_lpc, st->bw_lpc2, st->lpcSize);
+         bw_lpc(st->gamma1, interp_lpc, bw_lpc1, st->lpcSize);
+         bw_lpc(st->gamma2, interp_lpc, bw_lpc2, st->lpcSize);
+         filter_mem2(st->frame, st->bw_lpc1, st->bw_lpc2, st->sw, st->frameSize, st->lpcSize, st->mem_sw_whole);
+         for (i=0;i<st->windowSize-st->frameSize;i++)
+            st->sw[i] = st->winBuf[i];
+         for (;i<st->frameSize;i++)
+            st->sw[i] = in[i-st->windowSize+st->frameSize];
+         filter_mem16(st->sw, bw_lpc1, bw_lpc2, st->sw, st->frameSize, st->lpcSize, st->mem_sw_whole, stack);
          open_loop_nbest_pitch(st->sw, st->min_pitch, st->max_pitch, st->frameSize,
 …
          ol_pitch_coef=0;
+      }
       /*Compute "real" excitation*/
+      fir_mem2(st->frame, st->interp_lpc, st->exc, st->frameSize, st->lpcSize, st->mem_exc);
+      for (i=0;i<st->windowSize-st->frameSize;i++)
+         st->exc[i] = st->winBuf[i];
+      for (;i<st->frameSize;i++)
+         st->exc[i] = in[i-st->windowSize+st->frameSize];
+      fir_mem16(st->exc, interp_lpc, st->exc, st->frameSize, st->lpcSize, st->mem_exc, stack);
       /* Compute open-loop excitation gain */
 …
          float ol1=0,ol2=0;
          float ol_gain2;
          ol1 = compute_rms(st->exc, st->frameSize>>1);
          ol2 = compute_rms(st->exc+(st->frameSize>>1), st->frameSize>>1);
+         ol1 = compute_rms16(st->exc, st->frameSize>>1);
+         ol2 = compute_rms16(st->exc+(st->frameSize>>1), st->frameSize>>1);
          ol1 *= ol1*(st->frameSize>>1);
          ol2 *= ol2*(st->frameSize>>1);
 …
          ol_gain=SHR(sqrt(1+ol_gain2/st->frameSize),SIG_SHIFT);
+      } else {
+#endif
+         ol_gain = SHL32(EXTEND32(compute_rms(st->exc, st->frameSize)),SIG_SHIFT);
+#ifdef EPIC_48K
+      }
+#endif
+      } else
+#endif
+      {
+         spx_word16_t g = compute_rms16(st->exc, st->frameSize);
+         if (ol_pitch>0)
+            ol_gain = MULT16_16(g, MULT16_16_Q14(QCONST16(1.1,14),
+                                spx_sqrt(QCONST32(1.,28)-MULT16_32_Q15(QCONST16(.8,15),SHL32(MULT16_16(ol_pitch_coef,ol_pitch_coef),16)))));
+         else
+            ol_gain = SHL32(EXTEND32(g),SIG_SHIFT);
+      }
+   }
 #ifdef VORBIS_PSYCHO
+   compute_curve(st->psy, st->frame-16, st->curve);
+   for(i=0;i<256-st->frameSize;i++)
+      st->psy_window[i] = st->psy_window[i+st->frameSize];
+   for(i=0;i<st->frameSize;i++)
+      st->psy_window[256-st->frameSize+i] = in[i];
+   compute_curve(st->psy, st->psy_window, st->curve);
    /*print_vec(st->curve, 128, "curve");*/
    if (st->first)
 …
       float lsp_dist=0;
       for (i=0;i<st->lpcSize;i++)
          lsp_dist += (st->old_lsp[i] - st->lsp[i])*(st->old_lsp[i] - st->lsp[i]);
+         lsp_dist += (st->old_lsp[i] - lsp[i])*(st->old_lsp[i] - lsp[i]);
       lsp_dist /= LSP_SCALING*LSP_SCALING;
 …
          speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);
+         if (st->vbr_max>0)
+         {
+            spx_int32_t rate;
+            speex_encoder_ctl(state, SPEEX_GET_BITRATE, &rate);
+            if (rate > st->vbr_max)
+            {
+               rate = st->vbr_max;
+               speex_encoder_ctl(state, SPEEX_SET_BITRATE, &rate);
+            }
+         }
          if (st->abr_enabled)
+         {
 …
       st->bounded_pitch = 1;
+      /* Final signal synthesis from excitation */
+      iir_mem2(st->exc, st->interp_qlpc, st->frame, st->frameSize, st->lpcSize, st->mem_sp);
+#ifdef RESYNTH
+      for (i=0;i<st->frameSize;i++)
+         in[i]=st->frame[i];
+#endif
+      speex_move(st->winBuf, in+2*st->frameSize-st->windowSize, (st->windowSize-st->frameSize)*sizeof(spx_word16_t));
+      /* Clear memory (no need to really compute it) */
+      for (i=0;i<st->lpcSize;i++)
+         st->mem_sp[i] = 0;
       return 0;
 …
+   {
       for (i=0;i<st->lpcSize;i++)
          st->old_lsp[i] = st->lsp[i];
+         st->old_lsp[i] = lsp[i];
+   }
 …
    /*Quantize LSPs*/
 #if 1 /*0 for unquantized*/
    SUBMODE(lsp_quant)(st->lsp, st->qlsp, st->lpcSize, bits);
+   SUBMODE(lsp_quant)(lsp, qlsp, st->lpcSize, bits);
 #else
    for (i=0;i<st->lpcSize;i++)
      st->qlsp[i]=st->lsp[i];
+     qlsp[i]=lsp[i];
 #endif
 …
+   {
       for (i=0;i<st->lpcSize;i++)
+         st->old_qlsp[i] = st->qlsp[i];
+   }
+   /* Filter response */
+   ALLOC(res, st->subframeSize, spx_sig_t);
+         st->old_qlsp[i] = qlsp[i];
+   }
    /* Target signal */
+   ALLOC(target, st->subframeSize, spx_sig_t);
+   ALLOC(target, st->subframeSize, spx_word16_t);
+   ALLOC(innov, st->subframeSize, spx_sig_t);
+   ALLOC(exc32, st->subframeSize, spx_word32_t);
+   ALLOC(ringing, st->subframeSize, spx_word16_t);
    ALLOC(syn_resp, st->subframeSize, spx_word16_t);
    ALLOC(real_exc, st->subframeSize, spx_sig_t);
+   ALLOC(real_exc, st->subframeSize, spx_word16_t);
    ALLOC(mem, st->lpcSize, spx_mem_t);
 …
+   {
       int   offset;
+      spx_sig_t *sp, *sw, *exc;
+      spx_word16_t *sw;
+      spx_word16_t *exc;
+      spx_sig_t *innov_save = NULL;
       int pitch;
       int response_bound = st->subframeSize;
 …
       /* Offset relative to start of frame */
       offset = st->subframeSize*sub;
-      /* Original signal */
-      sp=st->frame+offset;
       /* Excitation */
       exc=st->exc+offset;
       /* Weighted signal */
       sw=st->sw+offset;
+      /* Pointer for saving innovation */
+      if (st->innov_save)
+         innov_save = st->innov_save+offset;
       /* LSP interpolation (quantized and unquantized) */
       lsp_interpolate(st->old_lsp, st->lsp, st->interp_lsp, st->lpcSize, sub, st->nbSubframes);
       lsp_interpolate(st->old_qlsp, st->qlsp, st->interp_qlsp, st->lpcSize, sub, st->nbSubframes);
+      lsp_interpolate(st->old_lsp, lsp, interp_lsp, st->lpcSize, sub, st->nbSubframes);
+      lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, st->lpcSize, sub, st->nbSubframes);
       /* Make sure the filters are stable */
       lsp_enforce_margin(st->interp_lsp, st->lpcSize, LSP_MARGIN);
       lsp_enforce_margin(st->interp_qlsp, st->lpcSize, LSP_MARGIN);
+      lsp_enforce_margin(interp_lsp, st->lpcSize, LSP_MARGIN);
+      lsp_enforce_margin(interp_qlsp, st->lpcSize, LSP_MARGIN);
       /* Compute interpolated LPCs (quantized and unquantized) */
       lsp_to_lpc(st->interp_lsp, st->interp_lpc, st->lpcSize,stack);
       lsp_to_lpc(st->interp_qlsp, st->interp_qlpc, st->lpcSize, stack);
+      lsp_to_lpc(interp_lsp, interp_lpc, st->lpcSize,stack);
+      lsp_to_lpc(interp_qlsp, interp_qlpc, st->lpcSize, stack);
       /* Compute analysis filter gain at w=pi (for use in SB-CELP) */
 …
+         {
             /*pi_g += -st->interp_qlpc[i] +  st->interp_qlpc[i+1];*/
             pi_g = ADD32(pi_g, SUB32(st->interp_qlpc[i+1],st->interp_qlpc[i]));
+            pi_g = ADD32(pi_g, SUB32(EXTEND32(interp_qlpc[i+1]),EXTEND32(interp_qlpc[i])));
+         }
          st->pi_gain[sub] = pi_g;
 …
          for (i=0;i<128;i++)
             curr_curve[i] = (1.0f-fact)*st->old_curve[i] + fact*st->curve[i];
          curve_to_lpc(st->psy, curr_curve, st->bw_lpc1, st->bw_lpc2, 10);
+         curve_to_lpc(st->psy, curr_curve, bw_lpc1, bw_lpc2, 10);
+      }
 #else
       /* Compute bandwidth-expanded (unquantized) LPCs for perceptual weighting */
       bw_lpc(st->gamma1, st->interp_lpc, st->bw_lpc1, st->lpcSize);
+      bw_lpc(st->gamma1, interp_lpc, bw_lpc1, st->lpcSize);
       if (st->gamma2>=0)
          bw_lpc(st->gamma2, st->interp_lpc, st->bw_lpc2, st->lpcSize);
+         bw_lpc(st->gamma2, interp_lpc, bw_lpc2, st->lpcSize);
       else
+      {
          st->bw_lpc2[0]=1;
+         bw_lpc2[0]=1;
          for (i=1;i<=st->lpcSize;i++)
             st->bw_lpc2[i]=0;
+            bw_lpc2[i]=0;
+      }
       /*print_vec(st->bw_lpc1, 10, "bw_lpc");*/
 #endif
+      for (i=0;i<st->subframeSize;i++)
+         real_exc[i] = exc[i];
+      {
+         /*FIXME: This will break if we change the window size */
+         if (st->windowSize-st->frameSize != st->subframeSize)
+            speex_error("windowSize-frameSize != subframeSize");
+         if (sub==0)
+         {
+            for (i=0;i<st->subframeSize;i++)
+               real_exc[i] = sw[i] = st->winBuf[i];
+         } else {
+            for (i=0;i<st->subframeSize;i++)
+               real_exc[i] = sw[i] = in[i+((sub-1)*st->subframeSize)];
+         }
+      }
+      fir_mem16(real_exc, interp_qlpc, real_exc, st->subframeSize, st->lpcSize, st->mem_exc2, stack);
       if (st->complexity==0)
          response_bound >>= 1;
       compute_impulse_response(st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, syn_resp, response_bound, st->lpcSize, stack);
+      compute_impulse_response(interp_qlpc, bw_lpc1, bw_lpc2, syn_resp, response_bound, st->lpcSize, stack);
       for (i=response_bound;i<st->subframeSize;i++)
          syn_resp[i]=VERY_SMALL;
-      /* Reset excitation */
-      for (i=0;i<st->subframeSize;i++)
-         exc[i]=VERY_SMALL;
       /* Compute zero response of A(z/g1) / ( A(z/g2) * A(z) ) */
       for (i=0;i<st->lpcSize;i++)
+         mem[i]=st->mem_sp[i];
+         mem[i]=SHL32(st->mem_sp[i],1);
+      for (i=0;i<st->subframeSize;i++)
+         ringing[i] = VERY_SMALL;
 #ifdef SHORTCUTS2
       iir_mem2(exc, st->interp_qlpc, exc, response_bound, st->lpcSize, mem);
+      iir_mem16(ringing, interp_qlpc, ringing, response_bound, st->lpcSize, mem, stack);
       for (i=0;i<st->lpcSize;i++)
          mem[i]=st->mem_sw[i];
       filter_mem2(exc, st->bw_lpc1, st->bw_lpc2, res, response_bound, st->lpcSize, mem);
+         mem[i]=SHL32(st->mem_sw[i],1);
+      filter_mem16(ringing, st->bw_lpc1, st->bw_lpc2, ringing, response_bound, st->lpcSize, mem, stack);
       for (i=response_bound;i<st->subframeSize;i++)
          res[i]=0;
+         ringing[i]=0;
 #else
       iir_mem2(exc, st->interp_qlpc, exc, st->subframeSize, st->lpcSize, mem);
+      iir_mem16(ringing, interp_qlpc, ringing, st->subframeSize, st->lpcSize, mem, stack);
       for (i=0;i<st->lpcSize;i++)
          mem[i]=st->mem_sw[i];
       filter_mem2(exc, st->bw_lpc1, st->bw_lpc2, res, st->subframeSize, st->lpcSize, mem);
+         mem[i]=SHL32(st->mem_sw[i],1);
+      filter_mem16(ringing, bw_lpc1, bw_lpc2, ringing, st->subframeSize, st->lpcSize, mem, stack);
 #endif
 …
       for (i=0;i<st->lpcSize;i++)
          mem[i]=st->mem_sw[i];
       filter_mem2(sp, st->bw_lpc1, st->bw_lpc2, sw, st->subframeSize, st->lpcSize, mem);
+      filter_mem16(sw, bw_lpc1, bw_lpc2, sw, st->subframeSize, st->lpcSize, mem, stack);
       if (st->complexity==0)
 …
       /* Compute target signal */
       for (i=0;i<st->subframeSize;i++)
+         target[i]=sw[i]-res[i];
+         target[i]=SUB16(sw[i],PSHR32(ringing[i],1));
+      /* Reset excitation */
       for (i=0;i<st->subframeSize;i++)
          exc[i]=0;
 …
          if (st->lbr_48k)
+         {
             pitch = SUBMODE(ltp_quant)(target, sw, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2,
                                        exc, SUBMODE(ltp_params), pit_min, pit_max, ol_pitch_coef,
+            pitch = SUBMODE(ltp_quant)(target, sw, interp_qlpc, bw_lpc1, bw_lpc2,
+                                       exc32, SUBMODE(ltp_params), pit_min, pit_max, ol_pitch_coef,
                                        st->lpcSize, st->subframeSize, bits, stack,
                                        exc, syn_resp, st->complexity, ol_pitch_id, st->plc_tuning);
+                                       exc, syn_resp, st->complexity, ol_pitch_id, st->plc_tuning, &st->cumul_gain);
          } else {
 #endif
          /* Perform pitch search */
          pitch = SUBMODE(ltp_quant)(target, sw, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2,
                                     exc, SUBMODE(ltp_params), pit_min, pit_max, ol_pitch_coef,
+         pitch = SUBMODE(ltp_quant)(target, sw, interp_qlpc, bw_lpc1, bw_lpc2,
+                                    exc32, SUBMODE(ltp_params), pit_min, pit_max, ol_pitch_coef,
                                     st->lpcSize, st->subframeSize, bits, stack,
                                     exc, syn_resp, st->complexity, 0, st->plc_tuning);
+                                    exc, syn_resp, st->complexity, 0, st->plc_tuning, &st->cumul_gain);
 #ifdef EPIC_48K
+         }
 …
       /* Quantization of innovation */
+      {
-         spx_sig_t *innov;
          spx_word32_t ener=0;
          spx_word16_t fine_gain;
-         innov = st->innov+sub*st->subframeSize;
          for (i=0;i<st->subframeSize;i++)
             innov[i]=0;
          for (i=0;i<st->subframeSize;i++)
             real_exc[i] = SUB32(real_exc[i], exc[i]);
          ener = SHL32(EXTEND32(compute_rms(real_exc, st->subframeSize)),SIG_SHIFT);
+            real_exc[i] = SUB16(real_exc[i], PSHR32(exc32[i],SIG_SHIFT-1));
+         ener = SHL32(EXTEND32(compute_rms16(real_exc, st->subframeSize)),SIG_SHIFT);
          /*FIXME: Should use DIV32_16 and make sure result fits in 16 bits */
 #ifdef FIXED_POINT
+         {
             spx_word32_t f = DIV32(ener,PSHR32(ol_gain,SIG_SHIFT));
+            spx_word32_t f = PDIV32(ener,PSHR32(ol_gain,SIG_SHIFT));
             if (f<=32767)
                fine_gain = f;
 …
+         }
 #else
          fine_gain = DIV32_16(ener,PSHR32(ol_gain,SIG_SHIFT));
+         fine_gain = PDIV32_16(ener,PSHR32(ol_gain,SIG_SHIFT));
 #endif
          /* Calculate gain correction for the sub-frame (if any) */
 …
+         {
             /* Codebook search */
             SUBMODE(innovation_quant)(target, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2,
+            SUBMODE(innovation_quant)(target, interp_qlpc, bw_lpc1, bw_lpc2,
                                       SUBMODE(innovation_params), st->lpcSize, st->subframeSize,
                                       innov, syn_resp, bits, stack, st->complexity, SUBMODE(double_codebook));
 …
             for (i=0;i<st->subframeSize;i++)
                exc[i] = ADD32(exc[i],innov[i]);
+               exc[i] = EXTRACT16(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT));
          } else {
             speex_error("No fixed codebook");
+         }
+         if (innov_save)
+         {
+            for (i=0;i<st->subframeSize;i++)
+               innov_save[i] = innov[i];
+         }
          /* In some (rare) modes, we do a second search (more bits) to reduce noise even more */
          if (SUBMODE(double_codebook)) {
 …
                innov2[i]=0;
             for (i=0;i<st->subframeSize;i++)
                target[i]*=2.2;
             SUBMODE(innovation_quant)(target, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2,
+               target[i]=MULT16_16_P13(QCONST16(2.2,13), target[i]);
+            SUBMODE(innovation_quant)(target, interp_qlpc, bw_lpc1, bw_lpc2,
                                       SUBMODE(innovation_params), st->lpcSize, st->subframeSize,
                                       innov2, syn_resp, bits, stack, st->complexity, 0);
             signal_mul(innov2, innov2, (spx_word32_t) (ener*(1.f/2.2f)), st->subframeSize);
+            signal_mul(innov2, innov2, MULT16_32_Q15(QCONST16(0.454545,15),ener), st->subframeSize);
             for (i=0;i<st->subframeSize;i++)
+               exc[i] = ADD32(exc[i],innov2[i]);
+               exc[i] = ADD32(exc[i],PSHR32(innov2[i],SIG_SHIFT));
+            if (innov_save)
+            {
+               for (i=0;i<st->subframeSize;i++)
+                  innov_save[i] = ADD32(innov_save[i],innov2[i]);
+            }
             stack = tmp_stack;
+         }
 …
+      }
+      for (i=0;i<st->subframeSize;i++)
+         sw[i] = exc[i];
       /* Final signal synthesis from excitation */
       iir_mem2(exc, st->interp_qlpc, sp, st->subframeSize, st->lpcSize, st->mem_sp);
+      iir_mem16(sw, interp_qlpc, sw, st->subframeSize, st->lpcSize, st->mem_sp, stack);
       /* Compute weighted signal again, from synthesized speech (not sure it's the right thing) */
       if (st->complexity!=0)
          filter_mem2(sp, st->bw_lpc1, st->bw_lpc2, sw, st->subframeSize, st->lpcSize, st->mem_sw);
+         filter_mem16(sw, bw_lpc1, bw_lpc2, sw, st->subframeSize, st->lpcSize, st->mem_sw, stack);
+   }
 …
+   {
       for (i=0;i<st->lpcSize;i++)
          st->old_lsp[i] = st->lsp[i];
+         st->old_lsp[i] = lsp[i];
       for (i=0;i<st->lpcSize;i++)
          st->old_qlsp[i] = st->qlsp[i];
+         st->old_qlsp[i] = qlsp[i];
+   }
 …
    /* The next frame will not be the first (Duh!) */
    st->first = 0;
+#ifdef RESYNTH
+   /* Replace input by synthesized speech */
+   for (i=0;i<st->frameSize;i++)
+   {
+      spx_word32_t sig = PSHR32(st->frame[i],SIG_SHIFT);
+      if (sig>32767)
+         sig = 32767;
+      if (sig<-32767)
+         sig = -32767;
+     in[i]=sig;
+   }
+#endif
+   speex_move(st->winBuf, in+2*st->frameSize-st->windowSize, (st->windowSize-st->frameSize)*sizeof(spx_word16_t));
    if (SUBMODE(innovation_quant) == noise_codebook_quant || st->submodeID==0)
 …
    return 1;
+}
 void *nb_decoder_init(const SpeexMode *m)
 …
    st->submodeID=mode->defaultSubmode;
+   st->lpc_enh_enabled=0;
+   st->inBuf = speex_alloc((st->frameSize)*sizeof(spx_sig_t));
+   st->frame = st->inBuf;
+   st->excBuf = speex_alloc((st->frameSize + st->max_pitch + 1)*sizeof(spx_sig_t));
+   st->exc = st->excBuf + st->max_pitch + 1;
+   for (i=0;i<st->frameSize;i++)
+      st->inBuf[i]=0;
+   st->lpc_enh_enabled=1;
+   st->excBuf = speex_alloc((st->frameSize + 2*st->max_pitch + st->subframeSize + 12)*sizeof(spx_word16_t));
+   st->exc = st->excBuf + 2*st->max_pitch + st->subframeSize + 6;
    for (i=0;i<st->frameSize + st->max_pitch + 1;i++)
       st->excBuf[i]=0;
-   st->innov = speex_alloc((st->frameSize)*sizeof(spx_sig_t));
    st->interp_qlpc = speex_alloc(st->lpcSize*sizeof(spx_coef_t));
-   st->qlsp = speex_alloc(st->lpcSize*sizeof(spx_lsp_t));
    st->old_qlsp = speex_alloc(st->lpcSize*sizeof(spx_lsp_t));
+   st->interp_qlsp = speex_alloc(st->lpcSize*sizeof(spx_lsp_t));
+   st->mem_sp = speex_alloc((5*st->lpcSize)*sizeof(spx_mem_t));
+   st->comb_mem = speex_alloc(sizeof(CombFilterMem));
+   comb_filter_mem_init (st->comb_mem);
+   st->mem_sp = speex_alloc(st->lpcSize*sizeof(spx_mem_t));
    st->pi_gain = speex_alloc((st->nbSubframes)*sizeof(spx_word32_t));
    st->last_pitch = 40;
 …
 #endif
-   speex_free (st->inBuf);
    speex_free (st->excBuf);
-   speex_free (st->innov);
    speex_free (st->interp_qlpc);
-   speex_free (st->qlsp);
    speex_free (st->old_qlsp);
-   speex_free (st->interp_qlsp);
    speex_free (st->mem_sp);
-   speex_free (st->comb_mem);
    speex_free (st->pi_gain);
 …
    int i, sub;
    int pitch_val;
-   VARDECL(spx_coef_t *awk1);
-   VARDECL(spx_coef_t *awk2);
-   VARDECL(spx_coef_t *awk3);
    spx_word16_t pitch_gain;
    spx_word16_t fact;
 …
    /* Shift all buffers by one frame */
+   /*speex_move(st->inBuf, st->inBuf+st->frameSize, (st->bufSize-st->frameSize)*sizeof(spx_sig_t));*/
+   speex_move(st->excBuf, st->excBuf+st->frameSize, (st->max_pitch + 1)*sizeof(spx_sig_t));
+   ALLOC(awk1, (st->lpcSize+1), spx_coef_t);
+   ALLOC(awk2, (st->lpcSize+1), spx_coef_t);
+   ALLOC(awk3, (st->lpcSize+1), spx_coef_t);
+   speex_move(st->excBuf, st->excBuf+st->frameSize, (2*st->max_pitch + st->subframeSize + 12)*sizeof(spx_word16_t));
    for (sub=0;sub<st->nbSubframes;sub++)
+   {
       int offset;
+      spx_sig_t *sp, *exc;
+      spx_word16_t *sp;
+      spx_word16_t *exc;
       /* Offset relative to start of frame */
       offset = st->subframeSize*sub;
       /* Original signal */
       sp=st->frame+offset;
+      sp=out+offset;
       /* Excitation */
       exc=st->exc+offset;
       /* Excitation after post-filter*/
-      /* Calculate perceptually enhanced LPC filter */
-      if (st->lpc_enh_enabled)
+      {
-         spx_word16_t k1,k2,k3;
-         if (st->submodes[st->submodeID] != NULL)
+         {
-            k1=SUBMODE(lpc_enh_k1);
-            k2=SUBMODE(lpc_enh_k2);
-            k3=SUBMODE(lpc_enh_k3);
-         } else {
-            k1=k2=.7*GAMMA_SCALING;
-            k3=.0;
+         }
-         bw_lpc(k1, st->interp_qlpc, awk1, st->lpcSize);
-         bw_lpc(k2, st->interp_qlpc, awk2, st->lpcSize);
-         bw_lpc(k3, st->interp_qlpc, awk3, st->lpcSize);
+      }
       /* Make up a plausible excitation */
 …
       /*if (pitch_gain>.95)
         pitch_gain=.95;*/
+      innov_gain = compute_rms(st->innov, st->frameSize);
+      /* FIXME: This was rms of innovation (not exc) */
+      innov_gain = compute_rms16(st->exc, st->frameSize);
       pitch_val = st->last_pitch + SHR32((spx_int32_t)speex_rand(1+st->count_lost, &st->seed),SIG_SHIFT);
       if (pitch_val > st->max_pitch)
 …
       for (i=0;i<st->subframeSize;i++)
+      {
          exc[i]= MULT16_32_Q15(pitch_gain, (exc[i-pitch_val]+VERY_SMALL)) +
                MULT16_32_Q15(fact, MULT16_32_Q15(SHL(Q15ONE,15)-SHL(MULT16_16(pitch_gain,pitch_gain),1),speex_rand(innov_gain, &st->seed)));
+      }
+         /* FIXME: Second term need to be 16-bit */
+         exc[i]= MULT16_16_Q15(pitch_gain, (exc[i-pitch_val]+VERY_SMALL)) +
+               MULT16_16_Q15(fact, MULT16_16_Q15(SHL(Q15ONE,15)-SHL(MULT16_16(pitch_gain,pitch_gain),1),speex_rand(innov_gain, &st->seed)));
+      }
       for (i=0;i<st->subframeSize;i++)
+         sp[i]=exc[i];
+      /* Signal synthesis */
+      if (st->lpc_enh_enabled)
+      {
+         filter_mem2(sp, awk2, awk1, sp, st->subframeSize, st->lpcSize,
+                     st->mem_sp+st->lpcSize);
+         filter_mem2(sp, awk3, st->interp_qlpc, sp, st->subframeSize, st->lpcSize,
+                     st->mem_sp);
+      } else {
+         for (i=0;i<st->lpcSize;i++)
+            st->mem_sp[st->lpcSize+i] = 0;
+         iir_mem2(sp, st->interp_qlpc, sp, st->subframeSize, st->lpcSize,
+                     st->mem_sp);
+      }
+   }
+   for (i=0;i<st->frameSize;i++)
+   {
+      spx_word32_t sig = PSHR32(st->frame[i],SIG_SHIFT);
+      if (sig>32767)
+         sig = 32767;
+      if (sig<-32767)
+         sig = -32767;
+     out[i]=sig;
+         sp[i]=exc[i-st->subframeSize];
+      iir_mem16(sp, st->interp_qlpc, sp, st->subframeSize, st->lpcSize,
+                st->mem_sp, stack);
+      bw_lpc(QCONST16(.98,15), st->interp_qlpc, st->interp_qlpc, st->lpcSize);
+   }
 …
       st->pitch_gain_buf_idx = 0;
+}
 int nb_decode(void *state, SpeexBits *bits, void *vout)
 …
    int m;
    char *stack;
+   VARDECL(spx_coef_t *awk1);
+   VARDECL(spx_coef_t *awk2);
+   VARDECL(spx_coef_t *awk3);
+   VARDECL(spx_sig_t *innov);
+   VARDECL(spx_word32_t *exc32);
+   VARDECL(spx_coef_t *ak);
+   VARDECL(spx_lsp_t *qlsp);
    spx_word16_t pitch_average=0;
 #ifdef EPIC_48K
 …
 #endif
    spx_word16_t *out = vout;
+   VARDECL(spx_lsp_t *interp_qlsp);
    st=(DecState*)state;
 …
    /* Shift all buffers by one frame */
    speex_move(st->excBuf, st->excBuf+st->frameSize, (st->max_pitch + 1)*sizeof(spx_sig_t));
+   speex_move(st->excBuf, st->excBuf+st->frameSize, (2*st->max_pitch + st->subframeSize + 12)*sizeof(spx_word16_t));
    /* If null mode (no transmission), just set a couple things to zero*/
 …
          if (pgain>.6)
             pgain=.6;
+         innov_gain = compute_rms(st->innov, st->frameSize);
+         /* FIXME: This was innov, not exc */
+         innov_gain = compute_rms16(st->exc, st->frameSize);
          for (i=0;i<st->frameSize;i++)
+            st->exc[i]=VERY_SMALL;
+         speex_rand_vec(innov_gain, st->exc, st->frameSize);
+            st->exc[i]=speex_rand(innov_gain, &st->seed);
+      }
 …
       st->first=1;
+      for (i=0;i<st->frameSize;i++)
+         out[i] = st->exc[i];
       /* Final signal synthesis from excitation */
+      iir_mem2(st->exc, lpc, st->frame, st->frameSize, st->lpcSize, st->mem_sp);
+      for (i=0;i<st->frameSize;i++)
+      {
+         spx_word32_t sig = PSHR32(st->frame[i],SIG_SHIFT);
+         if (sig>32767)
+            sig = 32767;
+         if (sig<-32767)
+            sig = -32767;
+         out[i]=sig;
+      }
+      iir_mem16(out, lpc, out, st->frameSize, st->lpcSize, st->mem_sp, stack);
       st->count_lost=0;
 …
+   }
+   ALLOC(qlsp, st->lpcSize, spx_lsp_t);
    /* Unquantize LSPs */
    SUBMODE(lsp_unquant)(st->qlsp, st->lpcSize, bits);
+   SUBMODE(lsp_unquant)(qlsp, st->lpcSize, bits);
    /*Damp memory if a frame was lost and the LSP changed too much*/
 …
       spx_word32_t lsp_dist=0;
       for (i=0;i<st->lpcSize;i++)
          lsp_dist = ADD32(lsp_dist, EXTEND32(ABS(st->old_qlsp[i] - st->qlsp[i])));
+         lsp_dist = ADD32(lsp_dist, EXTEND32(ABS(st->old_qlsp[i] - qlsp[i])));
 #ifdef FIXED_POINT
       fact = SHR16(19661,SHR32(lsp_dist,LSP_SHIFT+2));
 …
       fact = .6*exp(-.2*lsp_dist);
 #endif
       for (i=0;i<2*st->lpcSize;i++)
+      for (i=0;i<st->lpcSize;i++)
          st->mem_sp[i] = MULT16_32_Q15(fact,st->mem_sp[i]);
+   }
 …
+   {
       for (i=0;i<st->lpcSize;i++)
          st->old_qlsp[i] = st->qlsp[i];
+         st->old_qlsp[i] = qlsp[i];
+   }
 …
 #endif
    ALLOC(awk1, st->lpcSize+1, spx_coef_t);
    ALLOC(awk2, st->lpcSize+1, spx_coef_t);
    ALLOC(awk3, st->lpcSize+1, spx_coef_t);
+   ALLOC(ak, st->lpcSize, spx_coef_t);
+   ALLOC(innov, st->subframeSize, spx_sig_t);
+   ALLOC(exc32, st->subframeSize, spx_word32_t);
    if (st->submodeID==1)
 …
+   {
       int offset;
+      spx_sig_t *sp, *exc;
+      spx_word16_t *exc;
+      spx_word16_t *sp;
+      spx_sig_t *innov_save = NULL;
       spx_word16_t tmp;
 …
       /* Offset relative to start of frame */
       offset = st->subframeSize*sub;
-      /* Original signal */
-      sp=st->frame+offset;
       /* Excitation */
       exc=st->exc+offset;
+      /* Excitation after post-filter*/
+      /* LSP interpolation (quantized and unquantized) */
+      lsp_interpolate(st->old_qlsp, st->qlsp, st->interp_qlsp, st->lpcSize, sub, st->nbSubframes);
+      /* Make sure the LSP's are stable */
+      lsp_enforce_margin(st->interp_qlsp, st->lpcSize, LSP_MARGIN);
+      /* Compute interpolated LPCs (unquantized) */
+      lsp_to_lpc(st->interp_qlsp, st->interp_qlpc, st->lpcSize, stack);
+      /* Compute enhanced synthesis filter */
+      if (st->lpc_enh_enabled)
+      {
+         bw_lpc(SUBMODE(lpc_enh_k1), st->interp_qlpc, awk1, st->lpcSize);
+         bw_lpc(SUBMODE(lpc_enh_k2), st->interp_qlpc, awk2, st->lpcSize);
+         bw_lpc(SUBMODE(lpc_enh_k3), st->interp_qlpc, awk3, st->lpcSize);
+      }
+      /* Compute analysis filter at w=pi */
+      {
+         spx_word32_t pi_g=LPC_SCALING;
+         for (i=0;i<st->lpcSize;i+=2)
+         {
+            /*pi_g += -st->interp_qlpc[i] +  st->interp_qlpc[i+1];*/
+            pi_g = ADD32(pi_g, SUB32(st->interp_qlpc[i+1],st->interp_qlpc[i]));
+         }
+         st->pi_gain[sub] = pi_g;
+      }
+      /* Original signal */
+      sp=out+offset;
+      if (st->innov_save)
+         innov_save = st->innov_save+offset;
       /* Reset excitation */
 …
          if (st->lbr_48k)
+         {
              SUBMODE(ltp_unquant)(exc, pit_min, pit_max, ol_pitch_coef, SUBMODE(ltp_params),
+             SUBMODE(ltp_unquant)(exc, exc32, pit_min, pit_max, ol_pitch_coef, SUBMODE(ltp_params),
                                   st->subframeSize, &pitch, &pitch_gain[0], bits, stack,
                                   st->count_lost, offset, st->last_pitch_gain, ol_pitch_id);
 …
 #endif
              SUBMODE(ltp_unquant)(exc, pit_min, pit_max, ol_pitch_coef, SUBMODE(ltp_params),
+             SUBMODE(ltp_unquant)(exc, exc32, pit_min, pit_max, ol_pitch_coef, SUBMODE(ltp_params),
                                   st->subframeSize, &pitch, &pitch_gain[0], bits, stack,
                                   st->count_lost, offset, st->last_pitch_gain, 0);
 …
 #endif
-         /* If we had lost frames, check energy of last received frame */
-         if (st->count_lost && ol_gain < st->last_ol_gain)
+         {
-            /*float fact = (float)ol_gain/(st->last_ol_gain+1);
-            for (i=0;i<st->subframeSize;i++)
-            exc[i]*=fact;*/
-            spx_word16_t fact = DIV32_16(SHL32(EXTEND32(ol_gain),15),st->last_ol_gain+1);
-            for (i=0;i<st->subframeSize;i++)
-               exc[i] = MULT16_32_Q15(fact, exc[i]);
+         }
          tmp = gain_3tap_to_1tap(pitch_gain);
          pitch_average += tmp;
+         if (tmp>best_pitch_gain)
+         if ((tmp>best_pitch_gain&&ABS(2*best_pitch-pitch)>=3&&ABS(3*best_pitch-pitch)>=4&&ABS(4*best_pitch-pitch)>=5)
+              || (tmp>MULT16_16_Q15(QCONST16(.6,15),best_pitch_gain)&&(ABS(best_pitch-2*pitch)<3||ABS(best_pitch-3*pitch)<4||ABS(best_pitch-4*pitch)<5))
+              || (MULT16_16_Q15(QCONST16(.67,15),tmp)>best_pitch_gain&&(ABS(2*best_pitch-pitch)<3||ABS(3*best_pitch-pitch)<4||ABS(4*best_pitch-pitch)<5)) )
+         {
             best_pitch = pitch;
+            best_pitch_gain = tmp;
+            if (tmp > best_pitch_gain)
+               best_pitch_gain = tmp;
+         }
       } else {
 …
          int q_energy;
          spx_word32_t ener;
-         spx_sig_t *innov;
-         innov = st->innov+sub*st->subframeSize;
          for (i=0;i<st->subframeSize;i++)
             innov[i]=0;
 …
+            {
                if (st->voc_offset>=0)
                   exc[st->voc_offset]=SIG_SCALING*sqrt(1.0*ol_pitch);
+                  exc[st->voc_offset]=sqrt(1.0*ol_pitch);
                st->voc_offset+=ol_pitch;
+            }
 …
             for (i=0;i<st->subframeSize;i++)
+            {
+               float exci=exc[i];
+               exc[i]=.8*g*exc[i]*ol_gain/SIG_SCALING + .6*g*st->voc_m1*ol_gain/SIG_SCALING + .5*g*innov[i] - .5*g*st->voc_m2 + (1-g)*innov[i];
+               spx_word16_t exci=exc[i];
+               /* FIXME: cleanup the innov[i]/SIG_SCALING */
+               exc[i]=.8*g*exc[i]*PSHR32(ol_gain,SIG_SHIFT) + .6*g*st->voc_m1*PSHR32(ol_gain,SIG_SHIFT) + (1-.5*g)*PSHR32(innov[i],SIG_SHIFT) - .5*g*PSHR32(st->voc_m2,SIG_SHIFT);
                st->voc_m1 = exci;
                st->voc_m2=innov[i];
 …
          } else {
             for (i=0;i<st->subframeSize;i++)
                exc[i]=ADD32(exc[i],innov[i]);
+               exc[i]=PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT);
             /*print_vec(exc, 40, "innov");*/
+         }
+         if (innov_save)
+         {
+            for (i=0;i<st->subframeSize;i++)
+               innov_save[i] = innov[i];
+         }
          /* Decode second codebook (only for some modes) */
 …
                innov2[i]=0;
             SUBMODE(innovation_unquant)(innov2, SUBMODE(innovation_params), st->subframeSize, bits, stack);
             signal_mul(innov2, innov2, (spx_word32_t) (ener*(1/2.2)), st->subframeSize);
+            signal_mul(innov2, innov2, MULT16_32_Q15(QCONST16(0.454545,15),ener), st->subframeSize);
             for (i=0;i<st->subframeSize;i++)
+               exc[i] = ADD32(exc[i],innov2[i]);
+               exc[i] = ADD16(exc[i],PSHR32(innov2[i],SIG_SHIFT));
+            if (innov_save)
+            {
+               for (i=0;i<st->subframeSize;i++)
+                  innov_save[i] = ADD32(innov_save[i],innov2[i]);
+            }
             stack = tmp_stack;
+         }
+      }
+      /* If the last packet was lost, re-scale the excitation to obtain the same energy as encoded in ol_gain */
+      if (st->count_lost)
+      {
+         spx_word16_t exc_ener;
+         spx_word32_t gain32;
+         spx_word16_t gain;
+         exc_ener = compute_rms (exc, st->subframeSize);
+         gain32 = DIV32(ol_gain, ADD16(exc_ener,1));
+      }
+   }
+   ALLOC(interp_qlsp, st->lpcSize, spx_lsp_t);
+   if (st->lpc_enh_enabled && SUBMODE(comb_gain)>0 && !st->count_lost)
+   {
+      multicomb(st->exc-st->subframeSize, out, st->interp_qlpc, st->lpcSize, 2*st->subframeSize, best_pitch, 40, SUBMODE(comb_gain), stack);
+      multicomb(st->exc+st->subframeSize, out+2*st->subframeSize, st->interp_qlpc, st->lpcSize, 2*st->subframeSize, best_pitch, 40, SUBMODE(comb_gain), stack);
+   } else {
+      for (i=0;i<st->frameSize;i++)
+         out[i]=st->exc[i-st->subframeSize];
+   }
+   /* If the last packet was lost, re-scale the excitation to obtain the same energy as encoded in ol_gain */
+   if (st->count_lost)
+   {
+      spx_word16_t exc_ener;
+      spx_word32_t gain32;
+      spx_word16_t gain;
+      exc_ener = compute_rms16 (st->exc, st->frameSize);
+      gain32 = PDIV32(ol_gain, ADD16(exc_ener,1));
 #ifdef FIXED_POINT
          if (gain32 > 32768)
             gain32 = 32768;
          gain = EXTRACT16(gain32);
+      if (gain32 > 32768)
+         gain32 = 32768;
+      gain = EXTRACT16(gain32);
 #else
+         if (gain32 > 2)
+            gain32=2;
+         gain = gain32;
+#endif
+         for (i=0;i<st->subframeSize;i++)
+            exc[i] = MULT16_32_Q14(gain, exc[i]);
+      }
+      for (i=0;i<st->subframeSize;i++)
+         sp[i]=exc[i];
+      /* Signal synthesis */
+      if (st->lpc_enh_enabled && SUBMODE(comb_gain)>0)
+         comb_filter(exc, sp, st->interp_qlpc, st->lpcSize, st->subframeSize,
+                              pitch, pitch_gain, SUBMODE(comb_gain), st->comb_mem);
+      if (st->lpc_enh_enabled)
+      {
+         /* Use enhanced LPC filter */
+         filter_mem2(sp, awk2, awk1, sp, st->subframeSize, st->lpcSize,
+                     st->mem_sp+st->lpcSize);
+         filter_mem2(sp, awk3, st->interp_qlpc, sp, st->subframeSize, st->lpcSize,
+                     st->mem_sp);
+      } else {
+         /* Use regular filter */
+         for (i=0;i<st->lpcSize;i++)
+            st->mem_sp[st->lpcSize+i] = 0;
+         iir_mem2(sp, st->interp_qlpc, sp, st->subframeSize, st->lpcSize,
+                     st->mem_sp);
+      }
+   }
+   /*Copy output signal*/
+   for (i=0;i<st->frameSize;i++)
+   {
+      spx_word32_t sig = PSHR32(st->frame[i],SIG_SHIFT);
+      if (sig>32767)
+         sig = 32767;
+      if (sig<-32767)
+         sig = -32767;
+     out[i]=sig;
+      if (gain32 > 2)
+         gain32=2;
+      gain = gain32;
+#endif
+      for (i=0;i<st->frameSize;i++)
+      {
+         st->exc[i] = MULT16_16_Q14(gain, st->exc[i]);
+         out[i]=st->exc[i-st->subframeSize];
+      }
+   }
+   /*Loop on subframes */
+   for (sub=0;sub<st->nbSubframes;sub++)
+   {
+      int offset;
+      spx_word16_t *sp;
+      spx_word16_t *exc;
+      /* Offset relative to start of frame */
+      offset = st->subframeSize*sub;
+      /* Original signal */
+      sp=out+offset;
+      /* Excitation */
+      exc=st->exc+offset;
+      /* LSP interpolation (quantized) */
+      lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, st->lpcSize, sub, st->nbSubframes);
+      /* Make sure the LSP's are stable */
+      lsp_enforce_margin(interp_qlsp, st->lpcSize, LSP_MARGIN);
+      /* Compute interpolated LPCs (quantized) */
+      lsp_to_lpc(interp_qlsp, ak, st->lpcSize, stack);
+      /* Compute analysis filter at w=pi */
+      {
+         spx_word32_t pi_g=LPC_SCALING;
+         for (i=0;i<st->lpcSize;i+=2)
+         {
+            /*pi_g += -st->interp_qlpc[i] +  st->interp_qlpc[i+1];*/
+            pi_g = ADD32(pi_g, SUB32(EXTEND32(st->interp_qlpc[i+1]),EXTEND32(st->interp_qlpc[i])));
+         }
+         st->pi_gain[sub] = pi_g;
+      }
+      iir_mem16(sp, st->interp_qlpc, sp, st->subframeSize, st->lpcSize,
+                st->mem_sp, stack);
+      for (i=0;i<st->lpcSize;i++)
+         st->interp_qlpc[i] = ak[i];
+   }
 …
    /* Store the LSPs for interpolation in the next frame */
    for (i=0;i<st->lpcSize;i++)
       st->old_qlsp[i] = st->qlsp[i];
+      st->old_qlsp[i] = qlsp[i];
    /* The next frame will not be the first (Duh!) */
 …
       break;
    case SPEEX_SET_ABR:
+      st->abr_enabled = (*(int*)ptr);
+      st->vbr_enabled = 1;
+      {
+         int i=10, rate, target;
+      st->abr_enabled = (*(spx_int32_t*)ptr);
+      st->vbr_enabled = st->abr_enabled!=0;
+      if (st->vbr_enabled)
+      {
+         int i=10;
+         spx_int32_t rate, target;
          float vbr_qual;
          target = (*(int*)ptr);
+         target = (*(spx_int32_t*)ptr);
          while (i>=0)
+         {
 …
       break;
    case SPEEX_GET_ABR:
       (*(int*)ptr) = st->abr_enabled;
+      (*(spx_int32_t*)ptr) = st->abr_enabled;
       break;
    case SPEEX_SET_VBR_QUALITY:
 …
       break;
    case SPEEX_GET_COMPLEXITY:
       (*(int*)ptr) = st->complexity;
+      (*(spx_int32_t*)ptr) = st->complexity;
       break;
    case SPEEX_SET_BITRATE:
+      {
+         int i=10, rate, target;
+         target = (*(int*)ptr);
+         int i=10;
+         spx_int32_t rate, target;
+         target = (*(spx_int32_t*)ptr);
          while (i>=0)
+         {
 …
    case SPEEX_GET_BITRATE:
       if (st->submodes[st->submodeID])
          (*(int*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/st->frameSize;
+         (*(spx_int32_t*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/st->frameSize;
       else
          (*(int*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/st->frameSize;
+         (*(spx_int32_t*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/st->frameSize;
       break;
    case SPEEX_SET_SAMPLING_RATE:
       st->sampling_rate = (*(int*)ptr);
+      st->sampling_rate = (*(spx_int32_t*)ptr);
       break;
    case SPEEX_GET_SAMPLING_RATE:
       (*(int*)ptr)=st->sampling_rate;
+      (*(spx_int32_t*)ptr)=st->sampling_rate;
       break;
    case SPEEX_RESET_STATE:
 …
          st->first = 1;
          for (i=0;i<st->lpcSize;i++)
             st->lsp[i]=(M_PI*((float)(i+1)))/(st->lpcSize+1);
+            st->old_lsp[i]=(M_PI*((float)(i+1)))/(st->lpcSize+1);
          for (i=0;i<st->lpcSize;i++)
             st->mem_sw[i]=st->mem_sw_whole[i]=st->mem_sp[i]=st->mem_exc[i]=0;
          for (i=0;i<st->frameSize+st->max_pitch+1;i++)
             st->excBuf[i]=st->swBuf[i]=0;
          for (i=0;i<st->windowSize;i++)
             st->inBuf[i]=0;
+         for (i=0;i<st->windowSize-st->frameSize;i++)
+            st->winBuf[i]=0;
+      }
       break;
 …
       (*(int*)ptr)=(st->plc_tuning);
       break;
+   case SPEEX_SET_VBR_MAX_BITRATE:
+      st->vbr_max = (*(spx_int32_t*)ptr);
+      break;
+   case SPEEX_GET_VBR_MAX_BITRATE:
+      (*(spx_int32_t*)ptr) = st->vbr_max;
+      break;
+   /* This is all internal stuff past this point */
    case SPEEX_GET_PI_GAIN:
+      {
 …
+      {
          int i;
          spx_sig_t *e = (spx_sig_t*)ptr;
+         spx_word16_t *e = (spx_word16_t*)ptr;
          for (i=0;i<st->frameSize;i++)
             e[i]=st->exc[i];
+      }
       break;
-   case SPEEX_GET_INNOV:
+      {
-         int i;
-         spx_sig_t *e = (spx_sig_t*)ptr;
-         for (i=0;i<st->frameSize;i++)
-            e[i]=st->innov[i];
+      }
-      break;
    case SPEEX_GET_RELATIVE_QUALITY:
       (*(float*)ptr)=st->relative_quality;
+      break;
+   case SPEEX_SET_INNOVATION_SAVE:
+      st->innov_save = ptr;
       break;
    default:
 …
    case SPEEX_GET_BITRATE:
       if (st->submodes[st->submodeID])
          (*(int*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/st->frameSize;
+         (*(spx_int32_t*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/st->frameSize;
       else
          (*(int*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/st->frameSize;
+         (*(spx_int32_t*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/st->frameSize;
       break;
    case SPEEX_SET_SAMPLING_RATE:
       st->sampling_rate = (*(int*)ptr);
+      st->sampling_rate = (*(spx_int32_t*)ptr);
       break;
    case SPEEX_GET_SAMPLING_RATE:
       (*(int*)ptr)=st->sampling_rate;
+      (*(spx_int32_t*)ptr)=st->sampling_rate;
       break;
    case SPEEX_SET_HANDLER:
 …
+      {
          int i;
          for (i=0;i<2*st->lpcSize;i++)
+         for (i=0;i<st->lpcSize;i++)
             st->mem_sp[i]=0;
          for (i=0;i<st->frameSize + st->max_pitch + 1;i++)
             st->excBuf[i]=0;
-         for (i=0;i<st->frameSize;i++)
-            st->inBuf[i] = 0;
+      }
       break;
 …
    case SPEEX_GET_SUBMODE_ENCODING:
       (*(int*)ptr) = st->encode_submode;
+      break;
+   case SPEEX_GET_LOOKAHEAD:
+      (*(int*)ptr)=st->subframeSize;
       break;
    case SPEEX_GET_PI_GAIN:
 …
+      {
          int i;
          spx_sig_t *e = (spx_sig_t*)ptr;
+         spx_word16_t *e = (spx_word16_t*)ptr;
          for (i=0;i<st->frameSize;i++)
             e[i]=st->exc[i];
+      }
       break;
-   case SPEEX_GET_INNOV:
+      {
-         int i;
-         spx_sig_t *e = (spx_sig_t*)ptr;
-         for (i=0;i<st->frameSize;i++)
-            e[i]=st->innov[i];
+      }
-      break;
    case SPEEX_GET_DTX_STATUS:
       *((int*)ptr) = st->dtx_enabled;
+      break;
+   case SPEEX_SET_INNOVATION_SAVE:
+      st->innov_save = ptr;
       break;
    default:

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/nb_celp.h

-                      r278
+                      r628
 /**Structure representing the full state of the narrowband encoder*/
 typedef struct EncState {
    const SpeexMode *mode;       /**< Mode corresponding to the state */
    int    first;          /**< Is this the first frame? */
    int    frameSize;      /**< Size of frames */
    int    subframeSize;   /**< Size of sub-frames */
    int    nbSubframes;    /**< Number of sub-frames */
    int    windowSize;     /**< Analysis (LPC) window length */
    int    lpcSize;        /**< LPC order */
    int    min_pitch;      /**< Minimum pitch value allowed */
    int    max_pitch;      /**< Maximum pitch value allowed */
+   const SpeexMode *mode;        /**< Mode corresponding to the state */
+   int    first;                 /**< Is this the first frame? */
+   int    frameSize;             /**< Size of frames */
+   int    subframeSize;          /**< Size of sub-frames */
+   int    nbSubframes;           /**< Number of sub-frames */
+   int    windowSize;            /**< Analysis (LPC) window length */
+   int    lpcSize;               /**< LPC order */
+   int    min_pitch;             /**< Minimum pitch value allowed */
+   int    max_pitch;             /**< Maximum pitch value allowed */
    int    safe_pitch;     /**< Don't use too large values for pitch (in case we lose a packet) */
    int    bounded_pitch;  /**< Next frame should not rely on previous frames for pitch */
    int    ol_pitch;       /**< Open-loop pitch */
    int    ol_voiced;      /**< Open-loop voiced/non-voiced decision */
+   spx_word32_t cumul_gain;      /**< Product of previously used pitch gains (Q10) */
+   int    bounded_pitch;         /**< Next frame should not rely on previous frames for pitch */
+   int    ol_pitch;              /**< Open-loop pitch */
+   int    ol_voiced;             /**< Open-loop voiced/non-voiced decision */
    int   *pitch;
 …
 #ifdef VORBIS_PSYCHO
    VorbisPsy *psy;
+   float *psy_window;
    float *curve;
    float *old_curve;
 …
    spx_word16_t  gamma1;         /**< Perceptual filter: A(z/gamma1) */
    spx_word16_t  gamma2;         /**< Perceptual filter: A(z/gamma2) */
    float  lag_factor;     /**< Lag windowing Gaussian width */
+   float  lag_factor;            /**< Lag windowing Gaussian width */
    spx_word16_t  lpc_floor;      /**< Noise floor multiplier for A[0] in LPC analysis*/
+   char  *stack;          /**< Pseudo-stack allocation for temporary memory */
+   spx_sig_t *inBuf;          /**< Input buffer (original signal) */
+   spx_sig_t *frame;          /**< Start of original frame */
+   spx_sig_t *excBuf;         /**< Excitation buffer */
+   spx_sig_t *exc;            /**< Start of excitation frame */
+   spx_sig_t *swBuf;          /**< Weighted signal buffer */
+   spx_sig_t *sw;             /**< Start of weighted signal frame */
+   spx_sig_t *innov;          /**< Innovation for the frame */
+   spx_word16_t *window;         /**< Temporary (Hanning) window */
+   spx_word16_t *autocorr;       /**< auto-correlation */
+   char  *stack;                 /**< Pseudo-stack allocation for temporary memory */
+   spx_word16_t *winBuf;         /**< Input buffer (original signal) */
+   spx_word16_t *excBuf;         /**< Excitation buffer */
+   spx_word16_t *exc;            /**< Start of excitation frame */
+   spx_word16_t *swBuf;          /**< Weighted signal buffer */
+   spx_word16_t *sw;             /**< Start of weighted signal frame */
+   const spx_word16_t *window;   /**< Temporary (Hanning) window */
    spx_word16_t *lagWindow;      /**< Window applied to auto-correlation */
+   spx_coef_t *lpc;            /**< LPCs for current frame */
+   spx_lsp_t *lsp;            /**< LSPs for current frame */
+   spx_lsp_t *qlsp;           /**< Quantized LSPs for current frame */
+   spx_lsp_t *old_lsp;        /**< LSPs for previous frame */
+   spx_lsp_t *old_qlsp;       /**< Quantized LSPs for previous frame */
+   spx_lsp_t *interp_lsp;     /**< Interpolated LSPs */
+   spx_lsp_t *interp_qlsp;    /**< Interpolated quantized LSPs */
+   spx_coef_t *interp_lpc;     /**< Interpolated LPCs */
+   spx_coef_t *interp_qlpc;    /**< Interpolated quantized LPCs */
+   spx_coef_t *bw_lpc1;        /**< LPCs after bandwidth expansion by gamma1 for perceptual weighting*/
+   spx_coef_t *bw_lpc2;        /**< LPCs after bandwidth expansion by gamma2 for perceptual weighting*/
+   spx_mem_t *mem_sp;         /**< Filter memory for signal synthesis */
+   spx_mem_t *mem_sw;         /**< Filter memory for perceptually-weighted signal */
+   spx_mem_t *mem_sw_whole;   /**< Filter memory for perceptually-weighted signal (whole frame)*/
+   spx_mem_t *mem_exc;        /**< Filter memory for excitation (whole frame) */
+   spx_lsp_t *old_lsp;           /**< LSPs for previous frame */
+   spx_lsp_t *old_qlsp;          /**< Quantized LSPs for previous frame */
+   spx_mem_t *mem_sp;            /**< Filter memory for signal synthesis */
+   spx_mem_t *mem_sw;            /**< Filter memory for perceptually-weighted signal */
+   spx_mem_t *mem_sw_whole;      /**< Filter memory for perceptually-weighted signal (whole frame)*/
+   spx_mem_t *mem_exc;           /**< Filter memory for excitation (whole frame) */
+   spx_mem_t *mem_exc2;          /**< Filter memory for excitation (whole frame) */
    spx_word32_t *pi_gain;        /**< Gain of LPC filter at theta=pi (fe/2) */
+   VBRState *vbr;         /**< State of the VBR data */
+   float  vbr_quality;    /**< Quality setting for VBR encoding */
+   float  relative_quality; /**< Relative quality that will be needed by VBR */
+   int    vbr_enabled;    /**< 1 for enabling VBR, 0 otherwise */
+   int    vad_enabled;    /**< 1 for enabling VAD, 0 otherwise */
+   int    dtx_enabled;    /**< 1 for enabling DTX, 0 otherwise */
+   int    dtx_count;      /**< Number of consecutive DTX frames */
+   int    abr_enabled;    /**< ABR setting (in bps), 0 if off */
+   spx_sig_t *innov_save;        /**< If non-NULL, innovation is copied here */
+   VBRState *vbr;                /**< State of the VBR data */
+   float  vbr_quality;           /**< Quality setting for VBR encoding */
+   float  relative_quality;      /**< Relative quality that will be needed by VBR */
+   int    vbr_enabled;           /**< 1 for enabling VBR, 0 otherwise */
+   spx_int32_t vbr_max;          /**< Max bit-rate allowed in VBR mode */
+   int    vad_enabled;           /**< 1 for enabling VAD, 0 otherwise */
+   int    dtx_enabled;           /**< 1 for enabling DTX, 0 otherwise */
+   int    dtx_count;             /**< Number of consecutive DTX frames */
+   spx_int32_t abr_enabled;      /**< ABR setting (in bps), 0 if off */
    float  abr_drift;
    float  abr_drift2;
    float  abr_count;
    int    complexity;     /**< Complexity setting (0-10 from least complex to most complex) */
    int    sampling_rate;
+   int    complexity;            /**< Complexity setting (0-10 from least complex to most complex) */
+   spx_int32_t sampling_rate;
    int    plc_tuning;
    int    encode_submode;
    const SpeexSubmode * const *submodes; /**< Sub-mode data */
    int    submodeID;      /**< Activated sub-mode */
    int    submodeSelect;  /**< Mode chosen by the user (may differ from submodeID if VAD is on) */
+   int    submodeID;             /**< Activated sub-mode */
+   int    submodeSelect;         /**< Mode chosen by the user (may differ from submodeID if VAD is on) */
 } EncState;
 …
 typedef struct DecState {
    const SpeexMode *mode;       /**< Mode corresponding to the state */
    int    first;          /**< Is this the first frame? */
    int    count_lost;     /**< Was the last frame lost? */
    int    frameSize;      /**< Size of frames */
    int    subframeSize;   /**< Size of sub-frames */
    int    nbSubframes;    /**< Number of sub-frames */
    int    lpcSize;        /**< LPC order */
    int    min_pitch;      /**< Minimum pitch value allowed */
    int    max_pitch;      /**< Maximum pitch value allowed */
    int    sampling_rate;
+   int    first;                /**< Is this the first frame? */
+   int    count_lost;           /**< Was the last frame lost? */
+   int    frameSize;            /**< Size of frames */
+   int    subframeSize;         /**< Size of sub-frames */
+   int    nbSubframes;          /**< Number of sub-frames */
+   int    lpcSize;              /**< LPC order */
+   int    min_pitch;            /**< Minimum pitch value allowed */
+   int    max_pitch;            /**< Maximum pitch value allowed */
+   spx_int32_t sampling_rate;
 #ifdef EPIC_48K
 …
 #endif
    spx_word16_t  last_ol_gain;   /**< Open-loop gain for previous frame */
+   spx_word16_t  last_ol_gain;  /**< Open-loop gain for previous frame */
+   char  *stack;          /**< Pseudo-stack allocation for temporary memory */
+   spx_sig_t *inBuf;          /**< Input buffer (original signal) */
+   spx_sig_t *frame;          /**< Start of original frame */
+   spx_sig_t *excBuf;         /**< Excitation buffer */
+   spx_sig_t *exc;            /**< Start of excitation frame */
+   spx_sig_t *innov;          /**< Innovation for the frame */
+   spx_lsp_t *qlsp;           /**< Quantized LSPs for current frame */
+   spx_lsp_t *old_qlsp;       /**< Quantized LSPs for previous frame */
+   spx_lsp_t *interp_qlsp;    /**< Interpolated quantized LSPs */
+   spx_coef_t *interp_qlpc;    /**< Interpolated quantized LPCs */
+   spx_mem_t *mem_sp;         /**< Filter memory for synthesis signal */
+   spx_word32_t *pi_gain;        /**< Gain of LPC filter at theta=pi (fe/2) */
+   int    last_pitch;     /**< Pitch of last correctly decoded frame */
+   char  *stack;                /**< Pseudo-stack allocation for temporary memory */
+   spx_word16_t *excBuf;        /**< Excitation buffer */
+   spx_word16_t *exc;           /**< Start of excitation frame */
+   spx_lsp_t *old_qlsp;         /**< Quantized LSPs for previous frame */
+   spx_coef_t *interp_qlpc;     /**< Interpolated quantized LPCs */
+   spx_mem_t *mem_sp;           /**< Filter memory for synthesis signal */
+   spx_word32_t *pi_gain;       /**< Gain of LPC filter at theta=pi (fe/2) */
+   spx_sig_t *innov_save;       /**< If non-NULL, innovation is copied here */
+   /* This is used in packet loss concealment */
+   int    last_pitch;           /**< Pitch of last correctly decoded frame */
    spx_word16_t  last_pitch_gain; /**< Pitch gain of last correctly decoded frame */
    spx_word16_t  pitch_gain_buf[3];  /**< Pitch gain of last decoded frames */
    int    pitch_gain_buf_idx; /**< Tail of the buffer */
    spx_int32_t seed;          /**< Seed used for random number generation */
+   spx_word16_t  pitch_gain_buf[3]; /**< Pitch gain of last decoded frames */
+   int    pitch_gain_buf_idx;   /**< Tail of the buffer */
+   spx_int32_t seed;            /**< Seed used for random number generation */
    int    encode_submode;
    const SpeexSubmode * const *submodes; /**< Sub-mode data */
+   int    submodeID;      /**< Activated sub-mode */
+   int    lpc_enh_enabled; /**< 1 when LPC enhancer is on, 0 otherwise */
+   CombFilterMem *comb_mem;
+   int    submodeID;            /**< Activated sub-mode */
+   int    lpc_enh_enabled;      /**< 1 when LPC enhancer is on, 0 otherwise */
    SpeexCallback speex_callbacks[SPEEX_MAX_CALLBACKS];
 …
    /*Vocoder data*/
    float  voc_m1;
    float  voc_m2;
+   spx_word16_t  voc_m1;
+   spx_word32_t  voc_m2;
    float  voc_mean;
    int    voc_offset;

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/pseudofloat.h

-                      r278
+                      r628
 } spx_float_t;
+#define FLOAT_ZERO ((spx_float_t){0,0})
+#define FLOAT_ONE ((spx_float_t){16384,-14})
+#define FLOAT_HALF ((spx_float_t){16384,-15})
+static const spx_float_t FLOAT_ZERO = {0,0};
+static const spx_float_t FLOAT_ONE = {16384,-14};
+static const spx_float_t FLOAT_HALF = {16384,-15};
 #define MIN(a,b) ((a)<(b)?(a):(b))
 …
+   }
    if (x==0)
+      return (spx_float_t) {0,0};
+   {
+      spx_float_t r = {0,0};
+      return r;
+   }
    while (x>32767)
+   {
 …
+   }
    if (sign)
+      return (spx_float_t) {-x,e};
+   {
+      spx_float_t r;
+      r.m = -x;
+      r.e = e;
+      return r;
+   }
    else
+      return (spx_float_t) {x,e};
+   {
+      spx_float_t r;
+      r.m = x;
+      r.e = e;
+      return r;
+   }
+}
 …
    else if (b.m==0)
       return a;
+   r = (a).e > (b).e ? (spx_float_t) {((a).m>>1) + ((b).m>>MIN(15,(a).e-(b).e+1)),(a).e+1} : (spx_float_t) {((b).m>>1) + ((a).m>>MIN(15,(b).e-(a).e+1)),(b).e+1};
+   if ((a).e > (b).e)
+   {
+      r.m = ((a).m>>1) + ((b).m>>MIN(15,(a).e-(b).e+1));
+      r.e = (a).e+1;
+   }
+   else
+   {
+      r.m = ((b).m>>1) + ((a).m>>MIN(15,(b).e-(a).e+1));
+      r.e = (b).e+1;
+   }
    if (r.m>0)
+   {
 …
    else if (b.m==0)
       return a;
+   r = (a).e > (b).e ? (spx_float_t) {((a).m>>1) - ((b).m>>MIN(15,(a).e-(b).e+1)),(a).e+1} : (spx_float_t) {((a).m>>MIN(15,(b).e-(a).e+1)) - ((b).m>>1) ,(b).e+1};
+   if ((a).e > (b).e)
+   {
+      r.m = ((a).m>>1) - ((b).m>>MIN(15,(a).e-(b).e+1));
+      r.e = (a).e+1;
+   }
+   else
+   {
+      r.m = ((a).m>>MIN(15,(b).e-(a).e+1)) - ((b).m>>1);
+      r.e = (b).e+1;
+   }
    if (r.m>0)
+   {
 …
 static inline spx_float_t FLOAT_MULT(spx_float_t a, spx_float_t b)
+{
+   spx_float_t r = (spx_float_t) {(spx_int16_t)((spx_int32_t)(a).m*(b).m>>15), (a).e+(b).e+15};
+   spx_float_t r;
+   r.m = (spx_int16_t)((spx_int32_t)(a).m*(b).m>>15);
+   r.e = (a).e+(b).e+15;
    if (r.m>0)
+   {
 …
 static inline spx_float_t FLOAT_SHL(spx_float_t a, int b)
+{
+   return (spx_float_t) {a.m,a.e+b};
+   spx_float_t r;
+   r.m = a.m;
+   r.e = a.e+b;
+   return r;
+}
 …
+{
    if (a.e<0)
       return (a.m+(1<<(-a.e-1)))>>-a.e;
+      return EXTRACT16((EXTEND32(a.m)+(1<<(-a.e-1)))>>-a.e);
    else
       return a.m<<a.e;
 …
+{
    int e=0;
+   spx_float_t r;
    /* FIXME: Handle the sign */
    if (a==0)
+      return (spx_float_t) {0,0};
+   {
+      return FLOAT_ZERO;
+   }
    while (a>32767)
+   {
 …
       e--;
+   }
+   return (spx_float_t) {MULT16_16_Q15(a,b),e+15};
+   r.m = MULT16_16_Q15(a,b);
+   r.e = e+15;
+   return r;
+}
 …
+{
    int e=0;
+   spx_float_t r;
    /* FIXME: Handle the sign */
    if (a==0)
+      return (spx_float_t) {0,0};
+   while (a<SHL32(b.m,14))
+   {
+      return FLOAT_ZERO;
+   }
+   while (a<SHL32(EXTEND32(b.m),14))
+   {
       a <<= 1;
       e--;
+   }
    while (a>=SHL32(b.m-1,15))
+   while (a>=SHL32(EXTEND32(b.m-1),15))
+   {
       a >>= 1;
       e++;
+   }
+   return (spx_float_t) {DIV32_16(a,b.m),e-b.e};
+   r.m = DIV32_16(a,b.m);
+   r.e = e-b.e;
+   return r;
+}
 …
+{
    int e=0;
+   spx_float_t r;
    /* FIXME: Handle the sign */
    if (a==0)
+      return (spx_float_t) {0,0};
+   {
+      return FLOAT_ZERO;
+   }
    while (b>32767)
+   {
 …
       e++;
+   }
+   return (spx_float_t) {DIV32_16(a,b),e};
+   r.m = DIV32_16(a,b);
+   r.e = e;
+   return r;
+}
 …
    int e=0;
    spx_int32_t num;
+   spx_float_t r;
    num = a.m;
    while (a.m >= b.m)
 …
+   }
    num = num << (15-e);
+   return (spx_float_t) {DIV32_16(num,b.m),a.e-b.e-15+e};
+   r.m = DIV32_16(num,b.m);
+   r.e = a.e-b.e-15+e;
+   return r;
+}

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/quant_lsp.c

-                      r278
+                      r628
 #endif
 #include "misc.h"
+#ifdef BFIN_ASM
+#include "quant_lsp_bfin.h"
+#endif
 #ifdef FIXED_POINT
 …
 /* Note: x is modified*/
+#ifndef OVERRIDE_LSP_QUANT
 static int lsp_quant(spx_word16_t *x, const signed char *cdbk, int nbVec, int nbDim)
+{
 …
    spx_word32_t dist;
    spx_word16_t tmp;
    spx_word32_t best_dist=0;
+   spx_word32_t best_dist=VERY_LARGE32;
    int best_id=0;
    const signed char *ptr=cdbk;
 …
          tmp=SUB16(x[j],SHL16((spx_word16_t)*ptr++,5));
          dist=MAC16_16(dist,tmp,tmp);
+      }
       if (dist<best_dist || i==0)
+      }
+      if (dist<best_dist)
+      {
          best_dist=dist;
 …
    return best_id;
+}
+#endif
 /* Note: x is modified*/
+#ifndef OVERRIDE_LSP_WEIGHT_QUANT
 static int lsp_weight_quant(spx_word16_t *x, spx_word16_t *weight, const signed char *cdbk, int nbVec, int nbDim)
+{
 …
    spx_word32_t dist;
    spx_word16_t tmp;
    spx_word32_t best_dist=0;
+   spx_word32_t best_dist=VERY_LARGE32;
    int best_id=0;
    const signed char *ptr=cdbk;
 …
          dist=MAC16_32_Q15(dist,weight[j],MULT16_16(tmp,tmp));
+      }
       if (dist<best_dist || i==0)
+      if (dist<best_dist)
+      {
          best_dist=dist;
 …
    return best_id;
+}
+#endif
 void lsp_quant_nb(spx_lsp_t *lsp, spx_lsp_t *qlsp, int order, SpeexBits *bits)

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/sb_celp.c

-                      r278
+                      r628
 #endif
+extern const spx_word16_t lpc_window[];
 static void mix_and_saturate(spx_word32_t *x0, spx_word32_t *x1, spx_word16_t *out, int len)
+{
 …
    st->subframeSize = mode->subframeSize;
    st->nbSubframes = mode->frameSize/mode->subframeSize;
    st->windowSize = st->frame_size*3/2;
+   st->windowSize = st->frame_size+st->subframeSize;
    st->lpcSize=mode->lpcSize;
    st->bufSize=mode->bufSize;
 …
    st->res=speex_alloc((st->frame_size)*sizeof(spx_sig_t));
    st->sw=speex_alloc((st->frame_size)*sizeof(spx_sig_t));
+   st->target=speex_alloc((st->frame_size)*sizeof(spx_sig_t));
+   /*Asymmetric "pseudo-Hamming" window*/
+   {
+      int part1, part2;
+      part1 = st->subframeSize*7/2;
+      part2 = st->subframeSize*5/2;
+      st->window = speex_alloc((st->windowSize)*sizeof(spx_word16_t));
+      for (i=0;i<part1;i++)
+         st->window[i]=(spx_word16_t)(SIG_SCALING*(.54-.46*cos(M_PI*i/part1)));
+      for (i=0;i<part2;i++)
+         st->window[part1+i]=(spx_word16_t)(SIG_SCALING*(.54+.46*cos(M_PI*i/part2)));
+   }
+   st->window= lpc_window;
    st->lagWindow = speex_alloc((st->lpcSize+1)*sizeof(spx_word16_t));
 …
    st->interp_qlpc = speex_alloc(st->lpcSize*sizeof(spx_coef_t));
    st->pi_gain = speex_alloc((st->nbSubframes)*sizeof(spx_word32_t));
+   st->low_innov = speex_alloc((st->frame_size)*sizeof(spx_word32_t));
+   speex_encoder_ctl(st->st_low, SPEEX_SET_INNOVATION_SAVE, st->low_innov);
+   st->innov_save = NULL;
    st->mem_sp = speex_alloc((st->lpcSize)*sizeof(spx_mem_t));
    st->mem_sp2 = speex_alloc((st->lpcSize)*sizeof(spx_mem_t));
 …
    st->vbr_quality = 8;
    st->vbr_enabled = 0;
+   st->vbr_max = 0;
+   st->vbr_max_high = 20000;  /* We just need a big value here */
    st->vad_enabled = 0;
    st->abr_enabled = 0;
 …
    speex_free(st->res);
    speex_free(st->sw);
-   speex_free(st->target);
-   speex_free(st->window);
    speex_free(st->lagWindow);
 …
    VARDECL(spx_mem_t *mem);
    VARDECL(spx_sig_t *innov);
+   VARDECL(spx_word16_t *target);
    VARDECL(spx_word16_t *syn_resp);
    VARDECL(spx_word32_t *low_pi_gain);
+   VARDECL(spx_sig_t *low_exc);
+   VARDECL(spx_sig_t *low_innov);
+   VARDECL(spx_word16_t *low_exc);
    const SpeexSBMode *mode;
    int dtx;
 …
    ALLOC(low_pi_gain, st->nbSubframes, spx_word32_t);
+   ALLOC(low_exc, st->frame_size, spx_sig_t);
+   ALLOC(low_innov, st->frame_size, spx_sig_t);
+   ALLOC(low_exc, st->frame_size, spx_word16_t);
    speex_encoder_ctl(st->st_low, SPEEX_GET_PI_GAIN, low_pi_gain);
    speex_encoder_ctl(st->st_low, SPEEX_GET_EXC, low_exc);
-   speex_encoder_ctl(st->st_low, SPEEX_GET_INNOV, low_innov);
    speex_encoder_ctl(st->st_low, SPEEX_GET_LOW_MODE, &dtx);
 …
    /* LPC to LSPs (x-domain) transform */
    roots=lpc_to_lsp (st->lpc, st->lpcSize, st->lsp, 15, LSP_DELTA1, stack);
+   roots=lpc_to_lsp (st->lpc, st->lpcSize, st->lsp, 10, LSP_DELTA1, stack);
    if (roots!=st->lpcSize)
+   {
       roots = lpc_to_lsp (st->lpc, st->lpcSize, st->lsp, 11, LSP_DELTA2, stack);
+      roots = lpc_to_lsp (st->lpc, st->lpcSize, st->lsp, 10, LSP_DELTA2, stack);
       if (roots!=st->lpcSize) {
          /*If we can't find all LSP's, do some damage control and use a flat filter*/
          for (i=0;i<st->lpcSize;i++)
+         {
             st->lsp[i]=M_PI*((float)(i+1))/(st->lpcSize+1);
+            st->lsp[i]=LSP_SCALING*M_PI*((float)(i+1))/(st->lpcSize+1);
+         }
+      }
 …
                thresh = (st->vbr_quality-v1)   * mode->vbr_thresh[modeid][v1+1] +
                         (1+v1-st->vbr_quality) * mode->vbr_thresh[modeid][v1];
             if (st->relative_quality >= thresh)
+            if (st->relative_quality >= thresh && st->sampling_rate*st->submodes[modeid]->bits_per_frame/st->full_frame_size <= st->vbr_max_high)
                break;
             modeid--;
 …
    ALLOC(syn_resp, st->subframeSize, spx_word16_t);
    ALLOC(innov, st->subframeSize, spx_sig_t);
+   ALLOC(target, st->subframeSize, spx_word16_t);
    for (sub=0;sub<st->nbSubframes;sub++)
+   {
       spx_sig_t *exc, *sp, *res, *target, *sw;
+      spx_sig_t *exc, *sp, *res, *sw, *innov_save=NULL;
       spx_word16_t filter_ratio;
       int offset;
 …
       exc=st->exc+offset;
       res=st->res+offset;
-      target=st->target+offset;
       sw=st->sw+offset;
+      /* Pointer for saving innovation */
+      if (st->innov_save)
+      {
+         innov_save = st->innov_save+2*offset;
+         for (i=0;i<2*st->subframeSize;i++)
+            innov_save[i]=0;
+      }
       /* LSP interpolation (quantized and unquantized) */
 …
       rl = low_pi_gain[sub];
 #ifdef FIXED_POINT
       filter_ratio=DIV32_16(SHL(rl+82,2),SHR(82+rh,5));
+      filter_ratio=PDIV32_16(SHL(rl+82,2),SHR(82+rh,5));
 #else
       filter_ratio=(rl+.01)/(rh+.01);
 …
          float g;
          spx_word16_t el;
          el = compute_rms(low_innov+offset, st->subframeSize);
+         el = compute_rms(st->low_innov+offset, st->subframeSize);
          /* Gain to use if we want to use the low-band excitation for high-band */
          g=eh/(.01+el);
+         g=eh/(1.+el);
 #if 0
 …
             for (i=0;i<st->lpcSize;i++)
                mem[i]=st->mem_sp[i];
             iir_mem2(low_innov+offset, st->interp_qlpc, tmp_sig, st->subframeSize, st->lpcSize, mem);
+            iir_mem2(st->low_innov+offset, st->interp_qlpc, tmp_sig, st->subframeSize, st->lpcSize, mem);
             g2 = compute_rms(sp, st->subframeSize)/(.01+compute_rms(tmp_sig, st->subframeSize));
             /*fprintf (stderr, "gains: %f %f\n", g, g2);*/
 …
          spx_word32_t scale;
          spx_word16_t el;
          el = compute_rms(low_exc+offset, st->subframeSize);
          gc = DIV32_16(MULT16_16(filter_ratio,1+eh),1+el);
+         el = compute_rms16(low_exc+offset, st->subframeSize);
+         gc = PDIV32_16(MULT16_16(filter_ratio,1+eh),1+el);
          /* This is a kludge that cleans up a historical bug */
 …
             gc *= 1.4142;
          scale = SHL(MULT16_16(DIV32_16(SHL(gc,SIG_SHIFT-4),filter_ratio),(1+el)),4);
+         scale = SHL32(MULT16_16(PDIV32_16(SHL32(EXTEND32(gc),SIG_SHIFT-6),filter_ratio),(1+el)),6);
          compute_impulse_response(st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, syn_resp, st->subframeSize, st->lpcSize, stack);
 …
          /* Compute target signal */
          for (i=0;i<st->subframeSize;i++)
             target[i]=sw[i]-res[i];
+            target[i]=PSHR32(sw[i]-res[i],SIG_SHIFT);
          for (i=0;i<st->subframeSize;i++)
 …
             exc[i] = ADD32(exc[i], innov[i]);
+         if (st->innov_save)
+         {
+            for (i=0;i<st->subframeSize;i++)
+               innov_save[2*i]=innov[i];
+         }
          if (SUBMODE(double_codebook)) {
             char *tmp_stack=stack;
 …
    st->exc=speex_alloc((st->frame_size)*sizeof(spx_sig_t));
+   st->excBuf=speex_alloc((st->subframeSize)*sizeof(spx_sig_t));
    st->qlsp = speex_alloc((st->lpcSize)*sizeof(spx_lsp_t));
 …
    st->mem_sp = speex_alloc((2*st->lpcSize)*sizeof(spx_mem_t));
+   st->low_innov = speex_alloc((st->frame_size)*sizeof(spx_word32_t));
+   speex_decoder_ctl(st->st_low, SPEEX_SET_INNOVATION_SAVE, st->low_innov);
+   st->innov_save = NULL;
    st->lpc_enh_enabled=0;
    st->seed = 1000;
 …
    speex_free(st->g1_mem);
    speex_free(st->exc);
+   speex_free(st->excBuf);
    speex_free(st->qlsp);
    speex_free(st->old_qlsp);
 …
+{
    int i;
-   VARDECL(spx_coef_t *awk1);
-   VARDECL(spx_coef_t *awk2);
-   VARDECL(spx_coef_t *awk3);
    int saved_modeid=0;
 …
    st->first=1;
-   ALLOC(awk1, st->lpcSize+1, spx_coef_t);
-   ALLOC(awk2, st->lpcSize+1, spx_coef_t);
-   ALLOC(awk3, st->lpcSize+1, spx_coef_t);
-   if (st->lpc_enh_enabled)
+   {
-      spx_word16_t k1,k2,k3;
-      if (st->submodes[st->submodeID] != NULL)
+      {
-         k1=SUBMODE(lpc_enh_k1);
-         k2=SUBMODE(lpc_enh_k2);
-         k3=SUBMODE(lpc_enh_k3);
-      } else {
-         k1=k2=.7*GAMMA_SCALING;
-         k3 = 0;
+      }
-      bw_lpc(k1, st->interp_qlpc, awk1, st->lpcSize);
-      bw_lpc(k2, st->interp_qlpc, awk2, st->lpcSize);
-      bw_lpc(k3, st->interp_qlpc, awk3, st->lpcSize);
-      /*fprintf (stderr, "%f %f %f\n", k1, k2, k3);*/
+   }
    /* Final signal synthesis from excitation */
 …
       st->high[i]=st->exc[i];
+   if (st->lpc_enh_enabled)
+   {
+      /* Use enhanced LPC filter */
+      filter_mem2(st->high, awk2, awk1, st->high, st->frame_size, st->lpcSize,
+                  st->mem_sp+st->lpcSize);
+      filter_mem2(st->high, awk3, st->interp_qlpc, st->high, st->frame_size, st->lpcSize,
+                  st->mem_sp);
+   } else {
+      /* Use regular filter */
+      for (i=0;i<st->lpcSize;i++)
+         st->mem_sp[st->lpcSize+i] = 0;
+      iir_mem2(st->high, st->interp_qlpc, st->high, st->frame_size, st->lpcSize,
+               st->mem_sp);
+   }
+   iir_mem2(st->high, st->interp_qlpc, st->high, st->frame_size, st->lpcSize,
+            st->mem_sp);
-   /*iir_mem2(st->exc, st->interp_qlpc, st->high, st->frame_size, st->lpcSize, st->mem_sp);*/
    /* Reconstruct the original */
 …
    char *stack;
    VARDECL(spx_word32_t *low_pi_gain);
+   VARDECL(spx_sig_t *low_exc);
+   VARDECL(spx_sig_t *low_innov);
+   VARDECL(spx_coef_t *awk1);
+   VARDECL(spx_coef_t *awk2);
+   VARDECL(spx_coef_t *awk3);
+   VARDECL(spx_word16_t *low_exc);
+   VARDECL(spx_coef_t *ak);
    int dtx;
    const SpeexSBMode *mode;
 …
    ALLOC(low_pi_gain, st->nbSubframes, spx_word32_t);
+   ALLOC(low_exc, st->frame_size, spx_sig_t);
+   ALLOC(low_innov, st->frame_size, spx_sig_t);
+   ALLOC(low_exc, st->frame_size, spx_word16_t);
    speex_decoder_ctl(st->st_low, SPEEX_GET_PI_GAIN, low_pi_gain);
    speex_decoder_ctl(st->st_low, SPEEX_GET_EXC, low_exc);
-   speex_decoder_ctl(st->st_low, SPEEX_GET_INNOV, low_innov);
    SUBMODE(lsp_unquant)(st->qlsp, st->lpcSize, bits);
 …
+   }
+   ALLOC(awk1, st->lpcSize+1, spx_coef_t);
+   ALLOC(awk2, st->lpcSize+1, spx_coef_t);
+   ALLOC(awk3, st->lpcSize+1, spx_coef_t);
+   ALLOC(ak, st->lpcSize, spx_coef_t);
    for (sub=0;sub<st->nbSubframes;sub++)
+   {
       spx_sig_t *exc, *sp;
+      spx_sig_t *exc, *sp, *innov_save=NULL;
       spx_word16_t filter_ratio;
       spx_word16_t el=0;
 …
       sp=st->high+offset;
       exc=st->exc+offset;
+      /* Pointer for saving innovation */
+      if (st->innov_save)
+      {
+         innov_save = st->innov_save+2*offset;
+         for (i=0;i<2*st->subframeSize;i++)
+            innov_save[i]=0;
+      }
       /* LSP interpolation */
 …
       /* LSP to LPC */
+      lsp_to_lpc(st->interp_qlsp, st->interp_qlpc, st->lpcSize, stack);
+      if (st->lpc_enh_enabled)
+      {
+         spx_word16_t k1,k2,k3;
+         k1=SUBMODE(lpc_enh_k1);
+         k2=SUBMODE(lpc_enh_k2);
+         k3=SUBMODE(lpc_enh_k3);
+         bw_lpc(k1, st->interp_qlpc, awk1, st->lpcSize);
+         bw_lpc(k2, st->interp_qlpc, awk2, st->lpcSize);
+         bw_lpc(k3, st->interp_qlpc, awk3, st->lpcSize);
+         /*fprintf (stderr, "%f %f %f\n", k1, k2, k3);*/
+      }
+      lsp_to_lpc(st->interp_qlsp, ak, st->lpcSize, stack);
       /* Calculate response ratio between the low and high filter in the middle
 …
          rl = low_pi_gain[sub];
 #ifdef FIXED_POINT
          filter_ratio=DIV32_16(SHL(rl+82,2),SHR(82+rh,5));
+         filter_ratio=PDIV32_16(SHL(rl+82,2),SHR(82+rh,5));
 #else
          filter_ratio=(rl+.01)/(rh+.01);
 …
 #if 0
          for (i=0;i<st->subframeSize;i++)
             exc[i]=mode->folding_gain*g*low_innov[offset+i];
+            exc[i]=mode->folding_gain*g*st->low_innov[offset+i];
 #else
+         {
 …
             for (i=0;i<st->subframeSize;i++)
+            {
                float e=tmp*g*mode->folding_gain*low_innov[offset+i];
+               float e=tmp*g*mode->folding_gain*st->low_innov[offset+i];
                tmp *= -1;
                exc[i] = e;
 …
+         }
-         /*speex_rand_vec(mode->folding_gain*g*el, exc, st->subframeSize);*/
 #endif
       } else {
 …
          int qgc = speex_bits_unpack_unsigned(bits, 4);
          el = compute_rms(low_exc+offset, st->subframeSize);
+         el = compute_rms16(low_exc+offset, st->subframeSize);
 #ifdef FIXED_POINT
 …
             gc *= 1.4142;
          scale = SHL(MULT16_16(DIV32_16(SHL(gc,SIG_SHIFT-4),filter_ratio),(1+el)),4);
+         scale = SHL(MULT16_16(PDIV32_16(SHL(gc,SIG_SHIFT-6),filter_ratio),(1+el)),6);
          SUBMODE(innovation_unquant)(exc, SUBMODE(innovation_params), st->subframeSize,
 …
+      }
+      if (st->innov_save)
+      {
+         for (i=0;i<st->subframeSize;i++)
+            innov_save[2*i]=exc[i];
+      }
       for (i=0;i<st->subframeSize;i++)
+         sp[i]=exc[i];
+      if (st->lpc_enh_enabled)
+      {
+         /* Use enhanced LPC filter */
+         filter_mem2(sp, awk2, awk1, sp, st->subframeSize, st->lpcSize,
+                     st->mem_sp+st->lpcSize);
+         filter_mem2(sp, awk3, st->interp_qlpc, sp, st->subframeSize, st->lpcSize,
+                     st->mem_sp);
+      } else {
+         /* Use regular filter */
+         for (i=0;i<st->lpcSize;i++)
+            st->mem_sp[st->lpcSize+i] = 0;
+         iir_mem2(sp, st->interp_qlpc, sp, st->subframeSize, st->lpcSize,
+                     st->mem_sp);
+      }
+      /*iir_mem2(exc, st->interp_qlpc, sp, st->subframeSize, st->lpcSize, st->mem_sp);*/
+         sp[i]=st->excBuf[i];
+      iir_mem2(sp, st->interp_qlpc, sp, st->subframeSize, st->lpcSize,
+               st->mem_sp);
+      for (i=0;i<st->subframeSize;i++)
+         st->excBuf[i]=exc[i];
+      for (i=0;i<st->lpcSize;i++)
+         st->interp_qlpc[i] = ak[i];
+   }
 …
       break;
    case SPEEX_SET_ABR:
       st->abr_enabled = (*(int*)ptr);
       st->vbr_enabled = 1;
+      st->abr_enabled = (*(spx_int32_t*)ptr);
+      st->vbr_enabled = st->abr_enabled!=0;
       speex_encoder_ctl(st->st_low, SPEEX_SET_VBR, &st->vbr_enabled);
+      if (st->vbr_enabled)
+      {
          int i=10, rate, target;
          float vbr_qual;
          target = (*(int*)ptr);
+         target = (*(spx_int32_t*)ptr);
          while (i>=0)
+         {
 …
       break;
    case SPEEX_GET_ABR:
       (*(int*)ptr) = st->abr_enabled;
+      (*(spx_int32_t*)ptr) = st->abr_enabled;
       break;
    case SPEEX_SET_QUALITY:
 …
    case SPEEX_SET_BITRATE:
+      {
+         int i=10, rate, target;
+         target = (*(int*)ptr);
+         int i=10;
+         spx_int32_t rate, target;
+         target = (*(spx_int32_t*)ptr);
          while (i>=0)
+         {
 …
       /*fprintf (stderr, "before: %d\n", (*(int*)ptr));*/
       if (st->submodes[st->submodeID])
          (*(int*)ptr) += st->sampling_rate*SUBMODE(bits_per_frame)/st->full_frame_size;
+         (*(spx_int32_t*)ptr) += st->sampling_rate*SUBMODE(bits_per_frame)/st->full_frame_size;
       else
          (*(int*)ptr) += st->sampling_rate*(SB_SUBMODE_BITS+1)/st->full_frame_size;
+         (*(spx_int32_t*)ptr) += st->sampling_rate*(SB_SUBMODE_BITS+1)/st->full_frame_size;
       /*fprintf (stderr, "after: %d\n", (*(int*)ptr));*/
       break;
    case SPEEX_SET_SAMPLING_RATE:
+      {
          int tmp=(*(int*)ptr);
+         spx_int32_t tmp=(*(spx_int32_t*)ptr);
          st->sampling_rate = tmp;
          tmp>>=1;
 …
       break;
    case SPEEX_GET_SAMPLING_RATE:
       (*(int*)ptr)=st->sampling_rate;
+      (*(spx_int32_t*)ptr)=st->sampling_rate;
       break;
    case SPEEX_RESET_STATE:
 …
       (*(int*)ptr) = 2*(*(int*)ptr) + QMF_ORDER - 1;
       break;
+   case SPEEX_SET_PLC_TUNING:
+      speex_encoder_ctl(st->st_low, SPEEX_SET_PLC_TUNING, ptr);
+      break;
+   case SPEEX_GET_PLC_TUNING:
+      speex_encoder_ctl(st->st_low, SPEEX_GET_PLC_TUNING, ptr);
+      break;
+   case SPEEX_SET_VBR_MAX_BITRATE:
+      {
+         st->vbr_max = (*(spx_int32_t*)ptr);
+         if (SPEEX_SET_VBR_MAX_BITRATE<1)
+         {
+            speex_encoder_ctl(st->st_low, SPEEX_SET_VBR_MAX_BITRATE, &st->vbr_max);
+            st->vbr_max_high = 17600;
+         } else {
+            spx_int32_t low_rate;
+            /* FIXME: Need to adapt that to ultra-wideband */
+            if (st->vbr_max >= 42200)
+            {
+               st->vbr_max_high = 17600;
+            } else if (st->vbr_max >= 27800)
+            {
+               st->vbr_max_high = 9600;
+            } else if (st->vbr_max > 20600)
+            {
+               st->vbr_max_high = 5600;
+            } else {
+               st->vbr_max_high = 1800;
+            }
+            low_rate = st->vbr_max - st->vbr_max_high;
+            speex_encoder_ctl(st->st_low, SPEEX_SET_VBR_MAX_BITRATE, &low_rate);
+         }
+      }
+      break;
+   case SPEEX_GET_VBR_MAX_BITRATE:
+      (*(spx_int32_t*)ptr) = st->vbr_max;
+      break;
+   /* This is all internal stuff past this point */
    case SPEEX_GET_PI_GAIN:
+      {
 …
    case SPEEX_GET_RELATIVE_QUALITY:
       (*(float*)ptr)=st->relative_quality;
+      break;
+   case SPEEX_SET_INNOVATION_SAVE:
+      st->innov_save = ptr;
       break;
    default:
 …
       speex_decoder_ctl(st->st_low, request, ptr);
       if (st->submodes[st->submodeID])
          (*(int*)ptr) += st->sampling_rate*SUBMODE(bits_per_frame)/st->full_frame_size;
+         (*(spx_int32_t*)ptr) += st->sampling_rate*SUBMODE(bits_per_frame)/st->full_frame_size;
       else
          (*(int*)ptr) += st->sampling_rate*(SB_SUBMODE_BITS+1)/st->full_frame_size;
+         (*(spx_int32_t*)ptr) += st->sampling_rate*(SB_SUBMODE_BITS+1)/st->full_frame_size;
       break;
    case SPEEX_SET_SAMPLING_RATE:
+      {
          int tmp=(*(int*)ptr);
+         spx_int32_t tmp=(*(spx_int32_t*)ptr);
          st->sampling_rate = tmp;
          tmp>>=1;
 …
       break;
    case SPEEX_GET_SAMPLING_RATE:
       (*(int*)ptr)=st->sampling_rate;
+      (*(spx_int32_t*)ptr)=st->sampling_rate;
       break;
    case SPEEX_SET_HANDLER:
 …
    case SPEEX_GET_SUBMODE_ENCODING:
       (*(int*)ptr) = st->encode_submode;
+      break;
+   case SPEEX_GET_LOOKAHEAD:
+      speex_decoder_ctl(st->st_low, SPEEX_GET_LOOKAHEAD, ptr);
+      (*(int*)ptr) = 2*(*(int*)ptr);
       break;
    case SPEEX_GET_PI_GAIN:
 …
       speex_decoder_ctl(st->st_low, SPEEX_GET_DTX_STATUS, ptr);
       break;
+   case SPEEX_SET_INNOVATION_SAVE:
+      st->innov_save = ptr;
+      break;
    default:
       speex_warning_int("Unknown nb_ctl request: ", request);

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/sb_celp.h

-                      r278
+                      r628
 /**Structure representing the full state of the sub-band encoder*/
 typedef struct SBEncState {
    const SpeexMode *mode;            /**< Pointer to the mode (containing for vtable info) */
    void *st_low;               /**< State of the low-band (narrowband) encoder */
    int    full_frame_size;     /**< Length of full-band frames*/
    int    frame_size;          /**< Length of high-band frames*/
    int    subframeSize;        /**< Length of high-band sub-frames*/
    int    nbSubframes;         /**< Number of high-band sub-frames*/
    int    windowSize;          /**< Length of high-band LPC window*/
    int    lpcSize;             /**< Order of high-band LPC analysis */
    int    bufSize;             /**< Buffer size */
    int    first;               /**< First frame? */
    float  lag_factor;          /**< Lag-windowing control parameter */
    spx_word16_t  lpc_floor;           /**< Controls LPC analysis noise floor */
    spx_word16_t  gamma1;              /**< Perceptual weighting coef 1 */
    spx_word16_t  gamma2;              /**< Perceptual weighting coef 2 */
+   const SpeexMode *mode;         /**< Pointer to the mode (containing for vtable info) */
+   void *st_low;                  /**< State of the low-band (narrowband) encoder */
+   int    full_frame_size;        /**< Length of full-band frames*/
+   int    frame_size;             /**< Length of high-band frames*/
+   int    subframeSize;           /**< Length of high-band sub-frames*/
+   int    nbSubframes;            /**< Number of high-band sub-frames*/
+   int    windowSize;             /**< Length of high-band LPC window*/
+   int    lpcSize;                /**< Order of high-band LPC analysis */
+   int    bufSize;                /**< Buffer size */
+   int    first;                  /**< First frame? */
+   float  lag_factor;             /**< Lag-windowing control parameter */
+   spx_word16_t  lpc_floor;       /**< Controls LPC analysis noise floor */
+   spx_word16_t  gamma1;          /**< Perceptual weighting coef 1 */
+   spx_word16_t  gamma2;          /**< Perceptual weighting coef 2 */
    char  *stack;               /**< Temporary allocation stack */
    spx_sig_t *x0d, *x1d; /**< QMF filter signals*/
    spx_sig_t *high;                /**< High-band signal (buffer) */
    spx_sig_t *y0, *y1;             /**< QMF synthesis signals */
+   char  *stack;                  /**< Temporary allocation stack */
+   spx_sig_t *x0d, *x1d;          /**< QMF filter signals*/
+   spx_sig_t *high;               /**< High-band signal (buffer) */
+   spx_sig_t *y0, *y1;            /**< QMF synthesis signals */
    spx_word16_t *h0_mem, *h1_mem;
    spx_word32_t *g0_mem, *g1_mem; /**< QMF memories */
+   spx_sig_t *excBuf;              /**< High-band excitation */
+   spx_sig_t *exc;                 /**< High-band excitation (for QMF only)*/
+   spx_sig_t *res;                 /**< Zero-input response (ringing) */
+   spx_sig_t *sw;                  /**< Perceptually weighted signal */
+   spx_sig_t *target;              /**< Weighted target signal (analysis by synthesis) */
+   spx_word16_t *window;              /**< LPC analysis window */
+   spx_word16_t *lagWindow;           /**< Auto-correlation window */
+   spx_word16_t *autocorr;            /**< Auto-correlation (for LPC analysis) */
+   spx_coef_t *lpc;                 /**< LPC coefficients */
+   spx_lsp_t *lsp;                 /**< LSP coefficients */
+   spx_lsp_t *qlsp;                /**< Quantized LSPs */
+   spx_lsp_t *old_lsp;             /**< LSPs of previous frame */
+   spx_lsp_t *old_qlsp;            /**< Quantized LSPs of previous frame */
+   spx_lsp_t *interp_lsp;          /**< Interpolated LSPs for current sub-frame */
+   spx_lsp_t *interp_qlsp;         /**< Interpolated quantized LSPs for current sub-frame */
+   spx_coef_t *interp_lpc;          /**< Interpolated LPCs for current sub-frame */
+   spx_coef_t *interp_qlpc;         /**< Interpolated quantized LPCs for current sub-frame */
+   spx_coef_t *bw_lpc1;             /**< Bandwidth-expanded version of LPCs (#1) */
+   spx_coef_t *bw_lpc2;             /**< Bandwidth-expanded version of LPCs (#2) */
+   spx_sig_t *excBuf;             /**< High-band excitation */
+   spx_sig_t *exc;                /**< High-band excitation (for QMF only)*/
+   spx_sig_t *res;                /**< Zero-input response (ringing) */
+   spx_sig_t *sw;                 /**< Perceptually weighted signal */
+   const spx_word16_t *window;    /**< LPC analysis window */
+   spx_word16_t *lagWindow;       /**< Auto-correlation window */
+   spx_word16_t *autocorr;        /**< Auto-correlation (for LPC analysis) */
+   spx_coef_t *lpc;               /**< LPC coefficients */
+   spx_lsp_t *lsp;                /**< LSP coefficients */
+   spx_lsp_t *qlsp;               /**< Quantized LSPs */
+   spx_lsp_t *old_lsp;            /**< LSPs of previous frame */
+   spx_lsp_t *old_qlsp;           /**< Quantized LSPs of previous frame */
+   spx_lsp_t *interp_lsp;         /**< Interpolated LSPs for current sub-frame */
+   spx_lsp_t *interp_qlsp;        /**< Interpolated quantized LSPs for current sub-frame */
+   spx_coef_t *interp_lpc;        /**< Interpolated LPCs for current sub-frame */
+   spx_coef_t *interp_qlpc;       /**< Interpolated quantized LPCs for current sub-frame */
+   spx_coef_t *bw_lpc1;           /**< Bandwidth-expanded version of LPCs (#1) */
+   spx_coef_t *bw_lpc2;           /**< Bandwidth-expanded version of LPCs (#2) */
    spx_mem_t *mem_sp;              /**< Synthesis signal memory */
+   spx_mem_t *mem_sp;             /**< Synthesis signal memory */
    spx_mem_t *mem_sp2;
    spx_mem_t *mem_sw;              /**< Perceptual signal memory */
+   spx_mem_t *mem_sw;             /**< Perceptual signal memory */
    spx_word32_t *pi_gain;
+   spx_sig_t *innov_save;         /**< If non-NULL, innovation is copied here */
+   spx_sig_t *low_innov;          /**< Lower-band innovation is copied here magically */
+   float  vbr_quality;         /**< Quality setting for VBR encoding */
+   int    vbr_enabled;         /**< 1 for enabling VBR, 0 otherwise */
+   int    abr_enabled;         /**< ABR setting (in bps), 0 if off */
+   float  vbr_quality;            /**< Quality setting for VBR encoding */
+   int    vbr_enabled;            /**< 1 for enabling VBR, 0 otherwise */
+   spx_int32_t vbr_max;           /**< Max bit-rate allowed in VBR mode (total) */
+   spx_int32_t vbr_max_high;      /**< Max bit-rate allowed in VBR mode for the high-band */
+   spx_int32_t abr_enabled;       /**< ABR setting (in bps), 0 if off */
    float  abr_drift;
    float  abr_drift2;
    float  abr_count;
    int    vad_enabled;         /**< 1 for enabling VAD, 0 otherwise */
+   int    vad_enabled;            /**< 1 for enabling VAD, 0 otherwise */
    float  relative_quality;
 …
    int    submodeSelect;
    int    complexity;
    int    sampling_rate;
+   spx_int32_t sampling_rate;
 } SBEncState;
 …
    int    lpcSize;
    int    first;
    int    sampling_rate;
+   spx_int32_t sampling_rate;
    int    lpc_enh_enabled;
 …
    spx_sig_t *exc;
+   spx_sig_t *excBuf;
    spx_lsp_t *qlsp;
    spx_lsp_t *old_qlsp;
 …
    spx_mem_t *mem_sp;
    spx_word32_t *pi_gain;
+   spx_sig_t *innov_save;      /**< If non-NULL, innovation is copied here */
+   spx_sig_t *low_innov;       /**< Lower-band innovation is copied here magically */
    spx_int32_t seed;

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/speex.h

-                      r278
+                      r628
 #define SPEEX_GET_PLC_TUNING 41
+/** Sets the max bit-rate allowed in VBR mode */
+#define SPEEX_SET_VBR_MAX_BITRATE 42
+/** Gets the max bit-rate allowed in VBR mode */
+#define SPEEX_GET_VBR_MAX_BITRATE 43
 /* Used internally, not to be used in applications */
 /** Used internally*/
 …
 /** Used internally*/
 #define SPEEX_GET_DTX_STATUS   103
+/** Used internally*/
+#define SPEEX_SET_INNOVATION_SAVE   104

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/speex_echo.h

-                      r278
+                      r628
 /** Performs echo cancellation on a frame */
+void speex_echo_cancel(SpeexEchoState *st, short *ref, short *echo, short *out, spx_int32_t *Y);
+void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *rec, const spx_int16_t *play, spx_int16_t *out, spx_int32_t *Yout);
+/** Perform echo cancellation using internal playback buffer */
+void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out, spx_int32_t *Yout);
+/** Let the echo canceller know that a frame was just played */
+void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play);
 /** Reset the echo canceller state */

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/speex_jitter.h

-                      r278
+                      r628
 #endif
+#define SPEEX_JITTER_MAX_PACKET_SIZE 1500 /**< Maximum number of bytes per packet         */
+#define SPEEX_JITTER_MAX_BUFFER_SIZE 20   /**< Maximum number of packets in jitter buffer */
+struct JitterBuffer_;
+#define MAX_MARGIN 12  /**< Number of bins in margin histogram */
+typedef struct JitterBuffer_ JitterBuffer;
+typedef struct _JitterBufferPacket JitterBufferPacket;
+struct _JitterBufferPacket {
+   char        *data;
+   spx_uint32_t len;
+   spx_uint32_t timestamp;
+   spx_uint32_t span;
+};
+#define JITTER_BUFFER_OK 0
+#define JITTER_BUFFER_MISSING 1
+#define JITTER_BUFFER_INCOMPLETE 2
+#define JITTER_BUFFER_INTERNAL_ERROR -1
+#define JITTER_BUFFER_BAD_ARGUMENT -2
+/** Initialise jitter buffer */
+JitterBuffer *jitter_buffer_init(int tick);
+/** Reset jitter buffer */
+void jitter_buffer_reset(JitterBuffer *jitter);
+/** Destroy jitter buffer */
+void jitter_buffer_destroy(JitterBuffer *jitter);
+/** Put one packet into the jitter buffer */
+void jitter_buffer_put(JitterBuffer *jitter, const JitterBufferPacket *packet);
+/** Get one packet from the jitter buffer */
+int jitter_buffer_get(JitterBuffer *jitter, JitterBufferPacket *packet, spx_uint32_t *current_timestamp);
+/** Get pointer timestamp of jitter buffer */
+int jitter_buffer_get_pointer_timestamp(JitterBuffer *jitter);
+/** Advance by one tick */
+void jitter_buffer_tick(JitterBuffer *jitter);
 /** Speex jitter-buffer state. */
 typedef struct SpeexJitter {
-   int buffer_size;                                                       /**< Buffer size                         */
-   int pointer_timestamp;                                                 /**< Pointer timestamp                   */
    SpeexBits current_packet;                                              /**< Current Speex packet                */
    int valid_bits;                                                        /**< True if Speex bits are valid        */
+   char buf[SPEEX_JITTER_MAX_BUFFER_SIZE][SPEEX_JITTER_MAX_PACKET_SIZE];  /**< Buffer of packets                   */
+   int timestamp[SPEEX_JITTER_MAX_BUFFER_SIZE];                           /**< Timestamp of packet                 */
+   int len[SPEEX_JITTER_MAX_BUFFER_SIZE];                                 /**< Number of bytes in packet           */
+   JitterBuffer *packets;
    void *dec;                                                             /**< Pointer to Speex decoder            */
    int frame_size;                                                        /**< Frame size of Speex decoder         */
-   int frame_time;                                                        /**< Frame time in [ms] of Speex decoder */
-   int reset_state;                                                       /**< True if Speex state was reset       */
-   int lost_count;                                                        /**< Number of lost packets              */
-   float shortterm_margin[MAX_MARGIN];                                    /**< Short term margins                  */
-   float longterm_margin[MAX_MARGIN];                                     /**< Long term margins                   */
-   float loss_rate;                                                       /**< Loss rate                           */
 } SpeexJitter;
 …
 /** Get one packet from the jitter buffer */
 void speex_jitter_get(SpeexJitter *jitter, short *out, int *current_timestamp);
+void speex_jitter_get(SpeexJitter *jitter, spx_int16_t *out, int *start_offset);
 /** Get pointer timestamp of jitter buffer */

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/speex_stereo.h

r278	r628
54	54
55	55	/** Initialization value for a stereo state */
56		#define SPEEX_STEREO_STATE_INIT {1,.5,1,1}
	56	#define SPEEX_STEREO_STATE_INIT {1,.5,1,1,0,0}
57	57
58	58	/** Transforms a stereo frame into a mono frame and stores intensity stereo info in 'bits' */

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/speex_types.h

-                      r278
+                      r628
 #  if defined(__CYGWIN__)
 #    include <_G_config.h>
-     typedef _G_int64_t spx_int64_t;
      typedef _G_int32_t spx_int32_t;
      typedef _G_uint32_t spx_uint32_t;
 …
      typedef int spx_int32_t;
      typedef unsigned int spx_uint32_t;
-     typedef long long spx_int64_t;
-     typedef unsigned long long spx_uint64_t;
 #  elif defined(__MWERKS__)
-     typedef long long spx_int64_t;
      typedef int spx_int32_t;
      typedef unsigned int spx_uint32_t;
 …
 #  else
      /* MSVC/Borland */
-     typedef __int64 spx_int64_t;
      typedef __int32 spx_int32_t;
      typedef unsigned __int32 spx_uint32_t;
 …
    typedef SInt32 spx_int32_t;
    typedef UInt32 spx_uint32_t;
-   typedef SInt64 spx_int64_t;
 #elif defined(__MACOSX__) /* MacOS X Framework build */
 …
    typedef int32_t spx_int32_t;
    typedef u_int32_t spx_uint32_t;
-   typedef int64_t spx_int64_t;
 #elif defined(__BEOS__)
 …
    typedef int32_t spx_int32_t;
    typedef u_int32_t spx_uint32_t;
-   typedef int64_t spx_int64_t;
 #elif defined (__EMX__)
 …
    typedef int spx_int32_t;
    typedef unsigned int spx_uint32_t;
-   typedef long long spx_int64_t;
 #elif defined (DJGPP)
 …
    typedef int spx_int32_t;
    typedef unsigned int spx_uint32_t;
-   typedef long long spx_int64_t;
 #elif defined(R5900)
    /* PS2 EE */
-   typedef long spx_int64_t;
    typedef int spx_int32_t;
    typedef unsigned spx_uint32_t;
 …
    typedef signed int spx_int32_t;
    typedef unsigned int spx_uint32_t;
-   typedef long long int spx_int64_t;
 #elif defined(CONFIG_TI_C54X) || defined (CONFIG_TI_C55X)
 …
    typedef unsigned long spx_uint32_t;
 #elif defined(CONFIG_TI_C5X)
+#elif defined(CONFIG_TI_C6X)
    typedef short spx_int16_t;

pjproject/trunk/pjmedia/src/pjmedia-codec/speex/stack_alloc.h

r278	r628
115	115	#elif defined(USE_ALLOCA)
116	116	#define VARDECL(var) var
117		#define ALLOC(var, size, type) var = alloca(sizeof(type)*size)
	117	#define ALLOC(var, size, type) var = alloca(sizeof(type)*(size))
118	118	#else
119	119	#define VARDECL(var) var

pjproject/trunk/pjmedia/src/pjmedia-codec/speex_codec.c

-                      r582
+                      r628
+{
     struct spx_private *spx;
+    float tmp[642]; /* 20ms at 32KHz + 2 */
+    pj_int16_t *samp_in;
+    unsigned i, samp_count, sz;
+    unsigned sz;
     int tx;
 …
+    }
-    /* Copy frame to float buffer. */
-    samp_count = input->size / 2;
-    pj_assert(samp_count <= PJ_ARRAY_SIZE(tmp));
-    samp_in = input->buf;
-    for (i=0; i<samp_count; ++i) {
-        tmp[i] = samp_in[i];
+    }
     /* Flush all the bits in the struct so we can encode a new frame */
     speex_bits_reset(&spx->enc_bits);
     /* Encode the frame */
     tx = speex_encode(spx->enc, tmp, &spx->enc_bits);
+    tx = speex_encode_int(spx->enc, input->buf, &spx->enc_bits);
     /* Check whether the frame need not be transmitted (DTX) */
 …
+{
     struct spx_private *spx;
-    float tmp[642]; /* 20ms at 32KHz + 2 */
-    pj_int16_t *dst_buf;
-    unsigned i, count, sz;
     spx = (struct spx_private*) codec->codec_data;
 …
     /* Decode the data */
+    speex_decode(spx->dec, &spx->dec_bits, tmp);
+    /* Check size. */
+    sz = speex_bits_nbytes(&spx->enc_bits);
+    pj_assert(sz <= output_buf_len);
+    /* Copy from float to short samples. */
+    count = spx_factory.speex_param[spx->param_id].clock_rate * 20 / 1000;
+    pj_assert((count <= output_buf_len/2) && count <= PJ_ARRAY_SIZE(tmp));
+    dst_buf = output->buf;
+    for (i=0; i<count; ++i) {
+        dst_buf[i] = (pj_int16_t)tmp[i];
+    }
+    speex_decode_int(spx->dec, &spx->dec_bits, output->buf);
     output->type = PJMEDIA_FRAME_TYPE_AUDIO;
+    output->size = count * 2;
+    output->size = speex_bits_nbytes(&spx->dec_bits);
+    pj_assert(output->size <= (int)output_buf_len);
     output->timestamp.u64 = input->timestamp.u64;
 …
+{
     struct spx_private *spx;
+    float tmp[642]; /* 20ms at 32KHz + 2 */
+    pj_int16_t *dst_buf;
+    unsigned i, count;
+    unsigned count;
     /* output_buf_len is unreferenced when building in Release mode */
 …
     count = spx_factory.speex_param[spx->param_id].clock_rate * 20 / 1000;
     pj_assert((count <= output_buf_len/2) && count <= PJ_ARRAY_SIZE(tmp));
+    pj_assert(count <= output_buf_len/2);
     /* Recover packet loss */
+    speex_decode(spx->dec, NULL, tmp);
+    /* Copy from float to short samples. */
+    dst_buf = output->buf;
+    for (i=0; i<count; ++i) {
+        dst_buf[i] = (pj_int16_t)tmp[i];
+    }
+    speex_decode_int(spx->dec, NULL, output->buf);
     output->size = count * 2;

pjproject/trunk/pjmedia/src/pjmedia/alaw_ulaw.c

-                      r584
+                      r628
         int             t;
+        /* Shortcut: when input is zero, output is zero
+         * This will also make the VAD work harder.
+         *  -bennylp
+         */
+        if (u_val == 0) return 0;
         /* Complement to obtain normal u-law value. */
         u_val = ~u_val;

Context Navigation

Legend:

Download in other formats: