Ignore:
Timestamp:
Nov 23, 2006 10:19:46 AM (17 years ago)
Author:
bennylp
Message:

Updated Speex to their latest SVN (1.2-beta). AEC seems
to work much better now and take less CPU, so I increased
default tail length in PJSUA to 800ms.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • pjproject/trunk/pjmedia/src/pjmedia-codec/speex/mdf.c

    r641 r823  
    4242    
    4343   Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo  
    44    Cancellation With Double-Talk. Submitted to IEEE Transactions on Speech  
    45    and Audio Processing, 2006. 
     44   Cancellation With Double-Talk. To appear in IEEE Transactions on Audio, 
     45   Speech and Language Processing, 2006. 
     46   http://people.xiph.org/~jm/papers/valin_taslp2006.pdf 
    4647    
    4748   There is no explicit double-talk detection, but a continuous variation 
     
    7980#endif 
    8081 
    81 #define min(a,b) ((a)<(b) ? (a) : (b)) 
    82 #define max(a,b) ((a)>(b) ? (a) : (b)) 
    83  
    8482#ifdef FIXED_POINT 
    8583#define WEIGHT_SHIFT 11 
     
    9088#endif 
    9189 
    92 #ifdef FIXED_POINT 
    93 static const spx_float_t MIN_LEAK = {16777, -24}; 
     90/* If enabled, the transition between blocks is smooth, so there isn't any blocking 
     91aftifact when adapting. The cost is an extra FFT and a matrix-vector multiply */ 
     92#define SMOOTH_BLOCKS 
     93 
     94#ifdef FIXED_POINT 
     95static const spx_float_t MIN_LEAK = {16777, -19}; 
    9496#define TOP16(x) ((x)>>16) 
    9597#else 
    96 static const spx_float_t MIN_LEAK = .001f; 
     98static const spx_float_t MIN_LEAK = .032f; 
    9799#define TOP16(x) (x) 
    98100#endif 
     101 
     102 
     103#define PLAYBACK_DELAY 2 
     104 
     105void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len); 
    99106 
    100107 
     
    106113   int cancel_count; 
    107114   int adapted; 
     115   int saturated; 
     116   int screwed_up; 
    108117   spx_int32_t sampling_rate; 
    109118   spx_word16_t spec_average; 
     
    111120   spx_word16_t beta_max; 
    112121   spx_word32_t sum_adapt; 
    113    spx_word16_t *e; 
     122   spx_word16_t leak_estimate; 
     123    
     124   spx_word16_t *e;      /* scratch */ 
    114125   spx_word16_t *x; 
    115126   spx_word16_t *X; 
    116    spx_word16_t *d; 
    117    spx_word16_t *y; 
     127   spx_word16_t *input;  /* scratch */ 
     128   spx_word16_t *y;      /* scratch */ 
    118129   spx_word16_t *last_y; 
    119    spx_word32_t *Yps; 
    120    spx_word16_t *Y; 
     130   spx_word16_t *Y;      /* scratch */ 
    121131   spx_word16_t *E; 
    122    spx_word32_t *PHI; 
     132   spx_word32_t *PHI;    /* scratch */ 
    123133   spx_word32_t *W; 
    124134   spx_word32_t *power; 
    125135   spx_float_t *power_1; 
    126    spx_word16_t *wtmp; 
    127 #ifdef FIXED_POINT 
    128    spx_word16_t *wtmp2; 
    129 #endif 
    130    spx_word32_t *Rf; 
    131    spx_word32_t *Yf; 
    132    spx_word32_t *Xf; 
     136   spx_word16_t *wtmp;   /* scratch */ 
     137#ifdef FIXED_POINT 
     138   spx_word16_t *wtmp2;  /* scratch */ 
     139#endif 
     140   spx_word32_t *Rf;     /* scratch */ 
     141   spx_word32_t *Yf;     /* scratch */ 
     142   spx_word32_t *Xf;     /* scratch */ 
    133143   spx_word32_t *Eh; 
    134144   spx_word32_t *Yh; 
     
    136146   spx_float_t Pyy; 
    137147   spx_word16_t *window; 
     148   spx_word16_t *prop; 
    138149   void *fft_table; 
    139150   spx_word16_t memX, memD, memE; 
     
    145156   spx_int16_t *play_buf; 
    146157   int play_buf_pos; 
     158   int play_buf_started; 
    147159}; 
    148160 
    149 PJ_INLINE(void) filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem) 
     161static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem) 
    150162{ 
    151163   int i; 
     
    171183} 
    172184 
    173 PJ_INLINE(spx_word32_t ) mdf_inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len) 
     185static inline spx_word32_t mdf_inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len) 
    174186{ 
    175187   spx_word32_t sum=0; 
     
    187199 
    188200/** Compute power spectrum of a half-complex (packed) vector */ 
    189 PJ_INLINE(void) power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N) 
     201static inline void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N) 
    190202{ 
    191203   int i, j; 
     
    228240} 
    229241#else 
    230 PJ_INLINE(void) spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M) 
     242static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M) 
    231243{ 
    232244   int i,j; 
     
    249261 
    250262/** Compute weighted cross-power spectrum of a half-complex (packed) vector with conjugate */ 
    251 PJ_INLINE(void) weighted_spectral_mul_conj(const spx_float_t *w, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N) 
     263static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_float_t p, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N) 
    252264{ 
    253265   int i, j; 
    254    prod[0] = FLOAT_MUL32(w[0],MULT16_16(X[0],Y[0])); 
     266   spx_float_t W; 
     267   W = FLOAT_AMULT(p, w[0]); 
     268   prod[0] = FLOAT_MUL32(W,MULT16_16(X[0],Y[0])); 
    255269   for (i=1,j=1;i<N-1;i+=2,j++) 
    256270   { 
    257       prod[i] = FLOAT_MUL32(w[j],MAC16_16(MULT16_16(X[i],Y[i]), X[i+1],Y[i+1])); 
    258       prod[i+1] = FLOAT_MUL32(w[j],MAC16_16(MULT16_16(-X[i+1],Y[i]), X[i],Y[i+1])); 
    259    } 
    260    prod[i] = FLOAT_MUL32(w[j],MULT16_16(X[i],Y[i])); 
     271      W = FLOAT_AMULT(p, w[j]); 
     272      prod[i] = FLOAT_MUL32(W,MAC16_16(MULT16_16(X[i],Y[i]), X[i+1],Y[i+1])); 
     273      prod[i+1] = FLOAT_MUL32(W,MAC16_16(MULT16_16(-X[i+1],Y[i]), X[i],Y[i+1])); 
     274   } 
     275   W = FLOAT_AMULT(p, w[j]); 
     276   prod[i] = FLOAT_MUL32(W,MULT16_16(X[i],Y[i])); 
    261277} 
    262278 
     
    274290   st->cancel_count=0; 
    275291   st->sum_adapt = 0; 
    276    /* FIXME: Make that an init option (new API call?) */ 
     292   st->saturated = 0; 
     293   st->screwed_up = 0; 
     294   /* This is the default sampling rate */ 
    277295   st->sampling_rate = 8000; 
    278296   st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate); 
     
    284302   st->beta_max = (.5f*st->frame_size)/st->sampling_rate; 
    285303#endif 
     304   st->leak_estimate = 0; 
    286305 
    287306   st->fft_table = spx_fft_init(N); 
     
    289308   st->e = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 
    290309   st->x = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 
    291    st->d = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 
     310   st->input = (spx_word16_t*)speex_alloc(st->frame_size*sizeof(spx_word16_t)); 
    292311   st->y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 
    293    st->Yps = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); 
    294312   st->last_y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 
    295313   st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 
     
    299317   st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 
    300318 
    301    st->X = (spx_word16_t*)speex_alloc(M*N*sizeof(spx_word16_t)); 
     319   st->X = (spx_word16_t*)speex_alloc((M+1)*N*sizeof(spx_word16_t)); 
    302320   st->Y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 
    303321   st->E = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 
    304322   st->W = (spx_word32_t*)speex_alloc(M*N*sizeof(spx_word32_t)); 
    305    st->PHI = (spx_word32_t*)speex_alloc(M*N*sizeof(spx_word32_t)); 
     323   st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); 
    306324   st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t)); 
    307325   st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t)); 
    308326   st->window = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 
     327   st->prop = (spx_word16_t*)speex_alloc(M*sizeof(spx_word16_t)); 
    309328   st->wtmp = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 
    310329#ifdef FIXED_POINT 
     
    319338      st->window[i] = .5-.5*cos(2*M_PI*i/N); 
    320339#endif 
     340   for (i=0;i<=st->frame_size;i++) 
     341      st->power_1[i] = FLOAT_ONE; 
    321342   for (i=0;i<N*M;i++) 
    322    { 
    323       st->W[i] = st->PHI[i] = 0; 
    324    } 
     343      st->W[i] = 0; 
     344   { 
     345      spx_word32_t sum = 0; 
     346      /* Ratio of ~10 between adaptation rate of first and last block */ 
     347      spx_word16_t decay = SHR32(spx_exp(NEG16(DIV32_16(QCONST16(2.4,11),M))),1); 
     348      st->prop[0] = QCONST16(.7, 15); 
     349      sum = EXTEND32(st->prop[0]); 
     350      for (i=1;i<M;i++) 
     351      { 
     352         st->prop[i] = MULT16_16_Q15(st->prop[i-1], decay); 
     353         sum = ADD32(sum, EXTEND32(st->prop[i])); 
     354      } 
     355      for (i=M-1;i>=0;i--) 
     356      { 
     357         st->prop[i] = DIV32(MULT16_16(QCONST16(.8,15), st->prop[i]),sum); 
     358      } 
     359   } 
     360    
    325361   st->memX=st->memD=st->memE=0; 
    326362   st->preemph = QCONST16(.9,15); 
     
    336372   st->Pey = st->Pyy = FLOAT_ONE; 
    337373    
    338    st->play_buf = (spx_int16_t*)speex_alloc(2*st->frame_size*sizeof(spx_int16_t)); 
    339    st->play_buf_pos = 0; 
    340  
     374   st->play_buf = (spx_int16_t*)speex_alloc((PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t)); 
     375   st->play_buf_pos = PLAYBACK_DELAY*st->frame_size; 
     376   st->play_buf_started = 0; 
     377    
    341378   return st; 
    342379} 
     
    347384   int i, M, N; 
    348385   st->cancel_count=0; 
     386   st->screwed_up = 0; 
    349387   N = st->window_size; 
    350388   M = st->M; 
    351389   for (i=0;i<N*M;i++) 
    352    { 
    353390      st->W[i] = 0; 
     391   for (i=0;i<N*(M+1);i++) 
    354392      st->X[i] = 0; 
    355    } 
    356393   for (i=0;i<=st->frame_size;i++) 
     394   { 
    357395      st->power[i] = 0; 
    358     
     396      st->power_1[i] = FLOAT_ONE; 
     397      st->Eh[i] = 0; 
     398      st->Yh[i] = 0; 
     399   } 
     400   for (i=0;i<st->frame_size;i++) 
     401   { 
     402      st->last_y[i] = 0; 
     403   } 
     404   for (i=0;i<N;i++) 
     405   { 
     406      st->E[i] = 0; 
     407      st->x[i] = 0; 
     408   } 
     409   st->notch_mem[0] = st->notch_mem[1] = 0; 
     410   st->memX=st->memD=st->memE=0; 
     411 
     412   st->saturated = 0; 
    359413   st->adapted = 0; 
    360414   st->sum_adapt = 0; 
    361415   st->Pey = st->Pyy = FLOAT_ONE; 
     416   for (i=0;i<3*st->frame_size;i++) 
     417      st->play_buf[i] = 0; 
     418   st->play_buf_pos = PLAYBACK_DELAY*st->frame_size; 
     419   st->play_buf_started = 0; 
    362420 
    363421} 
     
    370428   speex_free(st->e); 
    371429   speex_free(st->x); 
    372    speex_free(st->d); 
     430   speex_free(st->input); 
    373431   speex_free(st->y); 
    374432   speex_free(st->last_y); 
    375    speex_free(st->Yps); 
    376433   speex_free(st->Yf); 
    377434   speex_free(st->Rf); 
     
    388445   speex_free(st->power_1); 
    389446   speex_free(st->window); 
     447   speex_free(st->prop); 
    390448   speex_free(st->wtmp); 
    391449#ifdef FIXED_POINT 
     
    396454} 
    397455 
    398 void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out, spx_int32_t *Yout) 
     456void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out) 
    399457{ 
    400458   int i; 
     459   /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/ 
     460   st->play_buf_started = 1; 
    401461   if (st->play_buf_pos>=st->frame_size) 
    402462   { 
    403       speex_echo_cancel(st, rec, st->play_buf, out, Yout); 
     463      speex_echo_cancellation(st, rec, st->play_buf, out); 
    404464      st->play_buf_pos -= st->frame_size; 
    405       for (i=0;i<st->frame_size;i++) 
     465      for (i=0;i<st->play_buf_pos;i++) 
    406466         st->play_buf[i] = st->play_buf[i+st->frame_size]; 
    407467   } else { 
    408       speex_warning("no playback frame available"); 
     468      speex_warning("No playback frame available (your application is buggy and/or got xruns)"); 
    409469      if (st->play_buf_pos!=0) 
    410470      { 
     
    419479void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play) 
    420480{ 
    421    if (st->play_buf_pos<=st->frame_size) 
     481   /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/ 
     482   if (!st->play_buf_started) 
     483   { 
     484      speex_warning("discarded first playback frame"); 
     485      return; 
     486   } 
     487   if (st->play_buf_pos<=PLAYBACK_DELAY*st->frame_size) 
    422488   { 
    423489      int i; 
     
    425491         st->play_buf[st->play_buf_pos+i] = play[i]; 
    426492      st->play_buf_pos += st->frame_size; 
     493      if (st->play_buf_pos <= (PLAYBACK_DELAY-1)*st->frame_size) 
     494      { 
     495         speex_warning("Auto-filling the buffer (your application is buggy and/or got xruns)"); 
     496         for (i=0;i<st->frame_size;i++) 
     497            st->play_buf[st->play_buf_pos+i] = play[i]; 
     498         st->play_buf_pos += st->frame_size; 
     499      } 
    427500   } else { 
    428       speex_warning("had to discard a playback frame"); 
     501      speex_warning("Had to discard a playback frame (your application is buggy and/or got xruns)"); 
    429502   } 
    430503} 
    431504 
    432505/** Performs echo cancellation on a frame */ 
    433 void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int16_t *echo, spx_int16_t *out, spx_int32_t *Yout) 
     506void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout) 
     507{ 
     508   speex_echo_cancellation(st, in, far_end, out); 
     509} 
     510 
     511/** Performs echo cancellation on a frame (deprecated, last arg now ignored) */ 
     512void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out) 
    434513{ 
    435514   int i,j; 
    436515   int N,M; 
    437    spx_word32_t Syy,See; 
    438    spx_word16_t leak_estimate; 
     516   spx_word32_t Syy,See,Sxx,Sdd; 
     517   spx_word32_t Sey; 
    439518   spx_word16_t ss, ss_1; 
    440519   spx_float_t Pey = FLOAT_ONE, Pyy=FLOAT_ONE; 
     
    442521   spx_word16_t RER; 
    443522   spx_word32_t tmp32; 
    444    spx_word16_t M_1; 
    445    int saturated=0; 
    446523    
    447524   N = st->window_size; 
     
    451528   ss=DIV32_16(11469,M); 
    452529   ss_1 = SUB16(32767,ss); 
    453    M_1 = DIV32_16(32767,M); 
    454530#else 
    455531   ss=.35/M; 
    456532   ss_1 = 1-ss; 
    457    M_1 = 1.f/M; 
    458 #endif 
    459  
    460    filter_dc_notch16(ref, st->notch_radius, st->d, st->frame_size, st->notch_mem); 
    461    /* Copy input data to buffer */ 
     533#endif 
     534 
     535   filter_dc_notch16(in, st->notch_radius, st->input, st->frame_size, st->notch_mem); 
     536   /* Copy input data to buffer and apply pre-emphasis */ 
    462537   for (i=0;i<st->frame_size;i++) 
    463538   { 
    464       spx_word16_t tmp; 
    465539      spx_word32_t tmp32; 
    466540      st->x[i] = st->x[i+st->frame_size]; 
    467       tmp32 = SUB32(EXTEND32(echo[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memX))); 
    468 #ifdef FIXED_POINT 
    469       /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */ 
     541      tmp32 = SUB32(EXTEND32(far_end[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memX))); 
     542#ifdef FIXED_POINT 
     543      /* If saturation occurs here, we need to freeze adaptation for M+1 frames (not just one) */ 
    470544      if (tmp32 > 32767) 
    471545      { 
    472546         tmp32 = 32767; 
    473          saturated = 1; 
     547         st->saturated = M+1; 
     548      } 
     549      if (tmp32 < -32767) 
     550      { 
     551         tmp32 = -32767; 
     552         st->saturated = M+1; 
     553      }       
     554#endif 
     555      st->x[i+st->frame_size] = EXTRACT16(tmp32); 
     556      st->memX = far_end[i]; 
     557       
     558      tmp32 = SUB32(EXTEND32(st->input[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD))); 
     559#ifdef FIXED_POINT 
     560      if (tmp32 > 32767) 
     561      { 
     562         tmp32 = 32767; 
     563         if (st->saturated == 0) 
     564            st->saturated = 1; 
    474565      }       
    475566      if (tmp32 < -32767) 
    476567      { 
    477568         tmp32 = -32767; 
    478          saturated = 1; 
    479       }       
    480 #endif 
    481       st->x[i+st->frame_size] = EXTRACT16(tmp32); 
    482       st->memX = echo[i]; 
    483        
    484       tmp = st->d[i]; 
    485       st->d[i] = st->d[i+st->frame_size]; 
    486       tmp32 = SUB32(EXTEND32(tmp), EXTEND32(MULT16_16_P15(st->preemph, st->memD))); 
    487 #ifdef FIXED_POINT 
    488       if (tmp32 > 32767) 
    489       { 
    490          tmp32 = 32767; 
    491          saturated = 1; 
    492       }       
    493       if (tmp32 < -32767) 
    494       { 
    495          tmp32 = -32767; 
    496          saturated = 1; 
    497       } 
    498 #endif 
    499       st->d[i+st->frame_size] = tmp32; 
    500       st->memD = tmp; 
     569         if (st->saturated == 0) 
     570            st->saturated = 1; 
     571      } 
     572#endif 
     573      st->memD = st->input[i]; 
     574      st->input[i] = tmp32; 
    501575   } 
    502576 
    503577   /* Shift memory: this could be optimized eventually*/ 
    504    for (i=0;i<N*(M-1);i++) 
    505       st->X[i]=st->X[i+N]; 
    506  
    507    /* Convert x (echo input) to frequency domain */ 
    508    spx_fft(st->fft_table, st->x, &st->X[(M-1)*N]); 
    509     
     578   for (j=M-1;j>=0;j--) 
     579   { 
     580      for (i=0;i<N;i++) 
     581         st->X[(j+1)*N+i] = st->X[j*N+i]; 
     582   } 
     583 
     584   /* Convert x (far end) to frequency domain */ 
     585   spx_fft(st->fft_table, st->x, &st->X[0]); 
     586    
     587#ifdef SMOOTH_BLOCKS 
     588   spectral_mul_accum(st->X, st->W, st->Y, N, M);    
     589   spx_ifft(st->fft_table, st->Y, st->e); 
     590#endif 
     591 
     592   /* Compute weight gradient */ 
     593   if (st->saturated == 0) 
     594   { 
     595      for (j=M-1;j>=0;j--) 
     596      { 
     597         weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N], st->E, st->PHI, N); 
     598         for (i=0;i<N;i++) 
     599            st->W[j*N+i] = ADD32(st->W[j*N+i], st->PHI[i]); 
     600          
     601      } 
     602   } else { 
     603      st->saturated--; 
     604   } 
     605    
     606   /* Update weight to prevent circular convolution (MDF / AUMDF) */ 
     607   for (j=0;j<M;j++) 
     608   { 
     609      /* This is a variant of the Alternatively Updated MDF (AUMDF) */ 
     610      /* Remove the "if" to make this an MDF filter */ 
     611      if (j==0 || st->cancel_count%(M-1) == j-1) 
     612      { 
     613#ifdef FIXED_POINT 
     614         for (i=0;i<N;i++) 
     615            st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],NORMALIZE_SCALEDOWN+16)); 
     616         spx_ifft(st->fft_table, st->wtmp2, st->wtmp); 
     617         for (i=0;i<st->frame_size;i++) 
     618         { 
     619            st->wtmp[i]=0; 
     620         } 
     621         for (i=st->frame_size;i<N;i++) 
     622         { 
     623            st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP); 
     624         } 
     625         spx_fft(st->fft_table, st->wtmp, st->wtmp2); 
     626         /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */ 
     627         for (i=0;i<N;i++) 
     628            st->W[j*N+i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); 
     629#else 
     630         spx_ifft(st->fft_table, &st->W[j*N], st->wtmp); 
     631         for (i=st->frame_size;i<N;i++) 
     632         { 
     633            st->wtmp[i]=0; 
     634         } 
     635         spx_fft(st->fft_table, st->wtmp, &st->W[j*N]); 
     636#endif 
     637      } 
     638   } 
     639 
    510640   /* Compute filter response Y */ 
    511641   spectral_mul_accum(st->X, st->W, st->Y, N, M); 
    512     
    513642   spx_ifft(st->fft_table, st->Y, st->y); 
    514643 
    515 #if 1 
    516    spectral_mul_accum(st->X, st->PHI, st->Y, N, M);    
    517    spx_ifft(st->fft_table, st->Y, st->e); 
    518 #endif 
    519  
     644    
    520645   /* Compute error signal (for the output with de-emphasis) */  
    521646   for (i=0;i<st->frame_size;i++) 
    522647   { 
    523648      spx_word32_t tmp_out; 
    524 #if 1 
     649#ifdef SMOOTH_BLOCKS 
    525650      spx_word16_t y = MULT16_16_Q15(st->window[i+st->frame_size],st->e[i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[i+st->frame_size]); 
    526       tmp_out = SUB32(EXTEND32(st->d[i+st->frame_size]), EXTEND32(y)); 
    527 #else 
    528       tmp_out = SUB32(EXTEND32(st->d[i+st->frame_size]), EXTEND32(st->y[i+st->frame_size])); 
     651      tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(y)); 
     652#else 
     653      tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->y[i+st->frame_size])); 
    529654#endif 
    530655 
     
    535660         tmp_out = -32768; 
    536661      tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE))); 
    537       /* This is an arbitrary test for saturation */ 
    538       if (ref[i] <= -32000 || ref[i] >= 32000) 
     662      /* This is an arbitrary test for saturation in the microphone signal */ 
     663      if (in[i] <= -32000 || in[i] >= 32000) 
    539664      { 
    540665         tmp_out = 0; 
    541          saturated = 1; 
    542       } 
    543       out[i] = tmp_out; 
     666         if (st->saturated == 0) 
     667            st->saturated = 1; 
     668      } 
     669      out[i] = (spx_int16_t)tmp_out; 
    544670      st->memE = tmp_out; 
    545671   } 
     
    549675   { 
    550676      st->e[i] = 0; 
    551       st->e[i+st->frame_size] = st->d[i+st->frame_size] - st->y[i+st->frame_size]; 
     677      st->e[i+st->frame_size] = st->input[i] - st->y[i+st->frame_size]; 
    552678   } 
    553679 
    554680   /* Compute a bunch of correlations */ 
     681   Sey = mdf_inner_prod(st->e+st->frame_size, st->y+st->frame_size, st->frame_size); 
    555682   See = mdf_inner_prod(st->e+st->frame_size, st->e+st->frame_size, st->frame_size); 
    556    See = ADD32(See, SHR32(EXTEND32(10000),6)); 
    557683   Syy = mdf_inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size); 
    558     
     684   Sxx = mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size); 
     685   Sdd = mdf_inner_prod(st->input, st->input, st->frame_size); 
     686    
     687   /* Do some sanity check */ 
     688   if (!(Syy>=0 && Sxx>=0 && See >= 0) 
     689#ifndef FIXED_POINT 
     690       || !(See < N*1e9 && Syy < N*1e9 && Sxx < N*1e9) 
     691#endif 
     692      ) 
     693   { 
     694      /* Things have gone really bad */ 
     695      st->screwed_up += 50; 
     696      for (i=0;i<st->frame_size;i++) 
     697         out[i] = 0; 
     698   } else if (SHR32(See, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 100),6))) 
     699   { 
     700      /* AEC seems to add lots of echo instead of removing it, let's see if it will improve */ 
     701      st->screwed_up++; 
     702   } else { 
     703      /* Everything's fine */ 
     704      st->screwed_up=0; 
     705   } 
     706   if (st->screwed_up>=50) 
     707   { 
     708      speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now."); 
     709      speex_echo_state_reset(st); 
     710      return; 
     711   } 
     712 
     713   /* Add a small noise floor to make sure not to have problems when dividing */ 
     714   See = MAX32(See, SHR32(MULT16_16(N, 100),6)); 
     715 
    559716   /* Convert error to frequency domain */ 
    560717   spx_fft(st->fft_table, st->e, st->E); 
     
    563720   spx_fft(st->fft_table, st->y, st->Y); 
    564721 
    565    /* Compute power spectrum of echo (X), error (E) and filter response (Y) */ 
     722   /* Compute power spectrum of far end (X), error (E) and filter response (Y) */ 
    566723   power_spectrum(st->E, st->Rf, N); 
    567724   power_spectrum(st->Y, st->Yf, N); 
    568    power_spectrum(&st->X[(M-1)*N], st->Xf, N); 
    569     
    570    /* Smooth echo energy estimate over time */ 
     725   power_spectrum(st->X, st->Xf, N); 
     726    
     727   /* Smooth far end energy estimate over time */ 
    571728   for (j=0;j<=st->frame_size;j++) 
    572729      st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]); 
     
    578735      float scale2 = .5f/M; 
    579736      for (j=0;j<=st->frame_size;j++) 
    580          st->power[j] = 0; 
     737         st->power[j] = 100; 
    581738      for (i=0;i<M;i++) 
    582739      { 
     
    604761   } 
    605762    
     763   Pyy = FLOAT_SQRT(Pyy); 
     764   Pey = FLOAT_DIVU(Pey,Pyy); 
     765 
    606766   /* Compute correlation updatete rate */ 
    607767   tmp32 = MULT16_32_Q15(st->beta0,Syy); 
     
    621781      st->Pey = st->Pyy; 
    622782   /* leak_estimate is the linear regression result */ 
    623    leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14)); 
     783   st->leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14)); 
    624784   /* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */ 
    625    if (leak_estimate > 16383) 
    626       leak_estimate = 32767; 
     785   if (st->leak_estimate > 16383) 
     786      st->leak_estimate = 32767; 
    627787   else 
    628       leak_estimate = SHL16(leak_estimate,1); 
    629    /*printf ("%f\n", leak_estimate);*/ 
     788      st->leak_estimate = SHL16(st->leak_estimate,1); 
     789   /*printf ("%f\n", st->leak_estimate);*/ 
    630790    
    631791   /* Compute Residual to Error Ratio */ 
    632792#ifdef FIXED_POINT 
    633    tmp32 = MULT16_32_Q15(leak_estimate,Syy); 
    634    tmp32 = ADD32(tmp32, SHL32(tmp32,1)); 
     793   tmp32 = MULT16_32_Q15(st->leak_estimate,Syy); 
     794   tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1))); 
     795   /* Check for y in e (lower bound on RER) */ 
     796   { 
     797      spx_float_t bound = PSEUDOFLOAT(Sey); 
     798      bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy))); 
     799      if (FLOAT_GT(bound, PSEUDOFLOAT(See))) 
     800         tmp32 = See; 
     801      else if (tmp32 < FLOAT_EXTRACT32(bound)) 
     802         tmp32 = FLOAT_EXTRACT32(bound); 
     803   } 
    635804   if (tmp32 > SHR32(See,1)) 
    636805      tmp32 = SHR32(See,1); 
    637806   RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15)); 
    638807#else 
    639    RER = 3.*MULT16_32_Q15(leak_estimate,Syy) / See; 
     808   RER = (.0001*Sxx + 3.*MULT16_32_Q15(st->leak_estimate,Syy)) / See; 
     809   /* Check for y in e (lower bound on RER) */ 
     810   if (RER < Sey*Sey/(1+See*Syy)) 
     811      RER = Sey*Sey/(1+See*Syy); 
    640812   if (RER > .5) 
    641813      RER = .5; 
     
    643815 
    644816   /* We consider that the filter has had minimal adaptation if the following is true*/ 
    645    if (!st->adapted && st->sum_adapt > QCONST32(1,15)) 
     817   if (!st->adapted && st->sum_adapt > QCONST32(M,15)) 
    646818   { 
    647819      st->adapted = 1; 
     
    654826         spx_word32_t r, e; 
    655827         /* Compute frequency-domain adaptation mask */ 
    656          r = MULT16_32_Q15(leak_estimate,SHL32(st->Yf[i],3)); 
     828         r = MULT16_32_Q15(st->leak_estimate,SHL32(st->Yf[i],3)); 
    657829         e = SHL32(st->Rf[i],3)+1; 
    658830#ifdef FIXED_POINT 
     
    663835            r = .5*e; 
    664836#endif 
    665          r = MULT16_32_Q15(QCONST16(.8,15),r) + MULT16_32_Q15(QCONST16(.2,15),(spx_word32_t)(MULT16_32_Q15(RER,e))); 
     837         r = MULT16_32_Q15(QCONST16(.7,15),r) + MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e))); 
    666838         /*st->power_1[i] = adapt_rate*r/(e*(1+st->power[i]));*/ 
    667          st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(MULT16_32_Q15(M_1,r),FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16); 
     839         st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16); 
    668840      } 
    669841   } else { 
    670       spx_word32_t Sxx; 
     842      /* Temporary adaption rate if filter is not yet adapted enough */ 
    671843      spx_word16_t adapt_rate=0; 
    672844 
    673       Sxx = mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size); 
    674       /* Temporary adaption rate if filter is not adapted correctly */ 
    675  
    676       tmp32 = MULT16_32_Q15(QCONST16(.15f, 15), Sxx); 
    677 #ifdef FIXED_POINT 
    678       if (Sxx > SHR32(See,2)) 
    679          Sxx = SHR32(See,2); 
    680 #else 
    681       if (Sxx > .25*See) 
    682          Sxx = .25*See;       
    683 #endif 
    684       adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(MULT16_32_Q15(M_1,Sxx), See),15)); 
    685        
     845      if (Sxx > SHR32(MULT16_16(N, 1000),6))  
     846      { 
     847         tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx); 
     848#ifdef FIXED_POINT 
     849         if (tmp32 > SHR32(See,2)) 
     850            tmp32 = SHR32(See,2); 
     851#else 
     852         if (tmp32 > .25*See) 
     853            tmp32 = .25*See; 
     854#endif 
     855         adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15)); 
     856      } 
    686857      for (i=0;i<=st->frame_size;i++) 
    687858         st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT+1); 
     
    691862      st->sum_adapt = ADD32(st->sum_adapt,adapt_rate); 
    692863   } 
    693    /* Compute weight gradient */ 
    694    for (j=0;j<M;j++) 
    695    { 
    696       weighted_spectral_mul_conj(st->power_1, &st->X[j*N], st->E, st->PHI+N*j, N); 
    697    } 
    698  
    699    if (!saturated) 
    700    { 
    701       /* Gradient descent */ 
    702       for (i=0;i<M*N;i++) 
    703       { 
    704          st->W[i] += st->PHI[i]; 
    705          /* Old value of W in PHI */ 
    706          st->PHI[i] = st->W[i] - st->PHI[i]; 
    707       } 
    708    } 
    709     
    710    /* Update weight to prevent circular convolution (MDF / AUMDF) */ 
    711    for (j=0;j<M;j++) 
    712    { 
    713       /* This is a variant of the Alternatively Updated MDF (AUMDF) */ 
    714       /* Remove the "if" to make this an MDF filter */ 
    715       if (j==M-1 || st->cancel_count%(M-1) == j) 
    716       { 
    717 #ifdef FIXED_POINT 
    718          for (i=0;i<N;i++) 
    719             st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],NORMALIZE_SCALEDOWN+16)); 
    720          spx_ifft(st->fft_table, st->wtmp2, st->wtmp); 
    721          for (i=0;i<st->frame_size;i++) 
    722          { 
    723             st->wtmp[i]=0; 
    724          } 
    725          for (i=st->frame_size;i<N;i++) 
    726          { 
    727             st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP); 
    728          } 
    729          spx_fft(st->fft_table, st->wtmp, st->wtmp2); 
    730          /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */ 
    731          for (i=0;i<N;i++) 
    732             st->W[j*N+i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); 
    733 #else 
    734          spx_ifft(st->fft_table, &st->W[j*N], st->wtmp); 
    735          for (i=st->frame_size;i<N;i++) 
    736          { 
    737             st->wtmp[i]=0; 
    738          } 
    739          spx_fft(st->fft_table, st->wtmp, &st->W[j*N]); 
    740 #endif 
    741       } 
    742    } 
    743  
    744    /* Compute spectrum of estimated echo for use in an echo post-filter (if necessary)*/ 
    745    if (Yout) 
    746    { 
    747       spx_word16_t leak2; 
    748       if (st->adapted) 
    749       { 
    750          /* If the filter is adapted, take the filtered echo */ 
    751          for (i=0;i<st->frame_size;i++) 
    752             st->last_y[i] = st->last_y[st->frame_size+i]; 
    753          for (i=0;i<st->frame_size;i++) 
    754             st->last_y[st->frame_size+i] = ref[i]-out[i]; 
    755       } else { 
    756          /* If filter isn't adapted yet, all we can do is take the echo signal directly */ 
    757          for (i=0;i<N;i++) 
    758             st->last_y[i] = st->x[i]; 
    759       } 
     864 
     865   /* Save residual echo so it can be used by the nonlinear processor */ 
     866   if (st->adapted) 
     867   { 
     868      /* If the filter is adapted, take the filtered echo */ 
     869      for (i=0;i<st->frame_size;i++) 
     870         st->last_y[i] = st->last_y[st->frame_size+i]; 
     871      for (i=0;i<st->frame_size;i++) 
     872         st->last_y[st->frame_size+i] = in[i]-out[i]; 
     873   } else { 
     874      /* If filter isn't adapted yet, all we can do is take the far end signal directly */ 
     875      for (i=0;i<N;i++) 
     876         st->last_y[i] = st->x[i]; 
     877   } 
     878 
     879} 
     880 
     881/* Compute spectrum of estimated echo for use in an echo post-filter */ 
     882void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, int len) 
     883{ 
     884   int i; 
     885   spx_word16_t leak2; 
     886   int N; 
     887    
     888   N = st->window_size; 
     889 
     890   /* Apply hanning window (should pre-compute it)*/ 
     891   for (i=0;i<N;i++) 
     892      st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]); 
    760893       
    761       /* Apply hanning window (should pre-compute it)*/ 
    762       for (i=0;i<N;i++) 
    763          st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]); 
     894   /* Compute power spectrum of the echo */ 
     895   spx_fft(st->fft_table, st->y, st->Y); 
     896   power_spectrum(st->Y, residual_echo, N); 
    764897       
    765       /* Compute power spectrum of the echo */ 
    766       spx_fft(st->fft_table, st->y, st->Y); 
    767       power_spectrum(st->Y, st->Yps, N); 
    768        
    769 #ifdef FIXED_POINT 
    770       if (leak_estimate > 16383) 
    771          leak2 = 32767; 
    772       else 
    773          leak2 = SHL16(leak_estimate, 1); 
    774 #else 
    775       if (leak_estimate>.5) 
    776          leak2 = 1; 
    777       else 
    778          leak2 = 2*leak_estimate; 
    779 #endif 
    780       /* Estimate residual echo */ 
    781       for (i=0;i<=st->frame_size;i++) 
    782          Yout[i] = MULT16_32_Q15(leak2,st->Yps[i]); 
    783    } 
    784 } 
    785  
     898#ifdef FIXED_POINT 
     899   if (st->leak_estimate > 16383) 
     900      leak2 = 32767; 
     901   else 
     902      leak2 = SHL16(st->leak_estimate, 1); 
     903#else 
     904   if (st->leak_estimate>.5) 
     905      leak2 = 1; 
     906   else 
     907      leak2 = 2*st->leak_estimate; 
     908#endif 
     909   /* Estimate residual echo */ 
     910   for (i=0;i<=st->frame_size;i++) 
     911      residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]); 
     912    
     913} 
    786914 
    787915int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr) 
Note: See TracChangeset for help on using the changeset viewer.