Ignore:
Timestamp:
Aug 9, 2008 5:40:22 AM (16 years ago)
Author:
bennylp
Message:

Ticket #588: Improvements to echo cancellation framework

File:
1 edited

Legend:

Unmodified
Added
Removed
  • pjproject/trunk/third_party/speex/libspeex/mdf.c

    r2184 r2198  
    1 /* Copyright (C) 2003-2008 Jean-Marc Valin 
     1/* Copyright (C) 2003-2006 Jean-Marc Valin 
    22 
    33   File: mdf.c 
     
    8989#endif 
    9090 
    91 #ifdef FIXED_POINT 
    92 #define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x)))   
    93 #else 
    94 #define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : floor(.5+(x))))   
    95 #endif 
    96  
    9791/* If enabled, the AEC will use a foreground filter and a background filter to be more robust to double-talk 
    9892   and difficult signals in general. The cost is an extra FFT and a matrix-vector multiply */ 
     
    138132   int saturated; 
    139133   int screwed_up; 
    140    int C;                    /** Number of input channels (microphones) */ 
    141    int K;                    /** Number of output channels (loudspeakers) */ 
    142134   spx_int32_t sampling_rate; 
    143135   spx_word16_t spec_average; 
     
    180172   spx_word16_t *prop; 
    181173   void *fft_table; 
    182    spx_word16_t *memX, *memD, *memE; 
     174   spx_word16_t memX, memD, memE; 
    183175   spx_word16_t preemph; 
    184176   spx_word16_t notch_radius; 
    185    spx_mem_t *notch_mem; 
     177   spx_mem_t notch_mem[2]; 
    186178 
    187179   /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */ 
     
    191183}; 
    192184 
    193 static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride) 
     185static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem) 
    194186{ 
    195187   int i; 
     
    203195   for (i=0;i<len;i++) 
    204196   { 
    205       spx_word16_t vin = in[i*stride]; 
     197      spx_word16_t vin = in[i]; 
    206198      spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15); 
    207199#ifdef FIXED_POINT 
     
    243235} 
    244236 
    245 /** Compute power spectrum of a half-complex (packed) vector and accumulate */ 
    246 static inline void power_spectrum_accum(const spx_word16_t *X, spx_word32_t *ps, int N) 
    247 { 
    248    int i, j; 
    249    ps[0]+=MULT16_16(X[0],X[0]); 
    250    for (i=1,j=1;i<N-1;i+=2,j++) 
    251    { 
    252       ps[j] +=  MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]); 
    253    } 
    254    ps[j]+=MULT16_16(X[i],X[i]); 
    255 } 
    256  
    257237/** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */ 
    258238#ifdef FIXED_POINT 
     
    351331} 
    352332 
    353 static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, int P, spx_word16_t *prop) 
    354 { 
    355    int i, j, p; 
     333static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, spx_word16_t *prop) 
     334{ 
     335   int i, j; 
    356336   spx_word16_t max_sum = 1; 
    357337   spx_word32_t prop_sum = 1; 
     
    359339   { 
    360340      spx_word32_t tmp = 1; 
    361       for (p=0;p<P;p++) 
    362          for (j=0;j<N;j++) 
    363             tmp += MULT16_16(EXTRACT16(SHR32(W[p*N*M + i*N+j],18)), EXTRACT16(SHR32(W[p*N*M + i*N+j],18))); 
     341      for (j=0;j<N;j++) 
     342         tmp += MULT16_16(EXTRACT16(SHR32(W[i*N+j],18)), EXTRACT16(SHR32(W[i*N+j],18))); 
    364343#ifdef FIXED_POINT 
    365344      /* Just a security in case an overflow were to occur */ 
     
    400379 
    401380/** Creates a new echo canceller state */ 
    402 EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) 
    403 { 
    404    return speex_echo_state_init_mc(frame_size, filter_length, 1, 1); 
    405 } 
    406  
    407 EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers) 
    408 { 
    409    int i,N,M, C, K; 
     381SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) 
     382{ 
     383   int i,N,M; 
    410384   SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState)); 
    411385 
    412    st->K = nb_speakers; 
    413    st->C = nb_mic; 
    414    C=st->C; 
    415    K=st->K; 
    416386#ifdef DUMP_ECHO_CANCEL_DATA 
    417387   if (rFile || pFile || oFile) 
     
    444414   st->fft_table = spx_fft_init(N); 
    445415    
    446    st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 
    447    st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t)); 
    448    st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t)); 
    449    st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 
    450    st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 
     416   st->e = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 
     417   st->x = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 
     418   st->input = (spx_word16_t*)speex_alloc(st->frame_size*sizeof(spx_word16_t)); 
     419   st->y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 
     420   st->last_y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 
    451421   st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 
    452422   st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 
     
    455425   st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 
    456426 
    457    st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t)); 
    458    st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 
    459    st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 
    460    st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t)); 
     427   st->X = (spx_word16_t*)speex_alloc((M+1)*N*sizeof(spx_word16_t)); 
     428   st->Y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 
     429   st->E = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 
     430   st->W = (spx_word32_t*)speex_alloc(M*N*sizeof(spx_word32_t)); 
    461431#ifdef TWO_PATH 
    462    st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t)); 
     432   st->foreground = (spx_word16_t*)speex_alloc(M*N*sizeof(spx_word16_t)); 
    463433#endif 
    464434   st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); 
     
    481451   for (i=0;i<=st->frame_size;i++) 
    482452      st->power_1[i] = FLOAT_ONE; 
    483    for (i=0;i<N*M*K*C;i++) 
     453   for (i=0;i<N*M;i++) 
    484454      st->W[i] = 0; 
    485455   { 
     
    496466      for (i=M-1;i>=0;i--) 
    497467      { 
    498          st->prop[i] = DIV32(MULT16_16(QCONST16(.8f,15), st->prop[i]),sum); 
    499       } 
    500    } 
    501     
    502    st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t)); 
    503    st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t)); 
    504    st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t)); 
     468         st->prop[i] = DIV32(MULT16_16(QCONST16(.8,15), st->prop[i]),sum); 
     469      } 
     470   } 
     471    
     472   st->memX=st->memD=st->memE=0; 
    505473   st->preemph = QCONST16(.9,15); 
    506474   if (st->sampling_rate<12000) 
     
    511479      st->notch_radius = QCONST16(.992, 15); 
    512480 
    513    st->notch_mem = (spx_mem_t*)speex_alloc(2*C*sizeof(spx_mem_t)); 
     481   st->notch_mem[0] = st->notch_mem[1] = 0; 
    514482   st->adapted = 0; 
    515483   st->Pey = st->Pyy = FLOAT_ONE; 
     
    520488#endif 
    521489    
    522    st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t)); 
     490   st->play_buf = (spx_int16_t*)speex_alloc((PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t)); 
    523491   st->play_buf_pos = PLAYBACK_DELAY*st->frame_size; 
    524492   st->play_buf_started = 0; 
     
    528496 
    529497/** Resets echo canceller state */ 
    530 EXPORT void speex_echo_state_reset(SpeexEchoState *st) 
    531 { 
    532    int i, M, N, C, K; 
     498void speex_echo_state_reset(SpeexEchoState *st) 
     499{ 
     500   int i, M, N; 
    533501   st->cancel_count=0; 
    534502   st->screwed_up = 0; 
    535503   N = st->window_size; 
    536504   M = st->M; 
    537    C=st->C; 
    538    K=st->K; 
    539505   for (i=0;i<N*M;i++) 
    540506      st->W[i] = 0; 
     
    556522      st->last_y[i] = 0; 
    557523   } 
    558    for (i=0;i<N*C;i++) 
     524   for (i=0;i<N;i++) 
    559525   { 
    560526      st->E[i] = 0; 
    561    } 
    562    for (i=0;i<N*K;i++) 
    563    { 
    564527      st->x[i] = 0; 
    565528   } 
    566    for (i=0;i<2*C;i++) 
    567       st->notch_mem[i] = 0; 
    568    for (i=0;i<C;i++) 
    569       st->memD[i]=st->memE[i]=0; 
    570    for (i=0;i<K;i++) 
    571       st->memX[i]=0; 
     529   st->notch_mem[0] = st->notch_mem[1] = 0; 
     530   st->memX=st->memD=st->memE=0; 
    572531 
    573532   st->saturated = 0; 
     
    587546 
    588547/** Destroys an echo canceller state */ 
    589 EXPORT void speex_echo_state_destroy(SpeexEchoState *st) 
     548void speex_echo_state_destroy(SpeexEchoState *st) 
    590549{ 
    591550   spx_fft_destroy(st->fft_table); 
     
    618577   speex_free(st->wtmp2); 
    619578#endif 
    620    speex_free(st->memX); 
    621    speex_free(st->memD); 
    622    speex_free(st->memE); 
    623    speex_free(st->notch_mem); 
    624  
    625579   speex_free(st->play_buf); 
    626580   speex_free(st); 
     
    634588} 
    635589 
    636 EXPORT void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out) 
     590void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out) 
    637591{ 
    638592   int i; 
     
    657611} 
    658612 
    659 EXPORT void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play) 
     613void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play) 
    660614{ 
    661615   /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/ 
     
    684638 
    685639/** Performs echo cancellation on a frame (deprecated, last arg now ignored) */ 
    686 EXPORT void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout) 
     640void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout) 
    687641{ 
    688642   speex_echo_cancellation(st, in, far_end, out); 
     
    690644 
    691645/** Performs echo cancellation on a frame */ 
    692 EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out) 
    693 { 
    694    int i,j, chan, speak; 
    695    int N,M, C, K; 
     646void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out) 
     647{ 
     648   int i,j; 
     649   int N,M; 
    696650   spx_word32_t Syy,See,Sxx,Sdd, Sff; 
    697651#ifdef TWO_PATH 
     
    708662   N = st->window_size; 
    709663   M = st->M; 
    710    C = st->C; 
    711    K = st->K; 
    712  
    713664   st->cancel_count++; 
    714665#ifdef FIXED_POINT 
     
    720671#endif 
    721672 
    722    for (chan = 0; chan < C; chan++) 
    723    { 
    724       /* Apply a notch filter to make sure DC doesn't end up causing problems */ 
    725       filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C); 
    726       /* Copy input data to buffer and apply pre-emphasis */ 
    727       /* Copy input data to buffer */ 
    728       for (i=0;i<st->frame_size;i++) 
    729       { 
    730          spx_word32_t tmp32; 
    731          /* FIXME: This core has changed a bit, need to merge properly */ 
    732          tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan]))); 
    733 #ifdef FIXED_POINT 
    734          if (tmp32 > 32767) 
    735          { 
    736             tmp32 = 32767; 
    737             if (st->saturated == 0) 
    738                st->saturated = 1; 
    739          }       
    740          if (tmp32 < -32767) 
    741          { 
    742             tmp32 = -32767; 
    743             if (st->saturated == 0) 
    744                st->saturated = 1; 
    745          } 
    746 #endif 
    747          st->memD[chan] = st->input[chan*st->frame_size+i]; 
    748          st->input[chan*st->frame_size+i] = EXTRACT16(tmp32); 
    749       } 
    750    } 
    751  
    752    for (speak = 0; speak < K; speak++) 
    753    { 
    754       for (i=0;i<st->frame_size;i++) 
    755       { 
    756          spx_word32_t tmp32; 
    757          st->x[speak*N+i] = st->x[speak*N+i+st->frame_size]; 
    758          tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak]))); 
    759 #ifdef FIXED_POINT 
    760          /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */ 
    761          if (tmp32 > 32767) 
    762          { 
    763             tmp32 = 32767; 
    764             st->saturated = M+1; 
    765          }       
    766          if (tmp32 < -32767) 
    767          { 
    768             tmp32 = -32767; 
    769             st->saturated = M+1; 
    770          }       
    771 #endif 
    772          st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32); 
    773          st->memX[speak] = far_end[i*K+speak]; 
    774       } 
    775    }    
    776     
    777    for (speak = 0; speak < K; speak++) 
    778    { 
    779       /* Shift memory: this could be optimized eventually*/ 
    780       for (j=M-1;j>=0;j--) 
    781       { 
    782          for (i=0;i<N;i++) 
    783             st->X[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i]; 
    784       } 
    785       /* Convert x (echo input) to frequency domain */ 
    786       spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]); 
    787    } 
    788     
    789    Sxx = 0; 
    790    for (speak = 0; speak < K; speak++) 
    791    { 
    792       Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size); 
    793       power_spectrum_accum(st->X+speak*N, st->Xf, N); 
    794    } 
    795     
    796    Sff = 0;   
    797    for (chan = 0; chan < C; chan++) 
    798    { 
     673   /* Apply a notch filter to make sure DC doesn't end up causing problems */ 
     674   filter_dc_notch16(in, st->notch_radius, st->input, st->frame_size, st->notch_mem); 
     675   /* Copy input data to buffer and apply pre-emphasis */ 
     676   for (i=0;i<st->frame_size;i++) 
     677   { 
     678      spx_word32_t tmp32; 
     679      tmp32 = SUB32(EXTEND32(far_end[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memX))); 
     680#ifdef FIXED_POINT 
     681      /* If saturation occurs here, we need to freeze adaptation for M+1 frames (not just one) */ 
     682      if (tmp32 > 32767) 
     683      { 
     684         tmp32 = 32767; 
     685         st->saturated = M+1; 
     686      } 
     687      if (tmp32 < -32767) 
     688      { 
     689         tmp32 = -32767; 
     690         st->saturated = M+1; 
     691      }       
     692#endif 
     693      st->x[i+st->frame_size] = EXTRACT16(tmp32); 
     694      st->memX = far_end[i]; 
     695       
     696      tmp32 = SUB32(EXTEND32(st->input[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD))); 
     697#ifdef FIXED_POINT 
     698      if (tmp32 > 32767) 
     699      { 
     700         tmp32 = 32767; 
     701         if (st->saturated == 0) 
     702            st->saturated = 1; 
     703      }       
     704      if (tmp32 < -32767) 
     705      { 
     706         tmp32 = -32767; 
     707         if (st->saturated == 0) 
     708            st->saturated = 1; 
     709      } 
     710#endif 
     711      st->memD = st->input[i]; 
     712      st->input[i] = tmp32; 
     713   } 
     714 
     715   /* Shift memory: this could be optimized eventually*/ 
     716   for (j=M-1;j>=0;j--) 
     717   { 
     718      for (i=0;i<N;i++) 
     719         st->X[(j+1)*N+i] = st->X[j*N+i]; 
     720   } 
     721 
     722   /* Convert x (far end) to frequency domain */ 
     723   spx_fft(st->fft_table, st->x, &st->X[0]); 
     724   for (i=0;i<N;i++) 
     725      st->last_y[i] = st->x[i]; 
     726   Sxx = mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size); 
     727   for (i=0;i<st->frame_size;i++) 
     728      st->x[i] = st->x[i+st->frame_size]; 
     729   /* From here on, the top part of x is used as scratch space */ 
     730    
    799731#ifdef TWO_PATH 
    800       /* Compute foreground filter */ 
    801       spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K); 
    802       spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N); 
    803       for (i=0;i<st->frame_size;i++) 
    804          st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]); 
    805       Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); 
    806 #endif 
    807    } 
     732   /* Compute foreground filter */ 
     733   spectral_mul_accum16(st->X, st->foreground, st->Y, N, M);    
     734   spx_ifft(st->fft_table, st->Y, st->e); 
     735   for (i=0;i<st->frame_size;i++) 
     736      st->e[i] = SUB16(st->input[i], st->e[i+st->frame_size]); 
     737   Sff = mdf_inner_prod(st->e, st->e, st->frame_size); 
     738#endif 
    808739    
    809740   /* Adjust proportional adaption rate */ 
    810    /* FIXME: Adjust that for C, K*/ 
    811    if (st->adapted) 
    812       mdf_adjust_prop (st->W, N, M, C*K, st->prop); 
     741   mdf_adjust_prop (st->W, N, M, st->prop); 
    813742   /* Compute weight gradient */ 
    814743   if (st->saturated == 0) 
    815744   { 
    816       for (chan = 0; chan < C; chan++) 
    817       { 
    818          for (speak = 0; speak < K; speak++) 
    819          { 
    820             for (j=M-1;j>=0;j--) 
    821             { 
    822                weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N); 
    823                for (i=0;i<N;i++) 
    824                   st->W[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i]; 
    825             } 
    826          } 
     745      for (j=M-1;j>=0;j--) 
     746      { 
     747         weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N], st->E, st->PHI, N); 
     748         for (i=0;i<N;i++) 
     749            st->W[j*N+i] = ADD32(st->W[j*N+i], st->PHI[i]); 
     750          
    827751      } 
    828752   } else { 
     
    830754   } 
    831755    
    832    /* FIXME: MC conversion required */  
    833756   /* Update weight to prevent circular convolution (MDF / AUMDF) */ 
    834    for (chan = 0; chan < C; chan++) 
    835    { 
    836       for (speak = 0; speak < K; speak++) 
    837       { 
    838          for (j=0;j<M;j++) 
     757   for (j=0;j<M;j++) 
     758   { 
     759      /* This is a variant of the Alternatively Updated MDF (AUMDF) */ 
     760      /* Remove the "if" to make this an MDF filter */ 
     761      if (j==0 || st->cancel_count%(M-1) == j-1) 
     762      { 
     763#ifdef FIXED_POINT 
     764         for (i=0;i<N;i++) 
     765            st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],NORMALIZE_SCALEDOWN+16)); 
     766         spx_ifft(st->fft_table, st->wtmp2, st->wtmp); 
     767         for (i=0;i<st->frame_size;i++) 
    839768         { 
    840             /* This is a variant of the Alternatively Updated MDF (AUMDF) */ 
    841             /* Remove the "if" to make this an MDF filter */ 
    842             if (j==0 || st->cancel_count%(M-1) == j-1) 
    843             { 
    844 #ifdef FIXED_POINT 
    845                for (i=0;i<N;i++) 
    846                   st->wtmp2[i] = EXTRACT16(PSHR32(st->W[chan*N*K*M + j*N*K + speak*N + i],NORMALIZE_SCALEDOWN+16)); 
    847                spx_ifft(st->fft_table, st->wtmp2, st->wtmp); 
    848                for (i=0;i<st->frame_size;i++) 
    849                { 
    850                   st->wtmp[i]=0; 
    851                } 
    852                for (i=st->frame_size;i<N;i++) 
    853                { 
    854                   st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP); 
    855                } 
    856                spx_fft(st->fft_table, st->wtmp, st->wtmp2); 
    857                /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */ 
    858                for (i=0;i<N;i++) 
    859                   st->W[chan*N*K*M + j*N*K + speak*N + i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); 
    860 #else 
    861                spx_ifft(st->fft_table, &st->W[chan*N*K*M + j*N*K + speak*N], st->wtmp); 
    862                for (i=st->frame_size;i<N;i++) 
    863                { 
    864                   st->wtmp[i]=0; 
    865                } 
    866                spx_fft(st->fft_table, st->wtmp, &st->W[chan*N*K*M + j*N*K + speak*N]); 
    867 #endif 
    868             } 
     769            st->wtmp[i]=0; 
    869770         } 
    870       } 
    871    } 
    872     
    873    /* So we can use power_spectrum_accum */  
    874    for (i=0;i<=st->frame_size;i++) 
    875       st->Rf[i] = st->Yf[i] = st->Xf[i] = 0; 
    876        
    877    Dbf = 0; 
    878    See = 0;     
     771         for (i=st->frame_size;i<N;i++) 
     772         { 
     773            st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP); 
     774         } 
     775         spx_fft(st->fft_table, st->wtmp, st->wtmp2); 
     776         /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */ 
     777         for (i=0;i<N;i++) 
     778            st->W[j*N+i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); 
     779#else 
     780         spx_ifft(st->fft_table, &st->W[j*N], st->wtmp); 
     781         for (i=st->frame_size;i<N;i++) 
     782         { 
     783            st->wtmp[i]=0; 
     784         } 
     785         spx_fft(st->fft_table, st->wtmp, &st->W[j*N]); 
     786#endif 
     787      } 
     788   } 
     789 
     790   /* Compute filter response Y */ 
     791   spectral_mul_accum(st->X, st->W, st->Y, N, M); 
     792   spx_ifft(st->fft_table, st->Y, st->y); 
     793 
    879794#ifdef TWO_PATH 
    880795   /* Difference in response, this is used to estimate the variance of our residual power estimate */ 
    881    for (chan = 0; chan < C; chan++) 
    882    { 
    883       spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K); 
    884       spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N); 
    885       for (i=0;i<st->frame_size;i++) 
    886          st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]); 
    887       Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); 
    888       for (i=0;i<st->frame_size;i++) 
    889          st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]); 
    890       See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); 
    891    } 
    892 #endif 
    893  
     796   for (i=0;i<st->frame_size;i++) 
     797      st->e[i] = SUB16(st->e[i+st->frame_size], st->y[i+st->frame_size]); 
     798   Dbf = 10+mdf_inner_prod(st->e, st->e, st->frame_size); 
     799#endif 
     800 
     801   for (i=0;i<st->frame_size;i++) 
     802      st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]); 
     803   See = mdf_inner_prod(st->e, st->e, st->frame_size); 
    894804#ifndef TWO_PATH 
    895805   Sff = See; 
     
    928838      st->Dvar1 = st->Dvar2 = FLOAT_ZERO; 
    929839      /* Copy background filter to foreground filter */ 
    930       for (i=0;i<N*M*C*K;i++) 
     840      for (i=0;i<N*M;i++) 
    931841         st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16)); 
    932842      /* Apply a smooth transition so as to not introduce blocking artifacts */ 
    933       for (chan = 0; chan < C; chan++) 
    934          for (i=0;i<st->frame_size;i++) 
    935             st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]); 
     843      for (i=0;i<st->frame_size;i++) 
     844         st->e[i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[i+st->frame_size]); 
    936845   } else { 
    937846      int reset_background=0; 
     
    946855      { 
    947856         /* Copy foreground filter to background filter */ 
    948          for (i=0;i<N*M*C*K;i++) 
     857         for (i=0;i<N*M;i++) 
    949858            st->W[i] = SHL32(EXTEND32(st->foreground[i]),16); 
    950859         /* We also need to copy the output so as to get correct adaptation */ 
    951          for (chan = 0; chan < C; chan++) 
    952          {         
    953             for (i=0;i<st->frame_size;i++) 
    954                st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size]; 
    955             for (i=0;i<st->frame_size;i++) 
    956                st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]); 
    957          }         
     860         for (i=0;i<st->frame_size;i++) 
     861            st->y[i+st->frame_size] = st->e[i+st->frame_size]; 
     862         for (i=0;i<st->frame_size;i++) 
     863            st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]); 
    958864         See = Sff; 
    959865         st->Davg1 = st->Davg2 = 0; 
     
    963869#endif 
    964870 
    965    Sey = Syy = Sdd = 0;   
    966    for (chan = 0; chan < C; chan++) 
    967    {     
    968       /* Compute error signal (for the output with de-emphasis) */  
    969       for (i=0;i<st->frame_size;i++) 
    970       { 
    971          spx_word32_t tmp_out; 
     871   /* Compute error signal (for the output with de-emphasis) */  
     872   for (i=0;i<st->frame_size;i++) 
     873   { 
     874      spx_word32_t tmp_out; 
    972875#ifdef TWO_PATH 
    973          tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size])); 
    974 #else 
    975          tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size])); 
    976 #endif 
    977          tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan]))); 
     876      tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->e[i+st->frame_size])); 
     877#else 
     878      tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->y[i+st->frame_size])); 
     879#endif 
     880      /* Saturation */ 
     881      if (tmp_out>32767) 
     882         tmp_out = 32767; 
     883      else if (tmp_out<-32768) 
     884         tmp_out = -32768; 
     885      tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE))); 
    978886      /* This is an arbitrary test for saturation in the microphone signal */ 
    979          if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000) 
    980          { 
     887      if (in[i] <= -32000 || in[i] >= 32000) 
     888      { 
     889         tmp_out = 0; 
    981890         if (st->saturated == 0) 
    982891            st->saturated = 1; 
    983          } 
    984          out[i*C+chan] = WORD2INT(tmp_out); 
    985          st->memE[chan] = tmp_out; 
    986       } 
    987  
     892      } 
     893      out[i] = (spx_int16_t)tmp_out; 
     894      st->memE = tmp_out; 
     895   } 
     896    
    988897#ifdef DUMP_ECHO_CANCEL_DATA 
    989       dump_audio(in, far_end, out, st->frame_size); 
    990 #endif 
    991     
    992       /* Compute error signal (filter update version) */  
    993       for (i=0;i<st->frame_size;i++) 
    994       { 
    995          st->e[chan*N+i+st->frame_size] = st->e[chan*N+i]; 
    996          st->e[chan*N+i] = 0; 
    997       } 
    998        
    999       /* Compute a bunch of correlations */ 
    1000       /* FIXME: bad merge */ 
    1001       Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size); 
    1002       Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size); 
    1003       Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size); 
    1004        
    1005       /* Convert error to frequency domain */ 
    1006       spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N); 
    1007       for (i=0;i<st->frame_size;i++) 
    1008          st->y[i+chan*N] = 0; 
    1009       spx_fft(st->fft_table, st->y+chan*N, st->Y+chan*N); 
    1010     
    1011       /* Compute power spectrum of echo (X), error (E) and filter response (Y) */ 
    1012       power_spectrum_accum(st->E+chan*N, st->Rf, N); 
    1013       power_spectrum_accum(st->Y+chan*N, st->Yf, N); 
    1014      
    1015    } 
     898   dump_audio(in, far_end, out, st->frame_size); 
     899#endif 
     900    
     901   /* Compute error signal (filter update version) */  
     902   for (i=0;i<st->frame_size;i++) 
     903   { 
     904      st->e[i+st->frame_size] = st->e[i]; 
     905      st->e[i] = 0; 
     906   } 
     907 
     908   /* Compute a bunch of correlations */ 
     909   Sey = mdf_inner_prod(st->e+st->frame_size, st->y+st->frame_size, st->frame_size); 
     910   Syy = mdf_inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size); 
     911   Sdd = mdf_inner_prod(st->input, st->input, st->frame_size); 
    1016912    
    1017913   /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/ 
     
    1026922      /* Things have gone really bad */ 
    1027923      st->screwed_up += 50; 
    1028       for (i=0;i<st->frame_size*C;i++) 
     924      for (i=0;i<st->frame_size;i++) 
    1029925         out[i] = 0; 
    1030926   } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6))) 
     
    1045941   /* Add a small noise floor to make sure not to have problems when dividing */ 
    1046942   See = MAX32(See, SHR32(MULT16_16(N, 100),6)); 
    1047       
    1048    for (speak = 0; speak < K; speak++) 
    1049    { 
    1050       Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size); 
    1051       power_spectrum_accum(st->X+speak*N, st->Xf, N); 
    1052    } 
    1053  
     943 
     944   /* Convert error to frequency domain */ 
     945   spx_fft(st->fft_table, st->e, st->E); 
     946   for (i=0;i<st->frame_size;i++) 
     947      st->y[i] = 0; 
     948   spx_fft(st->fft_table, st->y, st->Y); 
     949 
     950   /* Compute power spectrum of far end (X), error (E) and filter response (Y) */ 
     951   power_spectrum(st->E, st->Rf, N); 
     952   power_spectrum(st->Y, st->Yf, N); 
     953   power_spectrum(st->X, st->Xf, N); 
    1054954    
    1055955   /* Smooth far end energy estimate over time */ 
    1056956   for (j=0;j<=st->frame_size;j++) 
    1057957      st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]); 
     958    
     959   /* Enable this to compute the power based only on the tail (would need to compute more  
     960      efficiently to make this really useful */ 
     961   if (0) 
     962   { 
     963      float scale2 = .5f/M; 
     964      for (j=0;j<=st->frame_size;j++) 
     965         st->power[j] = 100; 
     966      for (i=0;i<M;i++) 
     967      { 
     968         power_spectrum(&st->X[i*N], st->Xf, N); 
     969         for (j=0;j<=st->frame_size;j++) 
     970            st->power[j] += scale2*st->Xf[j]; 
     971      } 
     972   } 
    1058973 
    1059974   /* Compute filtered spectra and (cross-)correlations */ 
     
    11771092   } 
    11781093 
    1179    /* FIXME: MC conversion required */  
     1094   /* Save residual echo so it can be used by the nonlinear processor */ 
     1095   if (st->adapted) 
     1096   { 
     1097      /* If the filter is adapted, take the filtered echo */ 
    11801098      for (i=0;i<st->frame_size;i++) 
    11811099         st->last_y[i] = st->last_y[st->frame_size+i]; 
    1182    if (st->adapted) 
    1183    { 
    1184       /* If the filter is adapted, take the filtered echo */ 
    11851100      for (i=0;i<st->frame_size;i++) 
    11861101         st->last_y[st->frame_size+i] = in[i]-out[i]; 
     
    12271142} 
    12281143 
    1229 EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr) 
     1144int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr) 
    12301145{ 
    12311146   switch(request) 
     
    12551170         (*(int*)ptr) = st->sampling_rate; 
    12561171         break; 
    1257       case SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE: 
    1258          /*FIXME: Implement this for multiple channels */ 
    1259          *((spx_int32_t *)ptr) = st->M * st->frame_size; 
    1260          break; 
    1261       case SPEEX_ECHO_GET_IMPULSE_RESPONSE: 
    1262       { 
    1263          int M = st->M, N = st->window_size, n = st->frame_size, i, j; 
    1264          spx_int32_t *filt = (spx_int32_t *) ptr; 
    1265          for(j=0;j<M;j++) 
    1266          { 
    1267             /*FIXME: Implement this for multiple channels */ 
    1268 #ifdef FIXED_POINT 
    1269             for (i=0;i<N;i++) 
    1270                st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],16+NORMALIZE_SCALEDOWN)); 
    1271             spx_ifft(st->fft_table, st->wtmp2, st->wtmp); 
    1272 #else 
    1273             spx_ifft(st->fft_table, &st->W[j*N], st->wtmp); 
    1274 #endif 
    1275             for(i=0;i<n;i++) 
    1276                filt[j*n+i] = PSHR32(MULT16_16(32767,st->wtmp[i]), WEIGHT_SHIFT-NORMALIZE_SCALEDOWN); 
    1277          } 
    1278       } 
    1279          break; 
    12801172      default: 
    12811173         speex_warning_int("Unknown speex_echo_ctl request: ", request); 
Note: See TracChangeset for help on using the changeset viewer.