Ignore:
Timestamp:
Jan 9, 2020 9:05:50 AM (4 years ago)
Author:
ming
Message:

Closed #589: Update Speex AEC to the latest version to get multichannel EC

File:
1 edited

Legend:

Unmodified
Added
Removed
  • pjproject/trunk/third_party/speex/libspeex/mdf.c

    r2198 r6129  
    1 /* Copyright (C) 2003-2006 Jean-Marc Valin 
     1/* Copyright (C) 2003-2008 Jean-Marc Valin 
    22 
    33   File: mdf.c 
     
    3434   The echo canceller is based on the MDF algorithm described in: 
    3535 
    36    J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter,  
    37    IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2,  
     36   J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter, 
     37   IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2, 
    3838   February 1990. 
    39     
    40    We use the Alternatively Updated MDF (AUMDF) variant. Robustness to  
     39 
     40   We use the Alternatively Updated MDF (AUMDF) variant. Robustness to 
    4141   double-talk is achieved using a variable learning rate as described in: 
    42     
    43    Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo  
     42 
     43   Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo 
    4444   Cancellation With Double-Talk. IEEE Transactions on Audio, 
    4545   Speech and Language Processing, Vol. 15, No. 3, pp. 1030-1034, 2007. 
    4646   http://people.xiph.org/~jm/papers/valin_taslp2006.pdf 
    47     
     47 
    4848   There is no explicit double-talk detection, but a continuous variation 
    4949   in the learning rate based on residual echo, double-talk and background 
    5050   noise. 
    51     
     51 
    5252   About the fixed-point version: 
    53    All the signals are represented with 16-bit words. The filter weights  
     53   All the signals are represented with 16-bit words. The filter weights 
    5454   are represented with 32-bit words, but only the top 16 bits are used 
    5555   in most cases. The lower 16 bits are completely unreliable (due to the 
     
    5757   adaptation -- probably by removing a "threshold effect" due to 
    5858   quantization (rounding going to zero) when the gradient is small. 
    59     
     59 
    6060   Another kludge that seems to work good: when performing the weight 
    6161   update, we only move half the way toward the "goal" this seems to 
     
    6363   can be seen as applying a gradient descent on a "soft constraint" 
    6464   instead of having a hard constraint. 
    65     
     65 
    6666*/ 
    6767 
     
    132132   int saturated; 
    133133   int screwed_up; 
     134   int C;                    /** Number of input channels (microphones) */ 
     135   int K;                    /** Number of output channels (loudspeakers) */ 
    134136   spx_int32_t sampling_rate; 
    135137   spx_word16_t spec_average; 
     
    138140   spx_word32_t sum_adapt; 
    139141   spx_word16_t leak_estimate; 
    140     
     142 
    141143   spx_word16_t *e;      /* scratch */ 
    142144   spx_word16_t *x;      /* Far-end input buffer (2N) */ 
     
    172174   spx_word16_t *prop; 
    173175   void *fft_table; 
    174    spx_word16_t memX, memD, memE; 
     176   spx_word16_t *memX, *memD, *memE; 
    175177   spx_word16_t preemph; 
    176178   spx_word16_t notch_radius; 
    177    spx_mem_t notch_mem[2]; 
     179   spx_mem_t *notch_mem; 
    178180 
    179181   /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */ 
     
    183185}; 
    184186 
    185 static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem) 
     187static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride) 
    186188{ 
    187189   int i; 
     
    191193#else 
    192194   den2 = radius*radius + .7*(1-radius)*(1-radius); 
    193 #endif    
     195#endif 
    194196   /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/ 
    195197   for (i=0;i<len;i++) 
    196198   { 
    197       spx_word16_t vin = in[i]; 
     199      spx_word16_t vin = in[i*stride]; 
    198200      spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15); 
    199201#ifdef FIXED_POINT 
     
    235237} 
    236238 
     239/** Compute power spectrum of a half-complex (packed) vector and accumulate */ 
     240static inline void power_spectrum_accum(const spx_word16_t *X, spx_word32_t *ps, int N) 
     241{ 
     242   int i, j; 
     243   ps[0]+=MULT16_16(X[0],X[0]); 
     244   for (i=1,j=1;i<N-1;i+=2,j++) 
     245   { 
     246      ps[j] +=  MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]); 
     247   } 
     248   ps[j]+=MULT16_16(X[i],X[i]); 
     249} 
     250 
    237251/** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */ 
    238252#ifdef FIXED_POINT 
     
    331345} 
    332346 
    333 static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, spx_word16_t *prop) 
    334 { 
    335    int i, j; 
     347static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, int P, spx_word16_t *prop) 
     348{ 
     349   int i, j, p; 
    336350   spx_word16_t max_sum = 1; 
    337351   spx_word32_t prop_sum = 1; 
     
    339353   { 
    340354      spx_word32_t tmp = 1; 
    341       for (j=0;j<N;j++) 
    342          tmp += MULT16_16(EXTRACT16(SHR32(W[i*N+j],18)), EXTRACT16(SHR32(W[i*N+j],18))); 
     355      for (p=0;p<P;p++) 
     356         for (j=0;j<N;j++) 
     357            tmp += MULT16_16(EXTRACT16(SHR32(W[p*N*M + i*N+j],18)), EXTRACT16(SHR32(W[p*N*M + i*N+j],18))); 
    343358#ifdef FIXED_POINT 
    344359      /* Just a security in case an overflow were to occur */ 
     
    379394 
    380395/** Creates a new echo canceller state */ 
    381 SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) 
    382 { 
    383    int i,N,M; 
     396EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) 
     397{ 
     398   return speex_echo_state_init_mc(frame_size, filter_length, 1, 1); 
     399} 
     400 
     401EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers) 
     402{ 
     403   int i,N,M, C, K; 
    384404   SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState)); 
    385405 
     406   st->K = nb_speakers; 
     407   st->C = nb_mic; 
     408   C=st->C; 
     409   K=st->K; 
    386410#ifdef DUMP_ECHO_CANCEL_DATA 
    387411   if (rFile || pFile || oFile) 
     
    391415   oFile = fopen("aec_out.sw", "wb"); 
    392416#endif 
    393     
     417 
    394418   st->frame_size = frame_size; 
    395419   st->window_size = 2*frame_size; 
     
    413437 
    414438   st->fft_table = spx_fft_init(N); 
    415     
    416    st->e = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 
    417    st->x = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 
    418    st->input = (spx_word16_t*)speex_alloc(st->frame_size*sizeof(spx_word16_t)); 
    419    st->y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 
    420    st->last_y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 
     439 
     440   st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 
     441   st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t)); 
     442   st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t)); 
     443   st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 
     444   st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 
    421445   st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 
    422446   st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 
     
    425449   st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 
    426450 
    427    st->X = (spx_word16_t*)speex_alloc((M+1)*N*sizeof(spx_word16_t)); 
    428    st->Y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 
    429    st->E = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 
    430    st->W = (spx_word32_t*)speex_alloc(M*N*sizeof(spx_word32_t)); 
     451   st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t)); 
     452   st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 
     453   st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 
     454   st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t)); 
    431455#ifdef TWO_PATH 
    432    st->foreground = (spx_word16_t*)speex_alloc(M*N*sizeof(spx_word16_t)); 
     456   st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t)); 
    433457#endif 
    434458   st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); 
     
    451475   for (i=0;i<=st->frame_size;i++) 
    452476      st->power_1[i] = FLOAT_ONE; 
    453    for (i=0;i<N*M;i++) 
     477   for (i=0;i<N*M*K*C;i++) 
    454478      st->W[i] = 0; 
    455479   { 
     
    466490      for (i=M-1;i>=0;i--) 
    467491      { 
    468          st->prop[i] = DIV32(MULT16_16(QCONST16(.8,15), st->prop[i]),sum); 
    469       } 
    470    } 
    471     
    472    st->memX=st->memD=st->memE=0; 
     492         st->prop[i] = DIV32(MULT16_16(QCONST16(.8f,15), st->prop[i]),sum); 
     493      } 
     494   } 
     495 
     496   st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t)); 
     497   st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t)); 
     498   st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t)); 
    473499   st->preemph = QCONST16(.9,15); 
    474500   if (st->sampling_rate<12000) 
     
    479505      st->notch_radius = QCONST16(.992, 15); 
    480506 
    481    st->notch_mem[0] = st->notch_mem[1] = 0; 
     507   st->notch_mem = (spx_mem_t*)speex_alloc(2*C*sizeof(spx_mem_t)); 
    482508   st->adapted = 0; 
    483509   st->Pey = st->Pyy = FLOAT_ONE; 
    484     
     510 
    485511#ifdef TWO_PATH 
    486512   st->Davg1 = st->Davg2 = 0; 
    487513   st->Dvar1 = st->Dvar2 = FLOAT_ZERO; 
    488514#endif 
    489     
    490    st->play_buf = (spx_int16_t*)speex_alloc((PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t)); 
     515 
     516   st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t)); 
    491517   st->play_buf_pos = PLAYBACK_DELAY*st->frame_size; 
    492518   st->play_buf_started = 0; 
    493     
     519 
    494520   return st; 
    495521} 
    496522 
    497523/** Resets echo canceller state */ 
    498 void speex_echo_state_reset(SpeexEchoState *st) 
    499 { 
    500    int i, M, N; 
     524EXPORT void speex_echo_state_reset(SpeexEchoState *st) 
     525{ 
     526   int i, M, N, C, K; 
    501527   st->cancel_count=0; 
    502528   st->screwed_up = 0; 
    503529   N = st->window_size; 
    504530   M = st->M; 
     531   C=st->C; 
     532   K=st->K; 
    505533   for (i=0;i<N*M;i++) 
    506534      st->W[i] = 0; 
     
    522550      st->last_y[i] = 0; 
    523551   } 
    524    for (i=0;i<N;i++) 
     552   for (i=0;i<N*C;i++) 
    525553   { 
    526554      st->E[i] = 0; 
     555   } 
     556   for (i=0;i<N*K;i++) 
     557   { 
    527558      st->x[i] = 0; 
    528559   } 
    529    st->notch_mem[0] = st->notch_mem[1] = 0; 
    530    st->memX=st->memD=st->memE=0; 
     560   for (i=0;i<2*C;i++) 
     561      st->notch_mem[i] = 0; 
     562   for (i=0;i<C;i++) 
     563      st->memD[i]=st->memE[i]=0; 
     564   for (i=0;i<K;i++) 
     565      st->memX[i]=0; 
    531566 
    532567   st->saturated = 0; 
     
    546581 
    547582/** Destroys an echo canceller state */ 
    548 void speex_echo_state_destroy(SpeexEchoState *st) 
     583EXPORT void speex_echo_state_destroy(SpeexEchoState *st) 
    549584{ 
    550585   spx_fft_destroy(st->fft_table); 
     
    577612   speex_free(st->wtmp2); 
    578613#endif 
     614   speex_free(st->memX); 
     615   speex_free(st->memD); 
     616   speex_free(st->memE); 
     617   speex_free(st->notch_mem); 
     618 
    579619   speex_free(st->play_buf); 
    580620   speex_free(st); 
    581     
     621 
    582622#ifdef DUMP_ECHO_CANCEL_DATA 
    583623   fclose(rFile); 
     
    588628} 
    589629 
    590 void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out) 
     630EXPORT void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out) 
    591631{ 
    592632   int i; 
     
    611651} 
    612652 
    613 void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play) 
     653EXPORT void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play) 
    614654{ 
    615655   /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/ 
     
    638678 
    639679/** Performs echo cancellation on a frame (deprecated, last arg now ignored) */ 
    640 void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout) 
     680EXPORT void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout) 
    641681{ 
    642682   speex_echo_cancellation(st, in, far_end, out); 
     
    644684 
    645685/** Performs echo cancellation on a frame */ 
    646 void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out) 
    647 { 
    648    int i,j; 
    649    int N,M; 
     686EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out) 
     687{ 
     688   int i,j, chan, speak; 
     689   int N,M, C, K; 
    650690   spx_word32_t Syy,See,Sxx,Sdd, Sff; 
    651691#ifdef TWO_PATH 
     
    659699   spx_word16_t RER; 
    660700   spx_word32_t tmp32; 
    661     
     701 
    662702   N = st->window_size; 
    663703   M = st->M; 
     704   C = st->C; 
     705   K = st->K; 
     706 
    664707   st->cancel_count++; 
    665708#ifdef FIXED_POINT 
     
    671714#endif 
    672715 
    673    /* Apply a notch filter to make sure DC doesn't end up causing problems */ 
    674    filter_dc_notch16(in, st->notch_radius, st->input, st->frame_size, st->notch_mem); 
    675    /* Copy input data to buffer and apply pre-emphasis */ 
    676    for (i=0;i<st->frame_size;i++) 
    677    { 
    678       spx_word32_t tmp32; 
    679       tmp32 = SUB32(EXTEND32(far_end[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memX))); 
    680 #ifdef FIXED_POINT 
    681       /* If saturation occurs here, we need to freeze adaptation for M+1 frames (not just one) */ 
    682       if (tmp32 > 32767) 
    683       { 
    684          tmp32 = 32767; 
    685          st->saturated = M+1; 
    686       } 
    687       if (tmp32 < -32767) 
    688       { 
    689          tmp32 = -32767; 
    690          st->saturated = M+1; 
    691       }       
    692 #endif 
    693       st->x[i+st->frame_size] = EXTRACT16(tmp32); 
    694       st->memX = far_end[i]; 
    695        
    696       tmp32 = SUB32(EXTEND32(st->input[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD))); 
    697 #ifdef FIXED_POINT 
    698       if (tmp32 > 32767) 
    699       { 
    700          tmp32 = 32767; 
    701          if (st->saturated == 0) 
    702             st->saturated = 1; 
    703       }       
    704       if (tmp32 < -32767) 
    705       { 
    706          tmp32 = -32767; 
    707          if (st->saturated == 0) 
    708             st->saturated = 1; 
    709       } 
    710 #endif 
    711       st->memD = st->input[i]; 
    712       st->input[i] = tmp32; 
    713    } 
    714  
    715    /* Shift memory: this could be optimized eventually*/ 
    716    for (j=M-1;j>=0;j--) 
    717    { 
    718       for (i=0;i<N;i++) 
    719          st->X[(j+1)*N+i] = st->X[j*N+i]; 
    720    } 
    721  
    722    /* Convert x (far end) to frequency domain */ 
    723    spx_fft(st->fft_table, st->x, &st->X[0]); 
    724    for (i=0;i<N;i++) 
    725       st->last_y[i] = st->x[i]; 
    726    Sxx = mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size); 
    727    for (i=0;i<st->frame_size;i++) 
    728       st->x[i] = st->x[i+st->frame_size]; 
    729    /* From here on, the top part of x is used as scratch space */ 
    730     
     716   for (chan = 0; chan < C; chan++) 
     717   { 
     718      /* Apply a notch filter to make sure DC doesn't end up causing problems */ 
     719      filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C); 
     720      /* Copy input data to buffer and apply pre-emphasis */ 
     721      /* Copy input data to buffer */ 
     722      for (i=0;i<st->frame_size;i++) 
     723      { 
     724         spx_word32_t tmp32; 
     725         /* FIXME: This core has changed a bit, need to merge properly */ 
     726         tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan]))); 
     727#ifdef FIXED_POINT 
     728         if (tmp32 > 32767) 
     729         { 
     730            tmp32 = 32767; 
     731            if (st->saturated == 0) 
     732               st->saturated = 1; 
     733         } 
     734         if (tmp32 < -32767) 
     735         { 
     736            tmp32 = -32767; 
     737            if (st->saturated == 0) 
     738               st->saturated = 1; 
     739         } 
     740#endif 
     741         st->memD[chan] = st->input[chan*st->frame_size+i]; 
     742         st->input[chan*st->frame_size+i] = EXTRACT16(tmp32); 
     743      } 
     744   } 
     745 
     746   for (speak = 0; speak < K; speak++) 
     747   { 
     748      for (i=0;i<st->frame_size;i++) 
     749      { 
     750         spx_word32_t tmp32; 
     751         st->x[speak*N+i] = st->x[speak*N+i+st->frame_size]; 
     752         tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak]))); 
     753#ifdef FIXED_POINT 
     754         /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */ 
     755         if (tmp32 > 32767) 
     756         { 
     757            tmp32 = 32767; 
     758            st->saturated = M+1; 
     759         } 
     760         if (tmp32 < -32767) 
     761         { 
     762            tmp32 = -32767; 
     763            st->saturated = M+1; 
     764         } 
     765#endif 
     766         st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32); 
     767         st->memX[speak] = far_end[i*K+speak]; 
     768      } 
     769   } 
     770 
     771   for (speak = 0; speak < K; speak++) 
     772   { 
     773      /* Shift memory: this could be optimized eventually*/ 
     774      for (j=M-1;j>=0;j--) 
     775      { 
     776         for (i=0;i<N;i++) 
     777            st->X[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i]; 
     778      } 
     779      /* Convert x (echo input) to frequency domain */ 
     780      spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]); 
     781   } 
     782 
     783   Sxx = 0; 
     784   for (speak = 0; speak < K; speak++) 
     785   { 
     786      Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size); 
     787      power_spectrum_accum(st->X+speak*N, st->Xf, N); 
     788   } 
     789 
     790   Sff = 0; 
     791   for (chan = 0; chan < C; chan++) 
     792   { 
    731793#ifdef TWO_PATH 
    732    /* Compute foreground filter */ 
    733    spectral_mul_accum16(st->X, st->foreground, st->Y, N, M);    
    734    spx_ifft(st->fft_table, st->Y, st->e); 
    735    for (i=0;i<st->frame_size;i++) 
    736       st->e[i] = SUB16(st->input[i], st->e[i+st->frame_size]); 
    737    Sff = mdf_inner_prod(st->e, st->e, st->frame_size); 
    738 #endif 
    739     
     794      /* Compute foreground filter */ 
     795      spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K); 
     796      spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N); 
     797      for (i=0;i<st->frame_size;i++) 
     798         st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]); 
     799      Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); 
     800#endif 
     801   } 
     802 
    740803   /* Adjust proportional adaption rate */ 
    741    mdf_adjust_prop (st->W, N, M, st->prop); 
     804   /* FIXME: Adjust that for C, K*/ 
     805   if (st->adapted) 
     806      mdf_adjust_prop (st->W, N, M, C*K, st->prop); 
    742807   /* Compute weight gradient */ 
    743808   if (st->saturated == 0) 
    744809   { 
    745       for (j=M-1;j>=0;j--) 
    746       { 
    747          weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N], st->E, st->PHI, N); 
    748          for (i=0;i<N;i++) 
    749             st->W[j*N+i] = ADD32(st->W[j*N+i], st->PHI[i]); 
    750           
     810      for (chan = 0; chan < C; chan++) 
     811      { 
     812         for (speak = 0; speak < K; speak++) 
     813         { 
     814            for (j=M-1;j>=0;j--) 
     815            { 
     816               weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N); 
     817               for (i=0;i<N;i++) 
     818                  st->W[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i]; 
     819            } 
     820         } 
    751821      } 
    752822   } else { 
    753823      st->saturated--; 
    754824   } 
    755     
     825 
     826   /* FIXME: MC conversion required */ 
    756827   /* Update weight to prevent circular convolution (MDF / AUMDF) */ 
    757    for (j=0;j<M;j++) 
    758    { 
    759       /* This is a variant of the Alternatively Updated MDF (AUMDF) */ 
    760       /* Remove the "if" to make this an MDF filter */ 
    761       if (j==0 || st->cancel_count%(M-1) == j-1) 
    762       { 
    763 #ifdef FIXED_POINT 
    764          for (i=0;i<N;i++) 
    765             st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],NORMALIZE_SCALEDOWN+16)); 
    766          spx_ifft(st->fft_table, st->wtmp2, st->wtmp); 
    767          for (i=0;i<st->frame_size;i++) 
     828   for (chan = 0; chan < C; chan++) 
     829   { 
     830      for (speak = 0; speak < K; speak++) 
     831      { 
     832         for (j=0;j<M;j++) 
    768833         { 
    769             st->wtmp[i]=0; 
     834            /* This is a variant of the Alternatively Updated MDF (AUMDF) */ 
     835            /* Remove the "if" to make this an MDF filter */ 
     836            if (j==0 || st->cancel_count%(M-1) == j-1) 
     837            { 
     838#ifdef FIXED_POINT 
     839               for (i=0;i<N;i++) 
     840                  st->wtmp2[i] = EXTRACT16(PSHR32(st->W[chan*N*K*M + j*N*K + speak*N + i],NORMALIZE_SCALEDOWN+16)); 
     841               spx_ifft(st->fft_table, st->wtmp2, st->wtmp); 
     842               for (i=0;i<st->frame_size;i++) 
     843               { 
     844                  st->wtmp[i]=0; 
     845               } 
     846               for (i=st->frame_size;i<N;i++) 
     847               { 
     848                  st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP); 
     849               } 
     850               spx_fft(st->fft_table, st->wtmp, st->wtmp2); 
     851               /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */ 
     852               for (i=0;i<N;i++) 
     853                  st->W[chan*N*K*M + j*N*K + speak*N + i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); 
     854#else 
     855               spx_ifft(st->fft_table, &st->W[chan*N*K*M + j*N*K + speak*N], st->wtmp); 
     856               for (i=st->frame_size;i<N;i++) 
     857               { 
     858                  st->wtmp[i]=0; 
     859               } 
     860               spx_fft(st->fft_table, st->wtmp, &st->W[chan*N*K*M + j*N*K + speak*N]); 
     861#endif 
     862            } 
    770863         } 
    771          for (i=st->frame_size;i<N;i++) 
    772          { 
    773             st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP); 
    774          } 
    775          spx_fft(st->fft_table, st->wtmp, st->wtmp2); 
    776          /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */ 
    777          for (i=0;i<N;i++) 
    778             st->W[j*N+i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); 
    779 #else 
    780          spx_ifft(st->fft_table, &st->W[j*N], st->wtmp); 
    781          for (i=st->frame_size;i<N;i++) 
    782          { 
    783             st->wtmp[i]=0; 
    784          } 
    785          spx_fft(st->fft_table, st->wtmp, &st->W[j*N]); 
    786 #endif 
    787       } 
    788    } 
    789  
    790    /* Compute filter response Y */ 
    791    spectral_mul_accum(st->X, st->W, st->Y, N, M); 
    792    spx_ifft(st->fft_table, st->Y, st->y); 
    793  
     864      } 
     865   } 
     866 
     867   /* So we can use power_spectrum_accum */ 
     868   for (i=0;i<=st->frame_size;i++) 
     869      st->Rf[i] = st->Yf[i] = st->Xf[i] = 0; 
     870 
     871   Dbf = 0; 
     872   See = 0; 
    794873#ifdef TWO_PATH 
    795874   /* Difference in response, this is used to estimate the variance of our residual power estimate */ 
    796    for (i=0;i<st->frame_size;i++) 
    797       st->e[i] = SUB16(st->e[i+st->frame_size], st->y[i+st->frame_size]); 
    798    Dbf = 10+mdf_inner_prod(st->e, st->e, st->frame_size); 
    799 #endif 
    800  
    801    for (i=0;i<st->frame_size;i++) 
    802       st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]); 
    803    See = mdf_inner_prod(st->e, st->e, st->frame_size); 
     875   for (chan = 0; chan < C; chan++) 
     876   { 
     877      spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K); 
     878      spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N); 
     879      for (i=0;i<st->frame_size;i++) 
     880         st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]); 
     881      Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); 
     882      for (i=0;i<st->frame_size;i++) 
     883         st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]); 
     884      See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); 
     885   } 
     886#endif 
     887 
    804888#ifndef TWO_PATH 
    805889   Sff = See; 
     
    808892#ifdef TWO_PATH 
    809893   /* Logic for updating the foreground filter */ 
    810     
     894 
    811895   /* For two time windows, compute the mean of the energy difference, as well as the variance */ 
    812896   st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See))); 
     
    814898   st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf))); 
    815899   st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf))); 
    816     
     900 
    817901   /* Equivalent float code: 
    818902   st->Davg1 = .6*st->Davg1 + .4*(Sff-See); 
     
    821905   st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf; 
    822906   */ 
    823     
     907 
    824908   update_foreground = 0; 
    825909   /* Check if we have a statistically significant reduction in the residual echo */ 
     
    831915   else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2)))) 
    832916      update_foreground = 1; 
    833     
     917 
    834918   /* Do we update? */ 
    835919   if (update_foreground) 
     
    838922      st->Dvar1 = st->Dvar2 = FLOAT_ZERO; 
    839923      /* Copy background filter to foreground filter */ 
    840       for (i=0;i<N*M;i++) 
     924      for (i=0;i<N*M*C*K;i++) 
    841925         st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16)); 
    842926      /* Apply a smooth transition so as to not introduce blocking artifacts */ 
    843       for (i=0;i<st->frame_size;i++) 
    844          st->e[i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[i+st->frame_size]); 
     927      for (chan = 0; chan < C; chan++) 
     928         for (i=0;i<st->frame_size;i++) 
     929            st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]); 
    845930   } else { 
    846931      int reset_background=0; 
     
    855940      { 
    856941         /* Copy foreground filter to background filter */ 
    857          for (i=0;i<N*M;i++) 
     942         for (i=0;i<N*M*C*K;i++) 
    858943            st->W[i] = SHL32(EXTEND32(st->foreground[i]),16); 
    859944         /* We also need to copy the output so as to get correct adaptation */ 
    860          for (i=0;i<st->frame_size;i++) 
    861             st->y[i+st->frame_size] = st->e[i+st->frame_size]; 
    862          for (i=0;i<st->frame_size;i++) 
    863             st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]); 
     945         for (chan = 0; chan < C; chan++) 
     946         { 
     947            for (i=0;i<st->frame_size;i++) 
     948               st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size]; 
     949            for (i=0;i<st->frame_size;i++) 
     950               st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]); 
     951         } 
    864952         See = Sff; 
    865953         st->Davg1 = st->Davg2 = 0; 
     
    869957#endif 
    870958 
    871    /* Compute error signal (for the output with de-emphasis) */  
    872    for (i=0;i<st->frame_size;i++) 
    873    { 
    874       spx_word32_t tmp_out; 
     959   Sey = Syy = Sdd = 0; 
     960   for (chan = 0; chan < C; chan++) 
     961   { 
     962      /* Compute error signal (for the output with de-emphasis) */ 
     963      for (i=0;i<st->frame_size;i++) 
     964      { 
     965         spx_word32_t tmp_out; 
    875966#ifdef TWO_PATH 
    876       tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->e[i+st->frame_size])); 
    877 #else 
    878       tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->y[i+st->frame_size])); 
    879 #endif 
    880       /* Saturation */ 
    881       if (tmp_out>32767) 
    882          tmp_out = 32767; 
    883       else if (tmp_out<-32768) 
    884          tmp_out = -32768; 
    885       tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE))); 
     967         tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size])); 
     968#else 
     969         tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size])); 
     970#endif 
     971         tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan]))); 
    886972      /* This is an arbitrary test for saturation in the microphone signal */ 
    887       if (in[i] <= -32000 || in[i] >= 32000) 
    888       { 
    889          tmp_out = 0; 
     973         if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000) 
     974         { 
    890975         if (st->saturated == 0) 
    891976            st->saturated = 1; 
    892       } 
    893       out[i] = (spx_int16_t)tmp_out; 
    894       st->memE = tmp_out; 
    895    } 
    896     
     977         } 
     978         out[i*C+chan] = WORD2INT(tmp_out); 
     979         st->memE[chan] = tmp_out; 
     980      } 
     981 
    897982#ifdef DUMP_ECHO_CANCEL_DATA 
    898    dump_audio(in, far_end, out, st->frame_size); 
    899 #endif 
    900     
    901    /* Compute error signal (filter update version) */  
    902    for (i=0;i<st->frame_size;i++) 
    903    { 
    904       st->e[i+st->frame_size] = st->e[i]; 
    905       st->e[i] = 0; 
    906    } 
    907  
    908    /* Compute a bunch of correlations */ 
    909    Sey = mdf_inner_prod(st->e+st->frame_size, st->y+st->frame_size, st->frame_size); 
    910    Syy = mdf_inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size); 
    911    Sdd = mdf_inner_prod(st->input, st->input, st->frame_size); 
    912     
     983      dump_audio(in, far_end, out, st->frame_size); 
     984#endif 
     985 
     986      /* Compute error signal (filter update version) */ 
     987      for (i=0;i<st->frame_size;i++) 
     988      { 
     989         st->e[chan*N+i+st->frame_size] = st->e[chan*N+i]; 
     990         st->e[chan*N+i] = 0; 
     991      } 
     992 
     993      /* Compute a bunch of correlations */ 
     994      /* FIXME: bad merge */ 
     995      Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size); 
     996      Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size); 
     997      Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size); 
     998 
     999      /* Convert error to frequency domain */ 
     1000      spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N); 
     1001      for (i=0;i<st->frame_size;i++) 
     1002         st->y[i+chan*N] = 0; 
     1003      spx_fft(st->fft_table, st->y+chan*N, st->Y+chan*N); 
     1004 
     1005      /* Compute power spectrum of echo (X), error (E) and filter response (Y) */ 
     1006      power_spectrum_accum(st->E+chan*N, st->Rf, N); 
     1007      power_spectrum_accum(st->Y+chan*N, st->Yf, N); 
     1008 
     1009   } 
     1010 
    9131011   /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/ 
    914     
     1012 
    9151013   /* Do some sanity check */ 
    9161014   if (!(Syy>=0 && Sxx>=0 && See >= 0) 
     
    9221020      /* Things have gone really bad */ 
    9231021      st->screwed_up += 50; 
    924       for (i=0;i<st->frame_size;i++) 
     1022      for (i=0;i<st->frame_size*C;i++) 
    9251023         out[i] = 0; 
    9261024   } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6))) 
     
    9421040   See = MAX32(See, SHR32(MULT16_16(N, 100),6)); 
    9431041 
    944    /* Convert error to frequency domain */ 
    945    spx_fft(st->fft_table, st->e, st->E); 
    946    for (i=0;i<st->frame_size;i++) 
    947       st->y[i] = 0; 
    948    spx_fft(st->fft_table, st->y, st->Y); 
    949  
    950    /* Compute power spectrum of far end (X), error (E) and filter response (Y) */ 
    951    power_spectrum(st->E, st->Rf, N); 
    952    power_spectrum(st->Y, st->Yf, N); 
    953    power_spectrum(st->X, st->Xf, N); 
    954     
     1042   for (speak = 0; speak < K; speak++) 
     1043   { 
     1044      Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size); 
     1045      power_spectrum_accum(st->X+speak*N, st->Xf, N); 
     1046   } 
     1047 
     1048 
    9551049   /* Smooth far end energy estimate over time */ 
    9561050   for (j=0;j<=st->frame_size;j++) 
    9571051      st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]); 
    958     
    959    /* Enable this to compute the power based only on the tail (would need to compute more  
    960       efficiently to make this really useful */ 
    961    if (0) 
    962    { 
    963       float scale2 = .5f/M; 
    964       for (j=0;j<=st->frame_size;j++) 
    965          st->power[j] = 100; 
    966       for (i=0;i<M;i++) 
    967       { 
    968          power_spectrum(&st->X[i*N], st->Xf, N); 
    969          for (j=0;j<=st->frame_size;j++) 
    970             st->power[j] += scale2*st->Xf[j]; 
    971       } 
    972    } 
    9731052 
    9741053   /* Compute filtered spectra and (cross-)correlations */ 
     
    9881067#endif 
    9891068   } 
    990     
     1069 
    9911070   Pyy = FLOAT_SQRT(Pyy); 
    9921071   Pey = FLOAT_DIVU(Pey,Pyy); 
     
    10161095      st->leak_estimate = SHL16(st->leak_estimate,1); 
    10171096   /*printf ("%f\n", st->leak_estimate);*/ 
    1018     
     1097 
    10191098   /* Compute Residual to Error Ratio */ 
    10201099#ifdef FIXED_POINT 
     
    10721151      spx_word16_t adapt_rate=0; 
    10731152 
    1074       if (Sxx > SHR32(MULT16_16(N, 1000),6))  
     1153      if (Sxx > SHR32(MULT16_16(N, 1000),6)) 
    10751154      { 
    10761155         tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx); 
     
    10921171   } 
    10931172 
    1094    /* Save residual echo so it can be used by the nonlinear processor */ 
    1095    if (st->adapted) 
    1096    { 
    1097       /* If the filter is adapted, take the filtered echo */ 
     1173   /* FIXME: MC conversion required */ 
    10981174      for (i=0;i<st->frame_size;i++) 
    10991175         st->last_y[i] = st->last_y[st->frame_size+i]; 
     1176   if (st->adapted) 
     1177   { 
     1178      /* If the filter is adapted, take the filtered echo */ 
    11001179      for (i=0;i<st->frame_size;i++) 
    11011180         st->last_y[st->frame_size+i] = in[i]-out[i]; 
     
    11141193   spx_word16_t leak2; 
    11151194   int N; 
    1116     
     1195 
    11171196   N = st->window_size; 
    11181197 
     
    11201199   for (i=0;i<N;i++) 
    11211200      st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]); 
    1122        
     1201 
    11231202   /* Compute power spectrum of the echo */ 
    11241203   spx_fft(st->fft_table, st->y, st->Y); 
    11251204   power_spectrum(st->Y, residual_echo, N); 
    1126        
     1205 
    11271206#ifdef FIXED_POINT 
    11281207   if (st->leak_estimate > 16383) 
     
    11391218   for (i=0;i<=st->frame_size;i++) 
    11401219      residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]); 
    1141     
    1142 } 
    1143  
    1144 int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr) 
     1220 
     1221} 
     1222 
     1223EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr) 
    11451224{ 
    11461225   switch(request) 
    11471226   { 
    1148        
     1227 
    11491228      case SPEEX_ECHO_GET_FRAME_SIZE: 
    11501229         (*(int*)ptr) = st->frame_size; 
     
    11701249         (*(int*)ptr) = st->sampling_rate; 
    11711250         break; 
     1251      case SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE: 
     1252         /*FIXME: Implement this for multiple channels */ 
     1253         *((spx_int32_t *)ptr) = st->M * st->frame_size; 
     1254         break; 
     1255      case SPEEX_ECHO_GET_IMPULSE_RESPONSE: 
     1256      { 
     1257         int M = st->M, N = st->window_size, n = st->frame_size, i, j; 
     1258         spx_int32_t *filt = (spx_int32_t *) ptr; 
     1259         for(j=0;j<M;j++) 
     1260         { 
     1261            /*FIXME: Implement this for multiple channels */ 
     1262#ifdef FIXED_POINT 
     1263            for (i=0;i<N;i++) 
     1264               st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],16+NORMALIZE_SCALEDOWN)); 
     1265            spx_ifft(st->fft_table, st->wtmp2, st->wtmp); 
     1266#else 
     1267            spx_ifft(st->fft_table, &st->W[j*N], st->wtmp); 
     1268#endif 
     1269            for(i=0;i<n;i++) 
     1270               filt[j*n+i] = PSHR32(MULT16_16(32767,st->wtmp[i]), WEIGHT_SHIFT-NORMALIZE_SCALEDOWN); 
     1271         } 
     1272      } 
     1273         break; 
    11721274      default: 
    11731275         speex_warning_int("Unknown speex_echo_ctl request: ", request); 
Note: See TracChangeset for help on using the changeset viewer.