Changeset 6129 for pjproject/trunk/third_party/speex/libspeex/mdf.c
- Timestamp:
- Jan 9, 2020 9:05:50 AM (5 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
pjproject/trunk/third_party/speex/libspeex/mdf.c
r2198 r6129 1 /* Copyright (C) 2003-200 6Jean-Marc Valin1 /* Copyright (C) 2003-2008 Jean-Marc Valin 2 2 3 3 File: mdf.c … … 34 34 The echo canceller is based on the MDF algorithm described in: 35 35 36 J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter, 37 IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2, 36 J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter, 37 IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2, 38 38 February 1990. 39 40 We use the Alternatively Updated MDF (AUMDF) variant. Robustness to 39 40 We use the Alternatively Updated MDF (AUMDF) variant. Robustness to 41 41 double-talk is achieved using a variable learning rate as described in: 42 43 Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo 42 43 Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo 44 44 Cancellation With Double-Talk. IEEE Transactions on Audio, 45 45 Speech and Language Processing, Vol. 15, No. 3, pp. 1030-1034, 2007. 46 46 http://people.xiph.org/~jm/papers/valin_taslp2006.pdf 47 47 48 48 There is no explicit double-talk detection, but a continuous variation 49 49 in the learning rate based on residual echo, double-talk and background 50 50 noise. 51 51 52 52 About the fixed-point version: 53 All the signals are represented with 16-bit words. The filter weights 53 All the signals are represented with 16-bit words. The filter weights 54 54 are represented with 32-bit words, but only the top 16 bits are used 55 55 in most cases. The lower 16 bits are completely unreliable (due to the … … 57 57 adaptation -- probably by removing a "threshold effect" due to 58 58 quantization (rounding going to zero) when the gradient is small. 59 59 60 60 Another kludge that seems to work good: when performing the weight 61 61 update, we only move half the way toward the "goal" this seems to … … 63 63 can be seen as applying a gradient descent on a "soft constraint" 64 64 instead of having a hard constraint. 65 65 66 66 */ 67 67 … … 132 132 int saturated; 133 133 int screwed_up; 134 int C; /** Number of input channels (microphones) */ 135 int K; /** Number of output channels (loudspeakers) */ 134 136 spx_int32_t sampling_rate; 135 137 spx_word16_t spec_average; … … 138 140 spx_word32_t sum_adapt; 139 141 spx_word16_t leak_estimate; 140 142 141 143 spx_word16_t *e; /* scratch */ 142 144 spx_word16_t *x; /* Far-end input buffer (2N) */ … … 172 174 spx_word16_t *prop; 173 175 void *fft_table; 174 spx_word16_t memX, memD,memE;176 spx_word16_t *memX, *memD, *memE; 175 177 spx_word16_t preemph; 176 178 spx_word16_t notch_radius; 177 spx_mem_t notch_mem[2];179 spx_mem_t *notch_mem; 178 180 179 181 /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */ … … 183 185 }; 184 186 185 static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem )187 static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride) 186 188 { 187 189 int i; … … 191 193 #else 192 194 den2 = radius*radius + .7*(1-radius)*(1-radius); 193 #endif 195 #endif 194 196 /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/ 195 197 for (i=0;i<len;i++) 196 198 { 197 spx_word16_t vin = in[i ];199 spx_word16_t vin = in[i*stride]; 198 200 spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15); 199 201 #ifdef FIXED_POINT … … 235 237 } 236 238 239 /** Compute power spectrum of a half-complex (packed) vector and accumulate */ 240 static inline void power_spectrum_accum(const spx_word16_t *X, spx_word32_t *ps, int N) 241 { 242 int i, j; 243 ps[0]+=MULT16_16(X[0],X[0]); 244 for (i=1,j=1;i<N-1;i+=2,j++) 245 { 246 ps[j] += MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]); 247 } 248 ps[j]+=MULT16_16(X[i],X[i]); 249 } 250 237 251 /** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */ 238 252 #ifdef FIXED_POINT … … 331 345 } 332 346 333 static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, spx_word16_t *prop)334 { 335 int i, j ;347 static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, int P, spx_word16_t *prop) 348 { 349 int i, j, p; 336 350 spx_word16_t max_sum = 1; 337 351 spx_word32_t prop_sum = 1; … … 339 353 { 340 354 spx_word32_t tmp = 1; 341 for (j=0;j<N;j++) 342 tmp += MULT16_16(EXTRACT16(SHR32(W[i*N+j],18)), EXTRACT16(SHR32(W[i*N+j],18))); 355 for (p=0;p<P;p++) 356 for (j=0;j<N;j++) 357 tmp += MULT16_16(EXTRACT16(SHR32(W[p*N*M + i*N+j],18)), EXTRACT16(SHR32(W[p*N*M + i*N+j],18))); 343 358 #ifdef FIXED_POINT 344 359 /* Just a security in case an overflow were to occur */ … … 379 394 380 395 /** Creates a new echo canceller state */ 381 SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) 382 { 383 int i,N,M; 396 EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) 397 { 398 return speex_echo_state_init_mc(frame_size, filter_length, 1, 1); 399 } 400 401 EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers) 402 { 403 int i,N,M, C, K; 384 404 SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState)); 385 405 406 st->K = nb_speakers; 407 st->C = nb_mic; 408 C=st->C; 409 K=st->K; 386 410 #ifdef DUMP_ECHO_CANCEL_DATA 387 411 if (rFile || pFile || oFile) … … 391 415 oFile = fopen("aec_out.sw", "wb"); 392 416 #endif 393 417 394 418 st->frame_size = frame_size; 395 419 st->window_size = 2*frame_size; … … 413 437 414 438 st->fft_table = spx_fft_init(N); 415 416 st->e = (spx_word16_t*)speex_alloc( N*sizeof(spx_word16_t));417 st->x = (spx_word16_t*)speex_alloc( N*sizeof(spx_word16_t));418 st->input = (spx_word16_t*)speex_alloc( st->frame_size*sizeof(spx_word16_t));419 st->y = (spx_word16_t*)speex_alloc( N*sizeof(spx_word16_t));420 st->last_y = (spx_word16_t*)speex_alloc( N*sizeof(spx_word16_t));439 440 st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 441 st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t)); 442 st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t)); 443 st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 444 st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 421 445 st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 422 446 st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); … … 425 449 st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 426 450 427 st->X = (spx_word16_t*)speex_alloc( (M+1)*N*sizeof(spx_word16_t));428 st->Y = (spx_word16_t*)speex_alloc( N*sizeof(spx_word16_t));429 st->E = (spx_word16_t*)speex_alloc( N*sizeof(spx_word16_t));430 st->W = (spx_word32_t*)speex_alloc( M*N*sizeof(spx_word32_t));451 st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t)); 452 st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 453 st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 454 st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t)); 431 455 #ifdef TWO_PATH 432 st->foreground = (spx_word16_t*)speex_alloc(M*N* sizeof(spx_word16_t));456 st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t)); 433 457 #endif 434 458 st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); … … 451 475 for (i=0;i<=st->frame_size;i++) 452 476 st->power_1[i] = FLOAT_ONE; 453 for (i=0;i<N*M ;i++)477 for (i=0;i<N*M*K*C;i++) 454 478 st->W[i] = 0; 455 479 { … … 466 490 for (i=M-1;i>=0;i--) 467 491 { 468 st->prop[i] = DIV32(MULT16_16(QCONST16(.8,15), st->prop[i]),sum); 469 } 470 } 471 472 st->memX=st->memD=st->memE=0; 492 st->prop[i] = DIV32(MULT16_16(QCONST16(.8f,15), st->prop[i]),sum); 493 } 494 } 495 496 st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t)); 497 st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t)); 498 st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t)); 473 499 st->preemph = QCONST16(.9,15); 474 500 if (st->sampling_rate<12000) … … 479 505 st->notch_radius = QCONST16(.992, 15); 480 506 481 st->notch_mem [0] = st->notch_mem[1] = 0;507 st->notch_mem = (spx_mem_t*)speex_alloc(2*C*sizeof(spx_mem_t)); 482 508 st->adapted = 0; 483 509 st->Pey = st->Pyy = FLOAT_ONE; 484 510 485 511 #ifdef TWO_PATH 486 512 st->Davg1 = st->Davg2 = 0; 487 513 st->Dvar1 = st->Dvar2 = FLOAT_ZERO; 488 514 #endif 489 490 st->play_buf = (spx_int16_t*)speex_alloc( (PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t));515 516 st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t)); 491 517 st->play_buf_pos = PLAYBACK_DELAY*st->frame_size; 492 518 st->play_buf_started = 0; 493 519 494 520 return st; 495 521 } 496 522 497 523 /** Resets echo canceller state */ 498 void speex_echo_state_reset(SpeexEchoState *st)499 { 500 int i, M, N ;524 EXPORT void speex_echo_state_reset(SpeexEchoState *st) 525 { 526 int i, M, N, C, K; 501 527 st->cancel_count=0; 502 528 st->screwed_up = 0; 503 529 N = st->window_size; 504 530 M = st->M; 531 C=st->C; 532 K=st->K; 505 533 for (i=0;i<N*M;i++) 506 534 st->W[i] = 0; … … 522 550 st->last_y[i] = 0; 523 551 } 524 for (i=0;i<N ;i++)552 for (i=0;i<N*C;i++) 525 553 { 526 554 st->E[i] = 0; 555 } 556 for (i=0;i<N*K;i++) 557 { 527 558 st->x[i] = 0; 528 559 } 529 st->notch_mem[0] = st->notch_mem[1] = 0; 530 st->memX=st->memD=st->memE=0; 560 for (i=0;i<2*C;i++) 561 st->notch_mem[i] = 0; 562 for (i=0;i<C;i++) 563 st->memD[i]=st->memE[i]=0; 564 for (i=0;i<K;i++) 565 st->memX[i]=0; 531 566 532 567 st->saturated = 0; … … 546 581 547 582 /** Destroys an echo canceller state */ 548 void speex_echo_state_destroy(SpeexEchoState *st)583 EXPORT void speex_echo_state_destroy(SpeexEchoState *st) 549 584 { 550 585 spx_fft_destroy(st->fft_table); … … 577 612 speex_free(st->wtmp2); 578 613 #endif 614 speex_free(st->memX); 615 speex_free(st->memD); 616 speex_free(st->memE); 617 speex_free(st->notch_mem); 618 579 619 speex_free(st->play_buf); 580 620 speex_free(st); 581 621 582 622 #ifdef DUMP_ECHO_CANCEL_DATA 583 623 fclose(rFile); … … 588 628 } 589 629 590 void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out)630 EXPORT void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out) 591 631 { 592 632 int i; … … 611 651 } 612 652 613 void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play)653 EXPORT void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play) 614 654 { 615 655 /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/ … … 638 678 639 679 /** Performs echo cancellation on a frame (deprecated, last arg now ignored) */ 640 void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout)680 EXPORT void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout) 641 681 { 642 682 speex_echo_cancellation(st, in, far_end, out); … … 644 684 645 685 /** Performs echo cancellation on a frame */ 646 void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out)647 { 648 int i,j ;649 int N,M ;686 EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out) 687 { 688 int i,j, chan, speak; 689 int N,M, C, K; 650 690 spx_word32_t Syy,See,Sxx,Sdd, Sff; 651 691 #ifdef TWO_PATH … … 659 699 spx_word16_t RER; 660 700 spx_word32_t tmp32; 661 701 662 702 N = st->window_size; 663 703 M = st->M; 704 C = st->C; 705 K = st->K; 706 664 707 st->cancel_count++; 665 708 #ifdef FIXED_POINT … … 671 714 #endif 672 715 673 /* Apply a notch filter to make sure DC doesn't end up causing problems */ 674 filter_dc_notch16(in, st->notch_radius, st->input, st->frame_size, st->notch_mem); 675 /* Copy input data to buffer and apply pre-emphasis */ 676 for (i=0;i<st->frame_size;i++) 677 { 678 spx_word32_t tmp32; 679 tmp32 = SUB32(EXTEND32(far_end[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memX))); 680 #ifdef FIXED_POINT 681 /* If saturation occurs here, we need to freeze adaptation for M+1 frames (not just one) */ 682 if (tmp32 > 32767) 683 { 684 tmp32 = 32767; 685 st->saturated = M+1; 686 } 687 if (tmp32 < -32767) 688 { 689 tmp32 = -32767; 690 st->saturated = M+1; 691 } 692 #endif 693 st->x[i+st->frame_size] = EXTRACT16(tmp32); 694 st->memX = far_end[i]; 695 696 tmp32 = SUB32(EXTEND32(st->input[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD))); 697 #ifdef FIXED_POINT 698 if (tmp32 > 32767) 699 { 700 tmp32 = 32767; 701 if (st->saturated == 0) 702 st->saturated = 1; 703 } 704 if (tmp32 < -32767) 705 { 706 tmp32 = -32767; 707 if (st->saturated == 0) 708 st->saturated = 1; 709 } 710 #endif 711 st->memD = st->input[i]; 712 st->input[i] = tmp32; 713 } 714 715 /* Shift memory: this could be optimized eventually*/ 716 for (j=M-1;j>=0;j--) 717 { 718 for (i=0;i<N;i++) 719 st->X[(j+1)*N+i] = st->X[j*N+i]; 720 } 721 722 /* Convert x (far end) to frequency domain */ 723 spx_fft(st->fft_table, st->x, &st->X[0]); 724 for (i=0;i<N;i++) 725 st->last_y[i] = st->x[i]; 726 Sxx = mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size); 727 for (i=0;i<st->frame_size;i++) 728 st->x[i] = st->x[i+st->frame_size]; 729 /* From here on, the top part of x is used as scratch space */ 730 716 for (chan = 0; chan < C; chan++) 717 { 718 /* Apply a notch filter to make sure DC doesn't end up causing problems */ 719 filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C); 720 /* Copy input data to buffer and apply pre-emphasis */ 721 /* Copy input data to buffer */ 722 for (i=0;i<st->frame_size;i++) 723 { 724 spx_word32_t tmp32; 725 /* FIXME: This core has changed a bit, need to merge properly */ 726 tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan]))); 727 #ifdef FIXED_POINT 728 if (tmp32 > 32767) 729 { 730 tmp32 = 32767; 731 if (st->saturated == 0) 732 st->saturated = 1; 733 } 734 if (tmp32 < -32767) 735 { 736 tmp32 = -32767; 737 if (st->saturated == 0) 738 st->saturated = 1; 739 } 740 #endif 741 st->memD[chan] = st->input[chan*st->frame_size+i]; 742 st->input[chan*st->frame_size+i] = EXTRACT16(tmp32); 743 } 744 } 745 746 for (speak = 0; speak < K; speak++) 747 { 748 for (i=0;i<st->frame_size;i++) 749 { 750 spx_word32_t tmp32; 751 st->x[speak*N+i] = st->x[speak*N+i+st->frame_size]; 752 tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak]))); 753 #ifdef FIXED_POINT 754 /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */ 755 if (tmp32 > 32767) 756 { 757 tmp32 = 32767; 758 st->saturated = M+1; 759 } 760 if (tmp32 < -32767) 761 { 762 tmp32 = -32767; 763 st->saturated = M+1; 764 } 765 #endif 766 st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32); 767 st->memX[speak] = far_end[i*K+speak]; 768 } 769 } 770 771 for (speak = 0; speak < K; speak++) 772 { 773 /* Shift memory: this could be optimized eventually*/ 774 for (j=M-1;j>=0;j--) 775 { 776 for (i=0;i<N;i++) 777 st->X[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i]; 778 } 779 /* Convert x (echo input) to frequency domain */ 780 spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]); 781 } 782 783 Sxx = 0; 784 for (speak = 0; speak < K; speak++) 785 { 786 Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size); 787 power_spectrum_accum(st->X+speak*N, st->Xf, N); 788 } 789 790 Sff = 0; 791 for (chan = 0; chan < C; chan++) 792 { 731 793 #ifdef TWO_PATH 732 /* Compute foreground filter */ 733 spectral_mul_accum16(st->X, st->foreground, st->Y, N, M); 734 spx_ifft(st->fft_table, st->Y, st->e); 735 for (i=0;i<st->frame_size;i++) 736 st->e[i] = SUB16(st->input[i], st->e[i+st->frame_size]); 737 Sff = mdf_inner_prod(st->e, st->e, st->frame_size); 738 #endif 739 794 /* Compute foreground filter */ 795 spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K); 796 spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N); 797 for (i=0;i<st->frame_size;i++) 798 st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]); 799 Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); 800 #endif 801 } 802 740 803 /* Adjust proportional adaption rate */ 741 mdf_adjust_prop (st->W, N, M, st->prop); 804 /* FIXME: Adjust that for C, K*/ 805 if (st->adapted) 806 mdf_adjust_prop (st->W, N, M, C*K, st->prop); 742 807 /* Compute weight gradient */ 743 808 if (st->saturated == 0) 744 809 { 745 for (j=M-1;j>=0;j--) 746 { 747 weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N], st->E, st->PHI, N); 748 for (i=0;i<N;i++) 749 st->W[j*N+i] = ADD32(st->W[j*N+i], st->PHI[i]); 750 810 for (chan = 0; chan < C; chan++) 811 { 812 for (speak = 0; speak < K; speak++) 813 { 814 for (j=M-1;j>=0;j--) 815 { 816 weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N); 817 for (i=0;i<N;i++) 818 st->W[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i]; 819 } 820 } 751 821 } 752 822 } else { 753 823 st->saturated--; 754 824 } 755 825 826 /* FIXME: MC conversion required */ 756 827 /* Update weight to prevent circular convolution (MDF / AUMDF) */ 757 for (j=0;j<M;j++) 758 { 759 /* This is a variant of the Alternatively Updated MDF (AUMDF) */ 760 /* Remove the "if" to make this an MDF filter */ 761 if (j==0 || st->cancel_count%(M-1) == j-1) 762 { 763 #ifdef FIXED_POINT 764 for (i=0;i<N;i++) 765 st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],NORMALIZE_SCALEDOWN+16)); 766 spx_ifft(st->fft_table, st->wtmp2, st->wtmp); 767 for (i=0;i<st->frame_size;i++) 828 for (chan = 0; chan < C; chan++) 829 { 830 for (speak = 0; speak < K; speak++) 831 { 832 for (j=0;j<M;j++) 768 833 { 769 st->wtmp[i]=0; 834 /* This is a variant of the Alternatively Updated MDF (AUMDF) */ 835 /* Remove the "if" to make this an MDF filter */ 836 if (j==0 || st->cancel_count%(M-1) == j-1) 837 { 838 #ifdef FIXED_POINT 839 for (i=0;i<N;i++) 840 st->wtmp2[i] = EXTRACT16(PSHR32(st->W[chan*N*K*M + j*N*K + speak*N + i],NORMALIZE_SCALEDOWN+16)); 841 spx_ifft(st->fft_table, st->wtmp2, st->wtmp); 842 for (i=0;i<st->frame_size;i++) 843 { 844 st->wtmp[i]=0; 845 } 846 for (i=st->frame_size;i<N;i++) 847 { 848 st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP); 849 } 850 spx_fft(st->fft_table, st->wtmp, st->wtmp2); 851 /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */ 852 for (i=0;i<N;i++) 853 st->W[chan*N*K*M + j*N*K + speak*N + i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); 854 #else 855 spx_ifft(st->fft_table, &st->W[chan*N*K*M + j*N*K + speak*N], st->wtmp); 856 for (i=st->frame_size;i<N;i++) 857 { 858 st->wtmp[i]=0; 859 } 860 spx_fft(st->fft_table, st->wtmp, &st->W[chan*N*K*M + j*N*K + speak*N]); 861 #endif 862 } 770 863 } 771 for (i=st->frame_size;i<N;i++) 772 { 773 st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP); 774 } 775 spx_fft(st->fft_table, st->wtmp, st->wtmp2); 776 /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */ 777 for (i=0;i<N;i++) 778 st->W[j*N+i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); 779 #else 780 spx_ifft(st->fft_table, &st->W[j*N], st->wtmp); 781 for (i=st->frame_size;i<N;i++) 782 { 783 st->wtmp[i]=0; 784 } 785 spx_fft(st->fft_table, st->wtmp, &st->W[j*N]); 786 #endif 787 } 788 } 789 790 /* Compute filter response Y */ 791 spectral_mul_accum(st->X, st->W, st->Y, N, M); 792 spx_ifft(st->fft_table, st->Y, st->y); 793 864 } 865 } 866 867 /* So we can use power_spectrum_accum */ 868 for (i=0;i<=st->frame_size;i++) 869 st->Rf[i] = st->Yf[i] = st->Xf[i] = 0; 870 871 Dbf = 0; 872 See = 0; 794 873 #ifdef TWO_PATH 795 874 /* Difference in response, this is used to estimate the variance of our residual power estimate */ 796 for (i=0;i<st->frame_size;i++) 797 st->e[i] = SUB16(st->e[i+st->frame_size], st->y[i+st->frame_size]); 798 Dbf = 10+mdf_inner_prod(st->e, st->e, st->frame_size); 799 #endif 800 801 for (i=0;i<st->frame_size;i++) 802 st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]); 803 See = mdf_inner_prod(st->e, st->e, st->frame_size); 875 for (chan = 0; chan < C; chan++) 876 { 877 spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K); 878 spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N); 879 for (i=0;i<st->frame_size;i++) 880 st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]); 881 Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); 882 for (i=0;i<st->frame_size;i++) 883 st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]); 884 See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); 885 } 886 #endif 887 804 888 #ifndef TWO_PATH 805 889 Sff = See; … … 808 892 #ifdef TWO_PATH 809 893 /* Logic for updating the foreground filter */ 810 894 811 895 /* For two time windows, compute the mean of the energy difference, as well as the variance */ 812 896 st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See))); … … 814 898 st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf))); 815 899 st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf))); 816 900 817 901 /* Equivalent float code: 818 902 st->Davg1 = .6*st->Davg1 + .4*(Sff-See); … … 821 905 st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf; 822 906 */ 823 907 824 908 update_foreground = 0; 825 909 /* Check if we have a statistically significant reduction in the residual echo */ … … 831 915 else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2)))) 832 916 update_foreground = 1; 833 917 834 918 /* Do we update? */ 835 919 if (update_foreground) … … 838 922 st->Dvar1 = st->Dvar2 = FLOAT_ZERO; 839 923 /* Copy background filter to foreground filter */ 840 for (i=0;i<N*M ;i++)924 for (i=0;i<N*M*C*K;i++) 841 925 st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16)); 842 926 /* Apply a smooth transition so as to not introduce blocking artifacts */ 843 for (i=0;i<st->frame_size;i++) 844 st->e[i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[i+st->frame_size]); 927 for (chan = 0; chan < C; chan++) 928 for (i=0;i<st->frame_size;i++) 929 st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]); 845 930 } else { 846 931 int reset_background=0; … … 855 940 { 856 941 /* Copy foreground filter to background filter */ 857 for (i=0;i<N*M ;i++)942 for (i=0;i<N*M*C*K;i++) 858 943 st->W[i] = SHL32(EXTEND32(st->foreground[i]),16); 859 944 /* We also need to copy the output so as to get correct adaptation */ 860 for (i=0;i<st->frame_size;i++) 861 st->y[i+st->frame_size] = st->e[i+st->frame_size]; 862 for (i=0;i<st->frame_size;i++) 863 st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]); 945 for (chan = 0; chan < C; chan++) 946 { 947 for (i=0;i<st->frame_size;i++) 948 st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size]; 949 for (i=0;i<st->frame_size;i++) 950 st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]); 951 } 864 952 See = Sff; 865 953 st->Davg1 = st->Davg2 = 0; … … 869 957 #endif 870 958 871 /* Compute error signal (for the output with de-emphasis) */ 872 for (i=0;i<st->frame_size;i++) 873 { 874 spx_word32_t tmp_out; 959 Sey = Syy = Sdd = 0; 960 for (chan = 0; chan < C; chan++) 961 { 962 /* Compute error signal (for the output with de-emphasis) */ 963 for (i=0;i<st->frame_size;i++) 964 { 965 spx_word32_t tmp_out; 875 966 #ifdef TWO_PATH 876 tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->e[i+st->frame_size])); 877 #else 878 tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->y[i+st->frame_size])); 879 #endif 880 /* Saturation */ 881 if (tmp_out>32767) 882 tmp_out = 32767; 883 else if (tmp_out<-32768) 884 tmp_out = -32768; 885 tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE))); 967 tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size])); 968 #else 969 tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size])); 970 #endif 971 tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan]))); 886 972 /* This is an arbitrary test for saturation in the microphone signal */ 887 if (in[i] <= -32000 || in[i] >= 32000) 888 { 889 tmp_out = 0; 973 if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000) 974 { 890 975 if (st->saturated == 0) 891 976 st->saturated = 1; 892 }893 out[i] = (spx_int16_t)tmp_out;894 st->memE= tmp_out;895 }896 977 } 978 out[i*C+chan] = WORD2INT(tmp_out); 979 st->memE[chan] = tmp_out; 980 } 981 897 982 #ifdef DUMP_ECHO_CANCEL_DATA 898 dump_audio(in, far_end, out, st->frame_size); 899 #endif 900 901 /* Compute error signal (filter update version) */ 902 for (i=0;i<st->frame_size;i++) 903 { 904 st->e[i+st->frame_size] = st->e[i]; 905 st->e[i] = 0; 906 } 907 908 /* Compute a bunch of correlations */ 909 Sey = mdf_inner_prod(st->e+st->frame_size, st->y+st->frame_size, st->frame_size); 910 Syy = mdf_inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size); 911 Sdd = mdf_inner_prod(st->input, st->input, st->frame_size); 912 983 dump_audio(in, far_end, out, st->frame_size); 984 #endif 985 986 /* Compute error signal (filter update version) */ 987 for (i=0;i<st->frame_size;i++) 988 { 989 st->e[chan*N+i+st->frame_size] = st->e[chan*N+i]; 990 st->e[chan*N+i] = 0; 991 } 992 993 /* Compute a bunch of correlations */ 994 /* FIXME: bad merge */ 995 Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size); 996 Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size); 997 Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size); 998 999 /* Convert error to frequency domain */ 1000 spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N); 1001 for (i=0;i<st->frame_size;i++) 1002 st->y[i+chan*N] = 0; 1003 spx_fft(st->fft_table, st->y+chan*N, st->Y+chan*N); 1004 1005 /* Compute power spectrum of echo (X), error (E) and filter response (Y) */ 1006 power_spectrum_accum(st->E+chan*N, st->Rf, N); 1007 power_spectrum_accum(st->Y+chan*N, st->Yf, N); 1008 1009 } 1010 913 1011 /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/ 914 1012 915 1013 /* Do some sanity check */ 916 1014 if (!(Syy>=0 && Sxx>=0 && See >= 0) … … 922 1020 /* Things have gone really bad */ 923 1021 st->screwed_up += 50; 924 for (i=0;i<st->frame_size ;i++)1022 for (i=0;i<st->frame_size*C;i++) 925 1023 out[i] = 0; 926 1024 } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6))) … … 942 1040 See = MAX32(See, SHR32(MULT16_16(N, 100),6)); 943 1041 944 /* Convert error to frequency domain */ 945 spx_fft(st->fft_table, st->e, st->E); 946 for (i=0;i<st->frame_size;i++) 947 st->y[i] = 0; 948 spx_fft(st->fft_table, st->y, st->Y); 949 950 /* Compute power spectrum of far end (X), error (E) and filter response (Y) */ 951 power_spectrum(st->E, st->Rf, N); 952 power_spectrum(st->Y, st->Yf, N); 953 power_spectrum(st->X, st->Xf, N); 954 1042 for (speak = 0; speak < K; speak++) 1043 { 1044 Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size); 1045 power_spectrum_accum(st->X+speak*N, st->Xf, N); 1046 } 1047 1048 955 1049 /* Smooth far end energy estimate over time */ 956 1050 for (j=0;j<=st->frame_size;j++) 957 1051 st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]); 958 959 /* Enable this to compute the power based only on the tail (would need to compute more960 efficiently to make this really useful */961 if (0)962 {963 float scale2 = .5f/M;964 for (j=0;j<=st->frame_size;j++)965 st->power[j] = 100;966 for (i=0;i<M;i++)967 {968 power_spectrum(&st->X[i*N], st->Xf, N);969 for (j=0;j<=st->frame_size;j++)970 st->power[j] += scale2*st->Xf[j];971 }972 }973 1052 974 1053 /* Compute filtered spectra and (cross-)correlations */ … … 988 1067 #endif 989 1068 } 990 1069 991 1070 Pyy = FLOAT_SQRT(Pyy); 992 1071 Pey = FLOAT_DIVU(Pey,Pyy); … … 1016 1095 st->leak_estimate = SHL16(st->leak_estimate,1); 1017 1096 /*printf ("%f\n", st->leak_estimate);*/ 1018 1097 1019 1098 /* Compute Residual to Error Ratio */ 1020 1099 #ifdef FIXED_POINT … … 1072 1151 spx_word16_t adapt_rate=0; 1073 1152 1074 if (Sxx > SHR32(MULT16_16(N, 1000),6)) 1153 if (Sxx > SHR32(MULT16_16(N, 1000),6)) 1075 1154 { 1076 1155 tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx); … … 1092 1171 } 1093 1172 1094 /* Save residual echo so it can be used by the nonlinear processor */ 1095 if (st->adapted) 1096 { 1097 /* If the filter is adapted, take the filtered echo */ 1173 /* FIXME: MC conversion required */ 1098 1174 for (i=0;i<st->frame_size;i++) 1099 1175 st->last_y[i] = st->last_y[st->frame_size+i]; 1176 if (st->adapted) 1177 { 1178 /* If the filter is adapted, take the filtered echo */ 1100 1179 for (i=0;i<st->frame_size;i++) 1101 1180 st->last_y[st->frame_size+i] = in[i]-out[i]; … … 1114 1193 spx_word16_t leak2; 1115 1194 int N; 1116 1195 1117 1196 N = st->window_size; 1118 1197 … … 1120 1199 for (i=0;i<N;i++) 1121 1200 st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]); 1122 1201 1123 1202 /* Compute power spectrum of the echo */ 1124 1203 spx_fft(st->fft_table, st->y, st->Y); 1125 1204 power_spectrum(st->Y, residual_echo, N); 1126 1205 1127 1206 #ifdef FIXED_POINT 1128 1207 if (st->leak_estimate > 16383) … … 1139 1218 for (i=0;i<=st->frame_size;i++) 1140 1219 residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]); 1141 1142 } 1143 1144 int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr)1220 1221 } 1222 1223 EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr) 1145 1224 { 1146 1225 switch(request) 1147 1226 { 1148 1227 1149 1228 case SPEEX_ECHO_GET_FRAME_SIZE: 1150 1229 (*(int*)ptr) = st->frame_size; … … 1170 1249 (*(int*)ptr) = st->sampling_rate; 1171 1250 break; 1251 case SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE: 1252 /*FIXME: Implement this for multiple channels */ 1253 *((spx_int32_t *)ptr) = st->M * st->frame_size; 1254 break; 1255 case SPEEX_ECHO_GET_IMPULSE_RESPONSE: 1256 { 1257 int M = st->M, N = st->window_size, n = st->frame_size, i, j; 1258 spx_int32_t *filt = (spx_int32_t *) ptr; 1259 for(j=0;j<M;j++) 1260 { 1261 /*FIXME: Implement this for multiple channels */ 1262 #ifdef FIXED_POINT 1263 for (i=0;i<N;i++) 1264 st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],16+NORMALIZE_SCALEDOWN)); 1265 spx_ifft(st->fft_table, st->wtmp2, st->wtmp); 1266 #else 1267 spx_ifft(st->fft_table, &st->W[j*N], st->wtmp); 1268 #endif 1269 for(i=0;i<n;i++) 1270 filt[j*n+i] = PSHR32(MULT16_16(32767,st->wtmp[i]), WEIGHT_SHIFT-NORMALIZE_SCALEDOWN); 1271 } 1272 } 1273 break; 1172 1274 default: 1173 1275 speex_warning_int("Unknown speex_echo_ctl request: ", request);
Note: See TracChangeset
for help on using the changeset viewer.