Changeset 2199
- Timestamp:
- Aug 10, 2008 4:15:14 PM (16 years ago)
- Location:
- pjproject/trunk
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
pjproject/trunk/pjmedia/src/pjmedia/echo_suppress.c
r2198 r2199 18 18 */ 19 19 #include <pjmedia/types.h> 20 #include <pjmedia/alaw_ulaw.h> 20 21 #include <pjmedia/errno.h> 21 22 #include <pjmedia/silencedet.h> 23 #include <pj/array.h> 22 24 #include <pj/assert.h> 23 25 #include <pj/lock.h> … … 30 32 #define THIS_FILE "echo_suppress.c" 31 33 34 /* Maximum float constant */ 35 #define MAX_FLOAT (float)1.701411e38 36 37 /* The effective learn duration (in seconds) before we declare that learning 38 * is complete. The actual learning duration itself may be longer depending 39 * on the conversation pattern (e.g. we can't detect echo if speaker is only 40 * playing silence). 41 */ 42 #define MAX_CALC_DURATION_SEC 3 43 44 /* The internal audio segment length, in milliseconds. 10ms shold be good 45 * and no need to change it. 46 */ 47 #define SEGMENT_PTIME 10 48 49 /* The length of the template signal in milliseconds. The longer the template, 50 * the better correlation will be found, at the expense of more processing 51 * and longer learning time. 52 */ 53 #define TEMPLATE_PTIME 200 54 55 /* How long to look back in the past to see if either mic or speaker is 56 * active. 57 */ 58 #define SIGNAL_LOOKUP_MSEC 200 59 60 /* The minimum level value to be considered as talking, in uLaw complement 61 * (0-255). 62 */ 63 #define MIN_SIGNAL_ULAW 35 64 65 /* The period (in seconds) on which the ES will analize it's effectiveness, 66 * and it may trigger soft-reset to force recalculation. 67 */ 68 #define CHECK_PERIOD 30 69 70 /* Maximum signal level of average echo residue (in uLaw complement). When 71 * the residue value exceeds this value, we force the ES to re-learn. 72 */ 73 #define MAX_RESIDUE 2.5 74 75 76 #if 0 77 # define TRACE_(expr) PJ_LOG(5,expr) 78 #else 79 # define TRACE_(expr) 80 #endif 81 82 PJ_INLINE(float) fabs(float val) 83 { 84 if (val < 0) 85 return -val; 86 else 87 return val; 88 } 89 90 91 #if defined(PJ_HAS_FLOATING_POINT) && PJ_HAS_FLOATING_POINT!=0 92 typedef float pj_ufloat_t; 93 # define pj_ufloat_from_float(f) (f) 94 # define pj_ufloat_mul_u(val1, f) ((val1) * (f)) 95 # define pj_ufloat_mul_i(val1, f) ((val1) * (f)) 96 #else 97 typedef pj_uint32_t pj_ufloat_t; 98 99 pj_ufloat_t pj_ufloat_from_float(float f) 100 { 101 return (pj_ufloat_t)(f * 65536); 102 } 103 104 unsigned pj_ufloat_mul_u(unsigned val1, pj_ufloat_t val2) 105 { 106 return (val1 * val2) >> 16; 107 } 108 109 int pj_ufloat_mul_i(int val1, pj_ufloat_t val2) 110 { 111 return (val1 * (pj_int32_t)val2) >> 16; 112 } 113 #endif 114 115 116 /* Conversation state */ 117 typedef enum talk_state 118 { 119 ST_NULL, 120 ST_LOCAL_TALK, 121 ST_REM_SILENT, 122 ST_DOUBLETALK, 123 ST_REM_TALK 124 } talk_state; 125 126 const char *state_names[] = 127 { 128 "Null", 129 "local talking", 130 "remote silent", 131 "doubletalk", 132 "remote talking" 133 }; 134 135 136 /* Description: 137 138 The echo suppressor tries to find the position of echoed signal by looking 139 at the correlation between signal played to the speaker (played signal) 140 and the signal captured from the microphone (recorded signal). 141 142 To do this, it first divides the frames (from mic and speaker) into 143 segments, calculate the audio level of the segment, and save the level 144 information in the playback and record history (play_hist and rec_hist 145 respectively). 146 147 In the history, the newest element (depicted as "t0" in the diagram belo) 148 is put in the last position of the array. 149 150 The record history size is as large as the template size (tmpl_cnt), since 151 we will use the record history as the template to find the best matching 152 position in the playback history. 153 154 Here is the record history buffer: 155 156 <--templ_cnt--> 157 +-------------+ 158 | rec_hist | 159 +-------------+ 160 t-templ_cnt......t0 161 162 As you can see, the newest frame ("t0") is put as the last element in the 163 array. 164 165 The playback history size is larger than record history, since we need to 166 find the matching pattern in the past. The playback history size is 167 "templ_cnt + tail_cnt", where "tail_cnt" is the number of segments equal 168 to the maximum tail length. The maximum tail length is set when the ES 169 is created. 170 171 Here is the playback history buffer: 172 173 <-----tail_cnt-----> <--templ_cnt--> 174 +-------------------+--------------+ 175 | play_hist | 176 +-------------------+--------------+ 177 t-play_hist_cnt...t-templ_cnt.......t0 178 179 180 181 Learning: 182 183 During the processing, the ES calculates the following values: 184 - the correlation value, that is how similar the playback signal compared 185 to the mic signal. The lower the correlation value the better (i.e. more 186 similar) the signal is. The correlation value is done over the template 187 duration. 188 - the gain scaling factor, that is the ratio between mic signal and 189 speaker signal. The ES calculates both the minimum and average ratios. 190 191 The ES calculates both the values above for every tail position in the 192 playback history. The values are saved in arrays below: 193 194 <-----tail_cnt-----> 195 +-------------------+ 196 | corr_sum | 197 +-------------------+ 198 | min_factor | 199 +-------------------+ 200 | avg_factor | 201 +-------------------+ 202 203 At the end of processing, the ES iterates through the correlation array and 204 picks the tail index with the lowest corr_sum value. This is the position 205 where echo is most likely to be found. 206 207 208 Processing: 209 210 Once learning is done, the ES will change the level of the mic signal 211 depending on the state of the conversation and according to the ratio that 212 has been found in the learning phase above. 213 214 */ 32 215 33 216 /* 34 * Simple echo suppresor217 * The simple echo suppresor state 35 218 */ 36 219 typedef struct echo_supp 37 220 { 38 pjmedia_silence_det *sd; 39 unsigned samples_per_frame; 40 unsigned tail_ms; 221 unsigned clock_rate; /* Clock rate. */ 222 pj_uint16_t samples_per_frame; /* Frame length in samples */ 223 pj_uint16_t samples_per_segment;/* Segment length in samples */ 224 pj_uint16_t tail_ms; /* Tail length in milliseconds */ 225 pj_uint16_t tail_samples; /* Tail length in samples. */ 226 227 pj_bool_t learning; /* Are we still learning yet? */ 228 talk_state talk_state; /* Current talking state */ 229 int tail_index; /* Echo location, -1 if not found */ 230 231 unsigned max_calc; /* # of calc before learning complete. 232 (see MAX_CALC_DURATION_SEC) */ 233 unsigned calc_cnt; /* Number of calculations so far */ 234 235 unsigned update_cnt; /* # of updates */ 236 unsigned templ_cnt; /* Template length, in # of segments */ 237 unsigned tail_cnt; /* Tail length, in # of segments */ 238 unsigned play_hist_cnt; /* # of segments in play_hist */ 239 pj_uint16_t *play_hist; /* Array of playback levels */ 240 pj_uint16_t *rec_hist; /* Array of rec levels */ 241 242 float *corr_sum; /* Array of corr for each tail pos. */ 243 float *tmp_corr; /* Temporary corr array calculation */ 244 float best_corr; /* Best correlation so far. */ 245 246 float *min_factor; /* Array of minimum scaling factor */ 247 float *avg_factor; /* Array of average scaling factor */ 248 float *tmp_factor; /* Array to store provisional result */ 249 250 unsigned running_cnt; /* Running duration in # of frames */ 251 float residue; /* Accummulated echo residue. */ 252 float last_factor; /* Last factor applied to mic signal */ 41 253 } echo_supp; 42 254 … … 55 267 { 56 268 echo_supp *ec; 57 pj_status_t status; 58 59 PJ_UNUSED_ARG(clock_rate); 269 60 270 PJ_UNUSED_ARG(channel_count); 61 271 PJ_UNUSED_ARG(options); 62 272 273 PJ_ASSERT_RETURN(samples_per_frame >= SEGMENT_PTIME * clock_rate / 1000, 274 PJ_ENOTSUP); 275 63 276 ec = PJ_POOL_ZALLOC_T(pool, struct echo_supp); 64 ec->samples_per_frame = samples_per_frame; 65 ec->tail_ms = tail_ms; 66 67 status = pjmedia_silence_det_create(pool, clock_rate, samples_per_frame, 68 &ec->sd); 69 if (status != PJ_SUCCESS) 70 return status; 71 72 pjmedia_silence_det_set_name(ec->sd, "ecsu%p"); 73 pjmedia_silence_det_set_adaptive(ec->sd, PJMEDIA_ECHO_SUPPRESS_THRESHOLD); 74 pjmedia_silence_det_set_params(ec->sd, 100, 500, 3000); 277 ec->clock_rate = clock_rate; 278 ec->samples_per_frame = (pj_uint16_t)samples_per_frame; 279 ec->samples_per_segment = (pj_uint16_t)(SEGMENT_PTIME * clock_rate / 1000); 280 ec->tail_ms = (pj_uint16_t)tail_ms; 281 ec->tail_samples = (pj_uint16_t)(tail_ms * clock_rate / 1000); 282 283 ec->templ_cnt = TEMPLATE_PTIME / SEGMENT_PTIME; 284 ec->tail_cnt = (pj_uint16_t)(tail_ms / SEGMENT_PTIME); 285 ec->play_hist_cnt = (pj_uint16_t)(ec->tail_cnt+ec->templ_cnt); 286 287 ec->max_calc = (pj_uint16_t)(MAX_CALC_DURATION_SEC * clock_rate / 288 ec->samples_per_segment); 289 290 ec->rec_hist = (pj_uint16_t*) 291 pj_pool_alloc(pool, ec->templ_cnt * 292 sizeof(ec->rec_hist[0])); 293 294 /* Note: play history has twice number of elements */ 295 ec->play_hist = (pj_uint16_t*) 296 pj_pool_alloc(pool, ec->play_hist_cnt * 297 sizeof(ec->play_hist[0])); 298 299 ec->corr_sum = (float*) 300 pj_pool_alloc(pool, ec->tail_cnt * 301 sizeof(ec->corr_sum[0])); 302 ec->tmp_corr = (float*) 303 pj_pool_alloc(pool, ec->tail_cnt * 304 sizeof(ec->tmp_corr[0])); 305 ec->min_factor = (float*) 306 pj_pool_alloc(pool, ec->tail_cnt * 307 sizeof(ec->min_factor[0])); 308 ec->avg_factor = (float*) 309 pj_pool_alloc(pool, ec->tail_cnt * 310 sizeof(ec->avg_factor[0])); 311 ec->tmp_factor = (float*) 312 pj_pool_alloc(pool, ec->tail_cnt * 313 sizeof(ec->tmp_factor[0])); 314 echo_supp_reset(ec); 75 315 76 316 *p_state = ec; … … 90 330 91 331 /* 92 * Reset332 * Hard reset 93 333 */ 94 334 PJ_DEF(void) echo_supp_reset(void *state) 95 335 { 96 PJ_UNUSED_ARG(state); 97 return; 336 unsigned i; 337 echo_supp *ec = (echo_supp*) state; 338 339 pj_bzero(ec->rec_hist, ec->templ_cnt * sizeof(ec->rec_hist[0])); 340 pj_bzero(ec->play_hist, ec->play_hist_cnt * sizeof(ec->play_hist[0])); 341 342 for (i=0; i<ec->tail_cnt; ++i) { 343 ec->corr_sum[i] = ec->avg_factor[i] = 0; 344 ec->min_factor[i] = MAX_FLOAT; 345 } 346 347 ec->update_cnt = 0; 348 ec->calc_cnt = 0; 349 ec->learning = PJ_TRUE; 350 ec->tail_index = -1; 351 ec->best_corr = MAX_FLOAT; 352 ec->talk_state = ST_NULL; 353 ec->last_factor = 1.0; 354 ec->residue = 0; 355 ec->running_cnt = 0; 98 356 } 99 357 100 358 /* 359 * Soft reset to force the EC to re-learn without having to discard all 360 * rec and playback history. 361 */ 362 PJ_DEF(void) echo_supp_soft_reset(void *state) 363 { 364 unsigned i; 365 366 echo_supp *ec = (echo_supp*) state; 367 368 for (i=0; i<ec->tail_cnt; ++i) { 369 ec->corr_sum[i] = 0; 370 } 371 372 ec->update_cnt = 0; 373 ec->calc_cnt = 0; 374 ec->learning = PJ_TRUE; 375 ec->best_corr = MAX_FLOAT; 376 ec->residue = 0; 377 ec->running_cnt = 0; 378 379 PJ_LOG(4,(THIS_FILE, "Echo suppressor soft reset. Re-learning..")); 380 } 381 382 383 /* Set state */ 384 static void echo_supp_set_state(echo_supp *ec, enum talk_state state) 385 { 386 if (state != ec->talk_state) { 387 TRACE_((THIS_FILE, "[%03d.%03d] %s --> %s", 388 (ec->update_cnt * SEGMENT_PTIME / 1000), 389 ((ec->update_cnt * SEGMENT_PTIME) % 1000), 390 state_names[ec->talk_state], 391 state_names[state])); 392 ec->talk_state = state; 393 } 394 } 395 396 /* 397 * Update EC state 398 */ 399 static void echo_supp_update(echo_supp *ec, pj_int16_t *rec_frm, 400 const pj_int16_t *play_frm) 401 { 402 int prev_index; 403 unsigned i, frm_level, sum_rec_level; 404 float rec_corr; 405 406 ++ec->update_cnt; 407 if (ec->update_cnt > 0x7FFFFFFF) 408 ec->update_cnt = 0x7FFFFFFF; /* Detect overflow */ 409 410 /* Calculate current play frame level */ 411 frm_level = pjmedia_calc_avg_signal(play_frm, ec->samples_per_segment); 412 ++frm_level; /* to avoid division by zero */ 413 414 /* Push current frame level to the back of the play history */ 415 pj_array_erase(ec->play_hist, sizeof(pj_uint16_t), ec->play_hist_cnt, 0); 416 ec->play_hist[ec->play_hist_cnt-1] = (pj_uint16_t) frm_level; 417 418 /* Calculate level of current mic frame */ 419 frm_level = pjmedia_calc_avg_signal(rec_frm, ec->samples_per_segment); 420 ++frm_level; /* to avoid division by zero */ 421 422 /* Push to the back of the rec history */ 423 pj_array_erase(ec->rec_hist, sizeof(pj_uint16_t), ec->templ_cnt, 0); 424 ec->rec_hist[ec->templ_cnt-1] = (pj_uint16_t) frm_level; 425 426 427 /* Can't do the calc until the play history is full. */ 428 if (ec->update_cnt < ec->play_hist_cnt) 429 return; 430 431 /* Skip if learning is done */ 432 if (!ec->learning) 433 return; 434 435 436 /* Calculate rec signal pattern */ 437 rec_corr = 0; 438 sum_rec_level = 0; 439 for (i=0; i < ec->templ_cnt-1; ++i) { 440 float corr; 441 corr = (float)ec->rec_hist[i+1] / ec->rec_hist[i]; 442 rec_corr += corr; 443 sum_rec_level += ec->rec_hist[i]; 444 } 445 sum_rec_level += ec->rec_hist[i]; 446 447 /* Iterate through the play history and calculate the signal correlation 448 * for every tail position in the play_hist. Save the result in temporary 449 * array since we may bail out early if the conversation state is not good 450 * to detect echo. 451 */ 452 for (i=0; i < ec->tail_cnt; ++i) { 453 unsigned j, end, sum_play_level, ulaw; 454 float play_corr = 0, corr_diff; 455 456 sum_play_level = 0; 457 for (j=i, end=i+ec->templ_cnt-1; j<end; ++j) { 458 float corr; 459 corr = (float)ec->play_hist[j+1] / ec->play_hist[j]; 460 play_corr += corr; 461 sum_play_level += ec->play_hist[j]; 462 } 463 sum_play_level += ec->play_hist[j]; 464 465 /* Bail out if remote isn't talking */ 466 ulaw = pjmedia_linear2ulaw(sum_play_level/ec->templ_cnt) ^ 0xFF; 467 if (ulaw < MIN_SIGNAL_ULAW) { 468 echo_supp_set_state(ec, ST_REM_SILENT); 469 return; 470 } 471 472 /* Bail out if local user is talking */ 473 if (sum_rec_level >= sum_play_level) { 474 echo_supp_set_state(ec, ST_LOCAL_TALK); 475 return; 476 } 477 478 /* Also bail out if we suspect there's a doubletalk */ 479 ulaw = pjmedia_linear2ulaw(sum_rec_level/ec->templ_cnt) ^ 0xFF; 480 if (ulaw > MIN_SIGNAL_ULAW) { 481 echo_supp_set_state(ec, ST_DOUBLETALK); 482 return; 483 } 484 485 /* Calculate correlation and save to temporary array */ 486 corr_diff = fabs(play_corr - rec_corr); 487 ec->tmp_corr[i] = corr_diff; 488 489 /* Also calculate the gain factor between mic and speaker level */ 490 ec->tmp_factor[i] = (float)sum_rec_level / sum_play_level; 491 pj_assert(ec->tmp_factor[i] < 1); 492 } 493 494 /* We seem to have good signal, we can update the EC state */ 495 echo_supp_set_state(ec, ST_REM_TALK); 496 497 /* Accummulate the correlation value to the history and at the same 498 * time find the tail index of the best correlation. 499 */ 500 prev_index = ec->tail_index; 501 for (i=1; i<ec->tail_cnt-1; ++i) { 502 float *p = &ec->corr_sum[i], sum; 503 504 /* Accummulate correlation value for this tail position */ 505 ec->corr_sum[i] += ec->tmp_corr[i]; 506 507 /* Update the min and avg gain factor for this tail position */ 508 if (ec->tmp_factor[i] < ec->min_factor[i]) 509 ec->min_factor[i] = ec->tmp_factor[i]; 510 ec->avg_factor[i] = ((ec->avg_factor[i] * ec->tail_cnt) + 511 ec->tmp_factor[i]) / 512 (ec->tail_cnt + 1); 513 514 /* To get the best correlation, also include the correlation 515 * value of the neighbouring tail locations. 516 */ 517 sum = *(p-1) + (*p)*2 + *(p+1); 518 //sum = *p; 519 520 /* See if we have better correlation value */ 521 if (sum < ec->best_corr) { 522 ec->tail_index = i; 523 ec->best_corr = sum; 524 } 525 } 526 527 if (ec->tail_index != prev_index) { 528 unsigned duration; 529 int imin, iavg; 530 531 duration = ec->update_cnt * SEGMENT_PTIME; 532 imin = (int)(ec->min_factor[ec->tail_index] * 1000); 533 iavg = (int)(ec->avg_factor[ec->tail_index] * 1000); 534 535 PJ_LOG(4,(THIS_FILE, 536 "Echo suppressor updated at t=%03d.%03ds, echo tail=%d msec" 537 ", factor min/avg=%d.%03d/%d.%03d", 538 (duration/1000), (duration%1000), 539 (ec->tail_cnt-ec->tail_index) * SEGMENT_PTIME, 540 imin/1000, imin%1000, 541 iavg/1000, iavg%1000)); 542 543 } 544 545 ++ec->calc_cnt; 546 547 if (ec->calc_cnt > ec->max_calc) { 548 unsigned duration; 549 int imin, iavg; 550 551 552 ec->learning = PJ_FALSE; 553 ec->running_cnt = 0; 554 555 duration = ec->update_cnt * SEGMENT_PTIME; 556 imin = (int)(ec->min_factor[ec->tail_index] * 1000); 557 iavg = (int)(ec->avg_factor[ec->tail_index] * 1000); 558 559 PJ_LOG(4,(THIS_FILE, 560 "Echo suppressor learning done at t=%03d.%03ds, tail=%d ms" 561 ", factor min/avg=%d.%03d/%d.%03d", 562 (duration/1000), (duration%1000), 563 (ec->tail_cnt-ec->tail_index) * SEGMENT_PTIME, 564 imin/1000, imin%1000, 565 iavg/1000, iavg%1000)); 566 } 567 568 } 569 570 571 /* Amplify frame */ 572 static void amplify_frame(pj_int16_t *frm, unsigned length, 573 pj_ufloat_t factor) 574 { 575 unsigned i; 576 577 for (i=0; i<length; ++i) { 578 frm[i] = (pj_int16_t)pj_ufloat_mul_i(frm[i], factor); 579 } 580 } 581 582 /* 101 583 * Perform echo cancellation. 102 584 */ … … 107 589 void *reserved ) 108 590 { 591 unsigned i, N; 109 592 echo_supp *ec = (echo_supp*) state; 110 pj_bool_t silence;111 593 112 594 PJ_UNUSED_ARG(options); 113 595 PJ_UNUSED_ARG(reserved); 114 596 115 silence = pjmedia_silence_det_detect(ec->sd, play_frm, 116 ec->samples_per_frame, NULL); 117 118 if (!silence) { 119 #if defined(PJMEDIA_ECHO_SUPPRESS_FACTOR) && PJMEDIA_ECHO_SUPPRESS_FACTOR!=0 120 unsigned i; 121 for (i=0; i<ec->samples_per_frame; ++i) { 122 rec_frm[i] = (pj_int16_t)(rec_frm[i] >> 123 PJMEDIA_ECHO_SUPPRESS_FACTOR); 124 } 125 #else 126 pjmedia_zero_samples(rec_frm, ec->samples_per_frame); 127 #endif 597 /* Calculate number of segments. This should be okay even if 598 * samples_per_frame is not a multiply of samples_per_segment, since 599 * we only calculate level. 600 */ 601 N = ec->samples_per_frame / ec->samples_per_segment; 602 pj_assert(N>0); 603 for (i=0; i<N; ++i) { 604 unsigned pos = i * ec->samples_per_segment; 605 echo_supp_update(ec, rec_frm+pos, play_frm+pos); 606 } 607 608 if (ec->tail_index < 0) { 609 /* Not ready */ 610 } else { 611 unsigned lookup_cnt, rec_level=0, play_level=0; 612 unsigned tail_cnt; 613 float factor; 614 615 /* How many previous segments to lookup */ 616 lookup_cnt = SIGNAL_LOOKUP_MSEC / SEGMENT_PTIME; 617 if (lookup_cnt > ec->templ_cnt) 618 lookup_cnt = ec->templ_cnt; 619 620 /* Lookup in recording history to get maximum mic level, to see 621 * if local user is currently talking 622 */ 623 for (i=ec->templ_cnt - lookup_cnt; i < ec->templ_cnt; ++i) { 624 if (ec->rec_hist[i] > rec_level) 625 rec_level = ec->rec_hist[i]; 626 } 627 rec_level = pjmedia_linear2ulaw(rec_level) ^ 0xFF; 628 629 /* Calculate the detected tail length, in # of segments */ 630 tail_cnt = (ec->tail_cnt - ec->tail_index); 631 632 /* Lookup in playback history to get max speaker level, to see 633 * if remote user is currently talking 634 */ 635 for (i=ec->play_hist_cnt -lookup_cnt -tail_cnt; 636 i<ec->play_hist_cnt-tail_cnt; ++i) 637 { 638 if (ec->play_hist[i] > play_level) 639 play_level = ec->play_hist[i]; 640 } 641 play_level = pjmedia_linear2ulaw(play_level) ^ 0xFF; 642 643 if (rec_level >= MIN_SIGNAL_ULAW) { 644 if (play_level < MIN_SIGNAL_ULAW) { 645 /* Mic is talking, speaker is idle. Let mic signal pass as is. 646 */ 647 factor = 1.0; 648 echo_supp_set_state(ec, ST_LOCAL_TALK); 649 } else { 650 /* Seems that both are talking. Scale the mic signal 651 * down a little bit to reduce echo, while allowing both 652 * parties to talk at the same time. 653 */ 654 factor = (float)(ec->avg_factor[ec->tail_index] * 2); 655 echo_supp_set_state(ec, ST_DOUBLETALK); 656 } 657 } else { 658 if (play_level < MIN_SIGNAL_ULAW) { 659 /* Both mic and speaker seems to be idle. Also scale the 660 * mic signal down with average factor to reduce low power 661 * echo. 662 */ 663 factor = ec->avg_factor[ec->tail_index] * 3 / 2; 664 echo_supp_set_state(ec, ST_REM_SILENT); 665 } else { 666 /* Mic is idle, but there's something playing in speaker. 667 * Scale the mic down to minimum 668 */ 669 factor = ec->min_factor[ec->tail_index] / 2; 670 echo_supp_set_state(ec, ST_REM_TALK); 671 } 672 } 673 674 /* Smoothen the transition */ 675 if (factor > ec->last_factor) 676 factor = (factor + ec->last_factor) / 2; 677 else 678 factor = (factor + ec->last_factor*9) / 10; 679 680 /* Amplify frame */ 681 amplify_frame(rec_frm, ec->samples_per_frame, 682 pj_ufloat_from_float(factor)); 683 ec->last_factor = factor; 684 685 if (ec->talk_state == ST_REM_TALK) { 686 unsigned level, recalc_cnt; 687 688 /* Get the adjusted frame signal level */ 689 level = pjmedia_calc_avg_signal(rec_frm, ec->samples_per_frame); 690 level = pjmedia_linear2ulaw(level) ^ 0xFF; 691 692 /* Accumulate average echo residue to see the ES effectiveness */ 693 ec->residue = ((ec->residue * ec->running_cnt) + level) / 694 (ec->running_cnt + 1); 695 696 ++ec->running_cnt; 697 698 /* Check if we need to re-learn */ 699 recalc_cnt = CHECK_PERIOD * ec->clock_rate / ec->samples_per_frame; 700 if (ec->running_cnt > recalc_cnt) { 701 int iresidue; 702 703 iresidue = (int)(ec->residue*1000); 704 705 PJ_LOG(5,(THIS_FILE, "Echo suppressor residue = %d.%03d", 706 iresidue/1000, iresidue%1000)); 707 708 if (ec->residue > MAX_RESIDUE && !ec->learning) { 709 echo_supp_soft_reset(ec); 710 ec->residue = 0; 711 } else { 712 ec->running_cnt = 0; 713 ec->residue = 0; 714 } 715 } 716 } 128 717 } 129 718 -
pjproject/trunk/pjsip-apps/src/samples/aectest.c
r2198 r2199 57 57 " -d The delay between playback and capture in ms. Default is zero.\n" 58 58 " -l Set the echo tail length in ms. Default is 200 ms \n" 59 " -r Set repeat count (default=1) \n" 59 60 " -a Algorithm: 0=default, 1=speex, 3=echo suppress \n"; 60 61 … … 92 93 unsigned tail_ms = TAIL_LENGTH; 93 94 pj_timestamp t0, t1; 94 int c;95 int i, repeat=1, c; 95 96 96 97 pj_optind = 0; 97 while ((c=pj_getopt(argc, argv, "d:l:a: ")) !=-1) {98 while ((c=pj_getopt(argc, argv, "d:l:a:r:")) !=-1) { 98 99 switch (c) { 99 100 case 'd': … … 122 123 } 123 124 break; 125 case 'r': 126 repeat = atoi(pj_optarg); 127 if (repeat < 1) { 128 puts("Invalid algorithm"); 129 puts(desc); 130 return 1; 131 } 132 break; 124 133 } 125 134 } … … 211 220 rec_frame.buf = pj_pool_alloc(pool, wav_play->info.samples_per_frame<<1); 212 221 pj_get_timestamp(&t0); 213 for (;;) { 214 play_frame.size = wav_play->info.samples_per_frame << 1; 215 status = pjmedia_port_get_frame(wav_play, &play_frame); 216 if (status != PJ_SUCCESS) 217 break; 218 219 status = pjmedia_echo_playback(ec, (short*)play_frame.buf); 220 221 rec_frame.size = wav_play->info.samples_per_frame << 1; 222 status = pjmedia_port_get_frame(wav_rec, &rec_frame); 223 if (status != PJ_SUCCESS) 224 break; 225 226 status = pjmedia_echo_capture(ec, (short*)rec_frame.buf, 0); 227 228 //status = pjmedia_echo_cancel(ec, (short*)rec_frame.buf, 229 // (short*)play_frame.buf, 0, NULL); 230 231 pjmedia_port_put_frame(wav_out, &rec_frame); 222 for (i=0; i < repeat; ++i) { 223 for (;;) { 224 play_frame.size = wav_play->info.samples_per_frame << 1; 225 status = pjmedia_port_get_frame(wav_play, &play_frame); 226 if (status != PJ_SUCCESS) 227 break; 228 229 status = pjmedia_echo_playback(ec, (short*)play_frame.buf); 230 231 rec_frame.size = wav_play->info.samples_per_frame << 1; 232 status = pjmedia_port_get_frame(wav_rec, &rec_frame); 233 if (status != PJ_SUCCESS) 234 break; 235 236 status = pjmedia_echo_capture(ec, (short*)rec_frame.buf, 0); 237 238 //status = pjmedia_echo_cancel(ec, (short*)rec_frame.buf, 239 // (short*)play_frame.buf, 0, NULL); 240 241 pjmedia_port_put_frame(wav_out, &rec_frame); 242 } 243 244 pjmedia_wav_player_port_set_pos(wav_play, 0); 245 pjmedia_wav_player_port_set_pos(wav_rec, 0); 232 246 } 233 247 pj_get_timestamp(&t1); … … 257 271 /* Shutdown PJLIB */ 258 272 pj_shutdown(); 273 274 #if 0 275 { 276 char s[10]; 277 puts("ENTER to quit"); 278 fgets(s, sizeof(s), stdin); 279 } 280 #endif 259 281 260 282 /* Done. */
Note: See TracChangeset
for help on using the changeset viewer.