Changeset 2199 for pjproject


Ignore:
Timestamp:
Aug 10, 2008 4:15:14 PM (16 years ago)
Author:
bennylp
Message:

Ticket #590: new echo suppressor which should work much better than the old one

Location:
pjproject/trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • pjproject/trunk/pjmedia/src/pjmedia/echo_suppress.c

    r2198 r2199  
    1818 */ 
    1919#include <pjmedia/types.h> 
     20#include <pjmedia/alaw_ulaw.h> 
    2021#include <pjmedia/errno.h> 
    2122#include <pjmedia/silencedet.h> 
     23#include <pj/array.h> 
    2224#include <pj/assert.h> 
    2325#include <pj/lock.h> 
     
    3032#define THIS_FILE                           "echo_suppress.c" 
    3133 
     34/* Maximum float constant */ 
     35#define MAX_FLOAT               (float)1.701411e38 
     36 
     37/* The effective learn duration (in seconds) before we declare that learning 
     38 * is complete. The actual learning duration itself may be longer depending 
     39 * on the conversation pattern (e.g. we can't detect echo if speaker is only 
     40 * playing silence). 
     41 */ 
     42#define MAX_CALC_DURATION_SEC   3 
     43 
     44/* The internal audio segment length, in milliseconds. 10ms should be good 
     45 * and no need to change it. 
     46 */ 
     47#define SEGMENT_PTIME           10 
     48 
     49/* The length of the template signal in milliseconds. The longer the template, 
     50 * the better correlation will be found, at the expense of more processing 
     51 * and longer learning time. 
     52 */ 
     53#define TEMPLATE_PTIME          200 
     54 
     55/* How long to look back in the past to see if either mic or speaker is 
     56 * active. 
     57 */ 
     58#define SIGNAL_LOOKUP_MSEC      200 
     59 
     60/* The minimum level value to be considered as talking, in uLaw complement 
     61 * (0-255). 
     62 */ 
     63#define MIN_SIGNAL_ULAW         35 
     64 
     65/* The period (in seconds) on which the ES will analyze its effectiveness, 
     66 * and it may trigger soft-reset to force recalculation. 
     67 */ 
     68#define CHECK_PERIOD            30 
     69 
     70/* Maximum signal level of average echo residue (in uLaw complement). When 
     71 * the residue value exceeds this value, we force the ES to re-learn. 
     72 */ 
     73#define MAX_RESIDUE             2.5 
     74 
     75 
     76#if 0 
     77#   define TRACE_(expr) PJ_LOG(5,expr) 
     78#else 
     79#   define TRACE_(expr) 
     80#endif 
     81 
     82PJ_INLINE(float) fabs(float val) 
     83{ 
     84    if (val < 0) 
     85        return -val; 
     86    else 
     87        return val; 
     88} 
     89 
     90 
     91#if defined(PJ_HAS_FLOATING_POINT) && PJ_HAS_FLOATING_POINT!=0 
     92    typedef float pj_ufloat_t; 
     93#   define pj_ufloat_from_float(f)      (f) 
     94#   define pj_ufloat_mul_u(val1, f)     ((val1) * (f)) 
     95#   define pj_ufloat_mul_i(val1, f)     ((val1) * (f)) 
     96#else 
     97    typedef pj_uint32_t pj_ufloat_t; 
     98 
     99    pj_ufloat_t pj_ufloat_from_float(float f) 
     100    { 
     101        return (pj_ufloat_t)(f * 65536); 
     102    } 
     103 
     104    unsigned pj_ufloat_mul_u(unsigned val1, pj_ufloat_t val2) 
     105    { 
     106        return (val1 * val2) >> 16; 
     107    } 
     108 
     109    int pj_ufloat_mul_i(int val1, pj_ufloat_t val2) 
     110    { 
     111        return (val1 * (pj_int32_t)val2) >> 16; 
     112    } 
     113#endif 
     114 
     115 
     116/* Conversation state */ 
     117typedef enum talk_state 
     118{ 
     119    ST_NULL, 
     120    ST_LOCAL_TALK, 
     121    ST_REM_SILENT, 
     122    ST_DOUBLETALK, 
     123    ST_REM_TALK 
     124} talk_state; 
     125 
     126const char *state_names[] =  
     127{ 
     128    "Null", 
     129    "local talking", 
     130    "remote silent", 
     131    "doubletalk", 
     132    "remote talking" 
     133}; 
     134 
     135 
     136/* Description: 
     137 
     138   The echo suppressor tries to find the position of echoed signal by looking 
     139   at the correlation between signal played to the speaker (played signal)  
     140   and the signal captured from the microphone (recorded signal). 
     141 
     142   To do this, it first divides the frames (from mic and speaker) into  
     143   segments, calculate the audio level of the segment, and save the level 
     144   information in the playback and record history (play_hist and rec_hist 
     145   respectively). 
     146 
     147   In the history, the newest element (depicted as "t0" in the diagram below) 
     148   is put in the last position of the array. 
     149 
     150   The record history size is as large as the template size (tmpl_cnt), since 
     151   we will use the record history as the template to find the best matching  
     152   position in the playback history. 
     153 
     154   Here is the record history buffer: 
     155 
     156       <--templ_cnt--> 
     157       +-------------+ 
     158       |   rec_hist  | 
     159       +-------------+ 
     160    t-templ_cnt......t0 
     161 
     162   As you can see, the newest frame ("t0") is put as the last element in the 
     163   array. 
     164 
     165   The playback history size is larger than record history, since we need to 
     166   find the matching pattern in the past. The playback history size is 
     167   "templ_cnt + tail_cnt", where "tail_cnt" is the number of segments equal 
     168   to the maximum tail length. The maximum tail length is set when the ES 
     169   is created. 
     170 
     171   Here is the playback history buffer: 
     172 
     173       <-----tail_cnt-----> <--templ_cnt--> 
     174       +-------------------+--------------+ 
     175       |             play_hist            | 
     176       +-------------------+--------------+ 
     177   t-play_hist_cnt...t-templ_cnt.......t0 
     178 
     179 
     180 
     181   Learning: 
     182 
     183   During the processing, the ES calculates the following values: 
     184    - the correlation value, that is how similar the playback signal compared 
     185      to the mic signal. The lower the correlation value the better (i.e. more 
     186      similar) the signal is. The correlation value is done over the template 
     187      duration. 
     188    - the gain scaling factor, that is the ratio between mic signal and  
     189      speaker signal. The ES calculates both the minimum and average ratios. 
     190 
     191   The ES calculates both the values above for every tail position in the 
     192   playback history. The values are saved in arrays below: 
     193 
     194     <-----tail_cnt-----> 
     195     +-------------------+ 
     196     |      corr_sum     | 
     197     +-------------------+ 
     198     |     min_factor    | 
     199     +-------------------+ 
     200     |     avg_factor    | 
     201     +-------------------+ 
     202 
     203   At the end of processing, the ES iterates through the correlation array and 
     204   picks the tail index with the lowest corr_sum value. This is the position 
     205   where echo is most likely to be found. 
     206 
     207 
     208   Processing: 
     209 
     210   Once learning is done, the ES will change the level of the mic signal  
     211   depending on the state of the conversation and according to the ratio that 
     212   has been found in the learning phase above. 
     213 
     214 */ 
    32215 
    33216/* 
    34  * Simple echo suppresor 
     217 * The simple echo suppressor state 
    35218 */ 
    36219typedef struct echo_supp 
    37220{ 
    38     pjmedia_silence_det *sd; 
    39     unsigned             samples_per_frame; 
    40     unsigned             tail_ms; 
     221    unsigned     clock_rate;        /* Clock rate.                          */ 
     222    pj_uint16_t  samples_per_frame; /* Frame length in samples              */ 
     223    pj_uint16_t  samples_per_segment;/* Segment length in samples           */ 
     224    pj_uint16_t  tail_ms;           /* Tail length in milliseconds          */ 
     225    pj_uint16_t  tail_samples;      /* Tail length in samples.              */ 
     226 
     227    pj_bool_t    learning;          /* Are we still learning yet?           */ 
     228    talk_state   talk_state;        /* Current talking state                */ 
     229    int          tail_index;        /* Echo location, -1 if not found       */ 
     230 
     231    unsigned     max_calc;          /* # of calc before learning complete. 
     232                                       (see MAX_CALC_DURATION_SEC)          */ 
     233    unsigned     calc_cnt;          /* Number of calculations so far        */ 
     234 
     235    unsigned     update_cnt;        /* # of updates                         */ 
     236    unsigned     templ_cnt;         /* Template length, in # of segments    */ 
     237    unsigned     tail_cnt;          /* Tail length, in # of segments        */ 
     238    unsigned     play_hist_cnt;     /* # of segments in play_hist           */ 
     239    pj_uint16_t *play_hist;         /* Array of playback levels             */ 
     240    pj_uint16_t *rec_hist;          /* Array of rec levels                  */ 
     241 
     242    float       *corr_sum;          /* Array of corr for each tail pos.     */ 
     243    float       *tmp_corr;          /* Temporary corr array calculation     */ 
     244    float        best_corr;         /* Best correlation so far.             */ 
     245 
     246    float       *min_factor;        /* Array of minimum scaling factor      */ 
     247    float       *avg_factor;        /* Array of average scaling factor      */ 
     248    float       *tmp_factor;        /* Array to store provisional result    */ 
     249 
     250    unsigned     running_cnt;       /* Running duration in # of frames      */ 
     251    float        residue;           /* Accumulated echo residue.            */ 
     252    float        last_factor;       /* Last factor applied to mic signal    */ 
    41253} echo_supp; 
    42254 
     
    55267{ 
    56268    echo_supp *ec; 
    57     pj_status_t status; 
    58  
    59     PJ_UNUSED_ARG(clock_rate); 
     269 
    60270    PJ_UNUSED_ARG(channel_count); 
    61271    PJ_UNUSED_ARG(options); 
    62272 
     273    PJ_ASSERT_RETURN(samples_per_frame >= SEGMENT_PTIME * clock_rate / 1000, 
     274                     PJ_ENOTSUP); 
     275 
    63276    ec = PJ_POOL_ZALLOC_T(pool, struct echo_supp); 
    64     ec->samples_per_frame = samples_per_frame; 
    65     ec->tail_ms = tail_ms; 
    66  
    67     status = pjmedia_silence_det_create(pool, clock_rate, samples_per_frame, 
    68                                         &ec->sd); 
    69     if (status != PJ_SUCCESS) 
    70         return status; 
    71  
    72     pjmedia_silence_det_set_name(ec->sd, "ecsu%p"); 
    73     pjmedia_silence_det_set_adaptive(ec->sd, PJMEDIA_ECHO_SUPPRESS_THRESHOLD); 
    74     pjmedia_silence_det_set_params(ec->sd, 100, 500, 3000); 
     277    ec->clock_rate = clock_rate; 
     278    ec->samples_per_frame = (pj_uint16_t)samples_per_frame; 
     279    ec->samples_per_segment = (pj_uint16_t)(SEGMENT_PTIME * clock_rate / 1000); 
     280    ec->tail_ms = (pj_uint16_t)tail_ms; 
     281    ec->tail_samples = (pj_uint16_t)(tail_ms * clock_rate / 1000); 
     282 
     283    ec->templ_cnt = TEMPLATE_PTIME / SEGMENT_PTIME; 
     284    ec->tail_cnt = (pj_uint16_t)(tail_ms / SEGMENT_PTIME); 
     285    ec->play_hist_cnt = (pj_uint16_t)(ec->tail_cnt+ec->templ_cnt); 
     286 
     287    ec->max_calc = (pj_uint16_t)(MAX_CALC_DURATION_SEC * clock_rate /  
     288                                 ec->samples_per_segment); 
     289 
     290    ec->rec_hist = (pj_uint16_t*)  
     291                    pj_pool_alloc(pool, ec->templ_cnt * 
     292                                        sizeof(ec->rec_hist[0])); 
     293 
     294    /* Note: play history holds tail_cnt + templ_cnt elements */ 
     295    ec->play_hist = (pj_uint16_t*)  
     296                     pj_pool_alloc(pool, ec->play_hist_cnt * 
     297                                         sizeof(ec->play_hist[0])); 
     298 
     299    ec->corr_sum = (float*) 
     300                   pj_pool_alloc(pool, ec->tail_cnt *  
     301                                       sizeof(ec->corr_sum[0])); 
     302    ec->tmp_corr = (float*) 
     303                   pj_pool_alloc(pool, ec->tail_cnt *  
     304                                       sizeof(ec->tmp_corr[0])); 
     305    ec->min_factor = (float*) 
     306                     pj_pool_alloc(pool, ec->tail_cnt *  
     307                                         sizeof(ec->min_factor[0])); 
     308    ec->avg_factor = (float*) 
     309                     pj_pool_alloc(pool, ec->tail_cnt *  
     310                                         sizeof(ec->avg_factor[0])); 
     311    ec->tmp_factor = (float*) 
     312                     pj_pool_alloc(pool, ec->tail_cnt *  
     313                                         sizeof(ec->tmp_factor[0])); 
     314    echo_supp_reset(ec); 
    75315 
    76316    *p_state = ec; 
     
    90330 
    91331/* 
    92  * Reset 
     332 * Hard reset 
    93333 */ 
    94334PJ_DEF(void) echo_supp_reset(void *state) 
    95335{ 
    96     PJ_UNUSED_ARG(state); 
    97     return; 
     336    unsigned i; 
     337    echo_supp *ec = (echo_supp*) state; 
     338 
     339    pj_bzero(ec->rec_hist, ec->templ_cnt * sizeof(ec->rec_hist[0])); 
     340    pj_bzero(ec->play_hist, ec->play_hist_cnt * sizeof(ec->play_hist[0])); 
     341 
     342    for (i=0; i<ec->tail_cnt; ++i) { 
     343        ec->corr_sum[i] = ec->avg_factor[i] = 0; 
     344        ec->min_factor[i] = MAX_FLOAT; 
     345    } 
     346 
     347    ec->update_cnt = 0; 
     348    ec->calc_cnt = 0; 
     349    ec->learning = PJ_TRUE; 
     350    ec->tail_index = -1; 
     351    ec->best_corr = MAX_FLOAT; 
     352    ec->talk_state = ST_NULL; 
     353    ec->last_factor = 1.0; 
     354    ec->residue = 0; 
     355    ec->running_cnt = 0; 
    98356} 
    99357 
    100358/* 
     359 * Soft reset to force the EC to re-learn without having to discard all 
     360 * rec and playback history. 
     361 */ 
     362PJ_DEF(void) echo_supp_soft_reset(void *state) 
     363{ 
     364    unsigned i; 
     365 
     366    echo_supp *ec = (echo_supp*) state; 
     367 
     368    for (i=0; i<ec->tail_cnt; ++i) { 
     369        ec->corr_sum[i] = 0; 
     370    } 
     371 
     372    ec->update_cnt = 0; 
     373    ec->calc_cnt = 0; 
     374    ec->learning = PJ_TRUE; 
     375    ec->best_corr = MAX_FLOAT; 
     376    ec->residue = 0; 
     377    ec->running_cnt = 0; 
     378 
     379    PJ_LOG(4,(THIS_FILE, "Echo suppressor soft reset. Re-learning..")); 
     380} 
     381 
     382 
     383/* Set state */ 
     384static void echo_supp_set_state(echo_supp *ec, enum talk_state state) 
     385{ 
     386    if (state != ec->talk_state) { 
     387        TRACE_((THIS_FILE, "[%03d.%03d] %s --> %s",  
     388                           (ec->update_cnt * SEGMENT_PTIME / 1000),  
     389                           ((ec->update_cnt * SEGMENT_PTIME) % 1000), 
     390                           state_names[ec->talk_state], 
     391                           state_names[state])); 
     392        ec->talk_state = state; 
     393    } 
     394} 
     395 
     396/* 
     397 * Update EC state 
     398 */ 
     399static void echo_supp_update(echo_supp *ec, pj_int16_t *rec_frm, 
     400                             const pj_int16_t *play_frm) 
     401{ 
     402    int prev_index; 
     403    unsigned i, frm_level, sum_rec_level; 
     404    float rec_corr; 
     405 
     406    ++ec->update_cnt; 
     407    if (ec->update_cnt > 0x7FFFFFFF) 
     408        ec->update_cnt = 0x7FFFFFFF; /* Detect overflow */ 
     409 
     410    /* Calculate current play frame level */ 
     411    frm_level = pjmedia_calc_avg_signal(play_frm, ec->samples_per_segment); 
     412    ++frm_level; /* to avoid division by zero */ 
     413 
     414    /* Push current frame level to the back of the play history */ 
     415    pj_array_erase(ec->play_hist, sizeof(pj_uint16_t), ec->play_hist_cnt, 0); 
     416    ec->play_hist[ec->play_hist_cnt-1] = (pj_uint16_t) frm_level; 
     417 
     418    /* Calculate level of current mic frame */ 
     419    frm_level = pjmedia_calc_avg_signal(rec_frm, ec->samples_per_segment); 
     420    ++frm_level; /* to avoid division by zero */ 
     421 
     422    /* Push to the back of the rec history */ 
     423    pj_array_erase(ec->rec_hist, sizeof(pj_uint16_t), ec->templ_cnt, 0); 
     424    ec->rec_hist[ec->templ_cnt-1] = (pj_uint16_t) frm_level; 
     425 
     426 
     427    /* Can't do the calc until the play history is full. */ 
     428    if (ec->update_cnt < ec->play_hist_cnt) 
     429        return; 
     430 
     431    /* Skip if learning is done */ 
     432    if (!ec->learning) 
     433        return; 
     434 
     435 
     436    /* Calculate rec signal pattern */ 
     437    rec_corr = 0; 
     438    sum_rec_level = 0; 
     439    for (i=0; i < ec->templ_cnt-1; ++i) { 
     440        float corr; 
     441        corr = (float)ec->rec_hist[i+1] / ec->rec_hist[i]; 
     442        rec_corr += corr; 
     443        sum_rec_level += ec->rec_hist[i]; 
     444    } 
     445    sum_rec_level += ec->rec_hist[i]; 
     446 
     447    /* Iterate through the play history and calculate the signal correlation 
     448     * for every tail position in the play_hist. Save the result in temporary 
     449     * array since we may bail out early if the conversation state is not good 
     450     * to detect echo. 
     451     */ 
     452    for (i=0; i < ec->tail_cnt; ++i) { 
     453        unsigned j, end, sum_play_level, ulaw; 
     454        float play_corr = 0, corr_diff; 
     455 
     456        sum_play_level = 0; 
     457        for (j=i, end=i+ec->templ_cnt-1; j<end; ++j) { 
     458            float corr; 
     459            corr = (float)ec->play_hist[j+1] / ec->play_hist[j]; 
     460            play_corr += corr; 
     461            sum_play_level += ec->play_hist[j]; 
     462        } 
     463        sum_play_level += ec->play_hist[j]; 
     464 
     465        /* Bail out if remote isn't talking */ 
     466        ulaw = pjmedia_linear2ulaw(sum_play_level/ec->templ_cnt) ^ 0xFF; 
     467        if (ulaw < MIN_SIGNAL_ULAW) { 
     468            echo_supp_set_state(ec, ST_REM_SILENT); 
     469            return; 
     470        } 
     471 
     472        /* Bail out if local user is talking */ 
     473        if (sum_rec_level >= sum_play_level) { 
     474            echo_supp_set_state(ec, ST_LOCAL_TALK); 
     475            return; 
     476        } 
     477 
     478        /* Also bail out if we suspect there's a doubletalk */ 
     479        ulaw = pjmedia_linear2ulaw(sum_rec_level/ec->templ_cnt) ^ 0xFF; 
     480        if (ulaw > MIN_SIGNAL_ULAW) { 
     481            echo_supp_set_state(ec, ST_DOUBLETALK); 
     482            return; 
     483        } 
     484 
     485        /* Calculate correlation and save to temporary array */ 
     486        corr_diff = fabs(play_corr - rec_corr); 
     487        ec->tmp_corr[i] = corr_diff; 
     488 
     489        /* Also calculate the gain factor between mic and speaker level */ 
     490        ec->tmp_factor[i] = (float)sum_rec_level / sum_play_level; 
     491        pj_assert(ec->tmp_factor[i] < 1); 
     492    } 
     493 
     494    /* We seem to have good signal, we can update the EC state */ 
     495    echo_supp_set_state(ec, ST_REM_TALK); 
     496 
     497    /* Accummulate the correlation value to the history and at the same 
     498     * time find the tail index of the best correlation. 
     499     */ 
     500    prev_index = ec->tail_index; 
     501    for (i=1; i<ec->tail_cnt-1; ++i) { 
     502        float *p = &ec->corr_sum[i], sum; 
     503 
     504        /* Accummulate correlation value  for this tail position */ 
     505        ec->corr_sum[i] += ec->tmp_corr[i]; 
     506 
     507        /* Update the min and avg gain factor for this tail position */ 
     508        if (ec->tmp_factor[i] < ec->min_factor[i]) 
     509            ec->min_factor[i] = ec->tmp_factor[i]; 
     510        ec->avg_factor[i] = ((ec->avg_factor[i] * ec->tail_cnt) +  
     511                                    ec->tmp_factor[i]) / 
     512                            (ec->tail_cnt + 1); 
     513 
     514        /* To get the best correlation, also include the correlation 
     515         * value of the neighbouring tail locations. 
     516         */ 
     517        sum = *(p-1) + (*p)*2 + *(p+1); 
     518        //sum = *p; 
     519 
     520        /* See if we have better correlation value */ 
     521        if (sum < ec->best_corr) { 
     522            ec->tail_index = i; 
     523            ec->best_corr = sum; 
     524        } 
     525    } 
     526 
     527    if (ec->tail_index != prev_index) { 
     528        unsigned duration; 
     529        int imin, iavg; 
     530 
     531        duration = ec->update_cnt * SEGMENT_PTIME; 
     532        imin = (int)(ec->min_factor[ec->tail_index] * 1000); 
     533        iavg = (int)(ec->avg_factor[ec->tail_index] * 1000); 
     534 
     535        PJ_LOG(4,(THIS_FILE,  
     536                  "Echo suppressor updated at t=%03d.%03ds, echo tail=%d msec" 
     537                  ", factor min/avg=%d.%03d/%d.%03d", 
     538                  (duration/1000), (duration%1000), 
     539                  (ec->tail_cnt-ec->tail_index) * SEGMENT_PTIME, 
     540                  imin/1000, imin%1000, 
     541                  iavg/1000, iavg%1000)); 
     542 
     543    } 
     544 
     545    ++ec->calc_cnt; 
     546 
     547    if (ec->calc_cnt > ec->max_calc) { 
     548        unsigned duration; 
     549        int imin, iavg; 
     550 
     551 
     552        ec->learning = PJ_FALSE; 
     553        ec->running_cnt = 0; 
     554 
     555        duration = ec->update_cnt * SEGMENT_PTIME; 
     556        imin = (int)(ec->min_factor[ec->tail_index] * 1000); 
     557        iavg = (int)(ec->avg_factor[ec->tail_index] * 1000); 
     558 
     559        PJ_LOG(4,(THIS_FILE,  
     560                  "Echo suppressor learning done at t=%03d.%03ds, tail=%d ms" 
     561                  ", factor min/avg=%d.%03d/%d.%03d", 
     562                  (duration/1000), (duration%1000), 
     563                  (ec->tail_cnt-ec->tail_index) * SEGMENT_PTIME, 
     564                  imin/1000, imin%1000, 
     565                  iavg/1000, iavg%1000)); 
     566    } 
     567 
     568} 
     569 
     570 
     571/* Amplify frame */ 
     572static void amplify_frame(pj_int16_t *frm, unsigned length,  
     573                          pj_ufloat_t factor) 
     574{ 
     575    unsigned i; 
     576 
     577    for (i=0; i<length; ++i) { 
     578        frm[i] = (pj_int16_t)pj_ufloat_mul_i(frm[i], factor); 
     579    } 
     580} 
     581 
     582/*  
    101583 * Perform echo cancellation. 
    102584 */ 
     
    107589                                           void *reserved ) 
    108590{ 
     591    unsigned i, N; 
    109592    echo_supp *ec = (echo_supp*) state; 
    110     pj_bool_t silence; 
    111593 
    112594    PJ_UNUSED_ARG(options); 
    113595    PJ_UNUSED_ARG(reserved); 
    114596 
    115     silence = pjmedia_silence_det_detect(ec->sd, play_frm,  
    116                                          ec->samples_per_frame, NULL); 
    117  
    118     if (!silence) { 
    119 #if defined(PJMEDIA_ECHO_SUPPRESS_FACTOR) && PJMEDIA_ECHO_SUPPRESS_FACTOR!=0 
    120         unsigned i; 
    121         for (i=0; i<ec->samples_per_frame; ++i) { 
    122             rec_frm[i] = (pj_int16_t)(rec_frm[i] >>  
    123                                       PJMEDIA_ECHO_SUPPRESS_FACTOR); 
    124         } 
    125 #else 
    126         pjmedia_zero_samples(rec_frm, ec->samples_per_frame); 
    127 #endif 
     597    /* Calculate number of segments. This should be okay even if 
     598     * samples_per_frame is not a multiple of samples_per_segment, since 
     599     * we only calculate level. 
     600     */ 
     601    N = ec->samples_per_frame / ec->samples_per_segment; 
     602    pj_assert(N>0); 
     603    for (i=0; i<N; ++i) { 
     604        unsigned pos = i * ec->samples_per_segment; 
     605        echo_supp_update(ec, rec_frm+pos, play_frm+pos); 
     606    } 
     607 
     608    if (ec->tail_index < 0) { 
     609        /* Not ready */ 
     610    } else { 
     611        unsigned lookup_cnt, rec_level=0, play_level=0; 
     612        unsigned tail_cnt; 
     613        float factor; 
     614 
     615        /* How many previous segments to lookup */ 
     616        lookup_cnt = SIGNAL_LOOKUP_MSEC / SEGMENT_PTIME; 
     617        if (lookup_cnt > ec->templ_cnt) 
     618            lookup_cnt = ec->templ_cnt; 
     619 
     620        /* Lookup in recording history to get maximum mic level, to see 
     621         * if local user is currently talking 
     622         */ 
     623        for (i=ec->templ_cnt - lookup_cnt; i < ec->templ_cnt; ++i) { 
     624            if (ec->rec_hist[i] > rec_level) 
     625                rec_level = ec->rec_hist[i]; 
     626        } 
     627        rec_level = pjmedia_linear2ulaw(rec_level) ^ 0xFF; 
     628 
     629        /* Calculate the detected tail length, in # of segments */ 
     630        tail_cnt = (ec->tail_cnt - ec->tail_index); 
     631 
     632        /* Lookup in playback history to get max speaker level, to see 
     633         * if remote user is currently talking 
     634         */ 
     635        for (i=ec->play_hist_cnt -lookup_cnt -tail_cnt;  
     636             i<ec->play_hist_cnt-tail_cnt; ++i)  
     637        { 
     638            if (ec->play_hist[i] > play_level) 
     639                play_level = ec->play_hist[i]; 
     640        } 
     641        play_level = pjmedia_linear2ulaw(play_level) ^ 0xFF; 
     642 
     643        if (rec_level >= MIN_SIGNAL_ULAW) { 
     644            if (play_level < MIN_SIGNAL_ULAW) { 
     645                /* Mic is talking, speaker is idle. Let mic signal pass as is. 
     646                 */ 
     647                factor = 1.0; 
     648                echo_supp_set_state(ec, ST_LOCAL_TALK); 
     649            } else { 
     650                /* Seems that both are talking. Scale the mic signal 
     651                 * down a little bit to reduce echo, while allowing both 
     652                 * parties to talk at the same time. 
     653                 */ 
     654                factor = (float)(ec->avg_factor[ec->tail_index] * 2); 
     655                echo_supp_set_state(ec, ST_DOUBLETALK); 
     656            } 
     657        } else { 
     658            if (play_level < MIN_SIGNAL_ULAW) { 
     659                /* Both mic and speaker seems to be idle. Also scale the 
     660                 * mic signal down with average factor to reduce low power 
     661                 * echo. 
     662                 */ 
     663                factor = ec->avg_factor[ec->tail_index] * 3 / 2; 
     664                echo_supp_set_state(ec, ST_REM_SILENT); 
     665            } else { 
     666                /* Mic is idle, but there's something playing in speaker. 
     667                 * Scale the mic down to minimum 
     668                 */ 
     669                factor = ec->min_factor[ec->tail_index] / 2; 
     670                echo_supp_set_state(ec, ST_REM_TALK); 
     671            } 
     672        } 
     673 
     674        /* Smoothen the transition */ 
     675        if (factor > ec->last_factor) 
     676            factor = (factor + ec->last_factor) / 2; 
     677        else 
     678            factor = (factor + ec->last_factor*9) / 10; 
     679 
     680        /* Amplify frame */ 
     681        amplify_frame(rec_frm, ec->samples_per_frame,  
     682                      pj_ufloat_from_float(factor)); 
     683        ec->last_factor = factor; 
     684 
     685        if (ec->talk_state == ST_REM_TALK) { 
     686            unsigned level, recalc_cnt; 
     687 
     688            /* Get the adjusted frame signal level */ 
     689            level = pjmedia_calc_avg_signal(rec_frm, ec->samples_per_frame); 
     690            level = pjmedia_linear2ulaw(level) ^ 0xFF; 
     691 
     692            /* Accumulate average echo residue to see the ES effectiveness */ 
     693            ec->residue = ((ec->residue * ec->running_cnt) + level) /  
     694                          (ec->running_cnt + 1); 
     695 
     696            ++ec->running_cnt; 
     697 
     698            /* Check if we need to re-learn */ 
     699            recalc_cnt = CHECK_PERIOD * ec->clock_rate / ec->samples_per_frame; 
     700            if (ec->running_cnt > recalc_cnt) { 
     701                int iresidue; 
     702 
     703                iresidue = (int)(ec->residue*1000); 
     704 
     705                PJ_LOG(5,(THIS_FILE, "Echo suppressor residue = %d.%03d", 
     706                          iresidue/1000, iresidue%1000)); 
     707 
     708                if (ec->residue > MAX_RESIDUE && !ec->learning) { 
     709                    echo_supp_soft_reset(ec); 
     710                    ec->residue = 0; 
     711                } else { 
     712                    ec->running_cnt = 0; 
     713                    ec->residue = 0; 
     714                } 
     715            } 
     716        } 
    128717    } 
    129718 
  • pjproject/trunk/pjsip-apps/src/samples/aectest.c

    r2198 r2199  
    5757"  -d  The delay between playback and capture in ms. Default is zero.\n" 
    5858"  -l  Set the echo tail length in ms. Default is 200 ms            \n" 
     59"  -r  Set repeat count (default=1)                                 \n" 
    5960"  -a  Algorithm: 0=default, 1=speex, 3=echo suppress               \n"; 
    6061 
     
    9293    unsigned tail_ms = TAIL_LENGTH; 
    9394    pj_timestamp t0, t1; 
    94     int c; 
     95    int i, repeat=1, c; 
    9596 
    9697    pj_optind = 0; 
    97     while ((c=pj_getopt(argc, argv, "d:l:a:")) !=-1) { 
     98    while ((c=pj_getopt(argc, argv, "d:l:a:r:")) !=-1) { 
    9899        switch (c) { 
    99100        case 'd': 
     
    122123            } 
    123124            break; 
     125        case 'r': 
     126            repeat = atoi(pj_optarg); 
     127            if (repeat < 1) { 
     128                puts("Invalid algorithm"); 
     129                puts(desc); 
     130                return 1; 
     131            } 
     132            break; 
    124133        } 
    125134    } 
     
    211220    rec_frame.buf = pj_pool_alloc(pool, wav_play->info.samples_per_frame<<1); 
    212221    pj_get_timestamp(&t0); 
    213     for (;;) { 
    214         play_frame.size = wav_play->info.samples_per_frame << 1; 
    215         status = pjmedia_port_get_frame(wav_play, &play_frame); 
    216         if (status != PJ_SUCCESS) 
    217             break; 
    218  
    219         status = pjmedia_echo_playback(ec, (short*)play_frame.buf); 
    220  
    221         rec_frame.size = wav_play->info.samples_per_frame << 1; 
    222         status = pjmedia_port_get_frame(wav_rec, &rec_frame); 
    223         if (status != PJ_SUCCESS) 
    224             break; 
    225  
    226         status = pjmedia_echo_capture(ec, (short*)rec_frame.buf, 0); 
    227  
    228         //status = pjmedia_echo_cancel(ec, (short*)rec_frame.buf,  
    229         //                           (short*)play_frame.buf, 0, NULL); 
    230  
    231         pjmedia_port_put_frame(wav_out, &rec_frame); 
     222    for (i=0; i < repeat; ++i) { 
     223        for (;;) { 
     224            play_frame.size = wav_play->info.samples_per_frame << 1; 
     225            status = pjmedia_port_get_frame(wav_play, &play_frame); 
     226            if (status != PJ_SUCCESS) 
     227                break; 
     228 
     229            status = pjmedia_echo_playback(ec, (short*)play_frame.buf); 
     230 
     231            rec_frame.size = wav_play->info.samples_per_frame << 1; 
     232            status = pjmedia_port_get_frame(wav_rec, &rec_frame); 
     233            if (status != PJ_SUCCESS) 
     234                break; 
     235 
     236            status = pjmedia_echo_capture(ec, (short*)rec_frame.buf, 0); 
     237 
     238            //status = pjmedia_echo_cancel(ec, (short*)rec_frame.buf,  
     239            //                       (short*)play_frame.buf, 0, NULL); 
     240 
     241            pjmedia_port_put_frame(wav_out, &rec_frame); 
     242        } 
     243 
     244        pjmedia_wav_player_port_set_pos(wav_play, 0); 
     245        pjmedia_wav_player_port_set_pos(wav_rec, 0); 
    232246    } 
    233247    pj_get_timestamp(&t1); 
     
    257271    /* Shutdown PJLIB */ 
    258272    pj_shutdown(); 
     273 
     274#if 0 
     275    { 
     276        char s[10]; 
     277        puts("ENTER to quit"); 
     278        fgets(s, sizeof(s), stdin); 
     279    } 
     280#endif 
    259281 
    260282    /* Done. */ 
Note: See TracChangeset for help on using the changeset viewer.