Changeset 2222 for pjproject/trunk/pjmedia/src/pjmedia/silencedet.c
- Timestamp: Aug 19, 2008 11:04:32 AM (16 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
pjproject/trunk/pjmedia/src/pjmedia/silencedet.c
r2215 r2222 1 1 /* $Id$ */ 2 2 /* 3 * Copyright (C) 2003-200 8Benny Prijono <benny@prijono.org>3 * Copyright (C) 2003-2007 Benny Prijono <benny@prijono.org> 4 4 * 5 5 * This program is free software; you can redistribute it and/or modify … … 24 24 #include <pj/pool.h> 25 25 26 27 26 #define THIS_FILE "silencedet.c" 28 27 28 #if 0 29 # define TRACE_(x) PJ_LOG(3,x) 30 #else 31 # define TRACE_(x) 32 #endif 33 34 /** 35 * This enumeration specifies operation mode of silence detector 36 */ 29 37 typedef enum pjmedia_silence_det_mode { 30 38 VAD_MODE_NONE, … … 33 41 } pjmedia_silence_det_mode; 34 42 35 43 /** 44 * Default settings 45 */ 46 #define DEF_RECALC_ON_VOICED 4000 /* Time to recalculate threshold 47 in voiced condition, in ms */ 48 #define DEF_RECALC_ON_SILENCE 2000 /* Time to recalculate threshold 49 in silence condition, in ms. */ 50 #define DEF_BEFORE_SILENCE 400 /* Silence time before really changing 51 state into SILENCE, in ms. */ 52 #define DEF_THRESHOLD 1000 /* Default threshold. */ 53 54 /** 55 * This enumeration specifies the states of the silence detector. 56 */ 57 enum pjmedia_silence_det_state { 58 STATE_SILENCE, 59 STATE_START_SILENCE, 60 STATE_VOICED 61 }; 36 62 37 63 /** … … 42 68 char objname[PJ_MAX_OBJ_NAME]; /**< VAD name. */ 43 69 44 int mode; /**< VAD mode. */ 45 unsigned ptime; /**< Frame time, in msec. */ 46 47 unsigned min_signal_cnt; /**< # of signal frames.before talk burst */ 48 unsigned min_silence_cnt; /**< # of silence frames before silence. */ 49 unsigned recalc_cnt; /**< # of frames before adaptive recalc. */ 50 51 pj_bool_t in_talk; /**< In talk burst? */ 52 unsigned cur_cnt; /**< # of frames in current mode. */ 53 unsigned signal_cnt; /**< # of signal frames received. */ 54 unsigned silence_cnt; /**< # of silence frames received */ 55 unsigned cur_threshold; /**< Current silence threshold. */ 56 unsigned weakest_signal; /**< Weakest signal detected. */ 57 unsigned loudest_silence; /**< Loudest silence detected. 
*/ 70 int mode; /**< VAD mode. */ 71 unsigned ptime; /**< Frame time, in msec. */ 72 73 unsigned threshold; /**< Current threshold level. */ 74 unsigned sum_level; /**< Total sum of recent level. */ 75 unsigned sum_cnt; /**< Number of level summed. */ 76 unsigned silence_timer; /**< Silence condition timer. */ 77 unsigned voiced_timer; /**< Voiced condition timer. */ 78 79 enum pjmedia_silence_det_state state;/**< Silence detector state. */ 80 unsigned recalc_on_voiced; /**< Setting of time to recalc 81 threshold in voiced condition. */ 82 unsigned recalc_on_silence; /**< Setting of time to recalc 83 threshold in silence condition.*/ 84 unsigned before_silence; /**< Setting of silence time before 85 really changing state into SILENCE, 86 in ms. */ 58 87 }; 59 88 … … 71 100 sd = PJ_POOL_ZALLOC_T(pool, pjmedia_silence_det); 72 101 73 pj_ansi_s trncpy(sd->objname, THIS_FILE, PJ_MAX_OBJ_NAME);102 pj_ansi_snprintf(sd->objname, PJ_MAX_OBJ_NAME, THIS_FILE, sd); 74 103 sd->objname[PJ_MAX_OBJ_NAME-1] = '\0'; 75 104 76 105 sd->ptime = samples_per_frame * 1000 / clock_rate; 77 sd->signal_cnt = 0;78 sd->silence_cnt = 0;79 sd->weakest_signal = 0xFFFFFFFFUL;80 sd->loudest_silence = 0;81 106 82 107 /* Default settings */ 83 108 pjmedia_silence_det_set_params(sd, -1, -1, -1); 84 109 85 /* Restart in fixed, silent mode */ 86 sd->in_talk = PJ_FALSE; 110 /* Restart in adaptive, silent mode */ 87 111 pjmedia_silence_det_set_adaptive( sd, -1 ); 88 112 … … 102 126 } 103 127 104 105 128 PJ_DEF(pj_status_t) pjmedia_silence_det_set_adaptive(pjmedia_silence_det *sd, 106 129 int threshold) … … 109 132 110 133 if (threshold < 0) 111 threshold = PJMEDIA_SILENCE_DET_THRESHOLD;134 threshold = DEF_THRESHOLD; 112 135 113 136 sd->mode = VAD_MODE_ADAPTIVE; 114 sd-> cur_threshold = threshold;137 sd->threshold = threshold; 115 138 116 139 return PJ_SUCCESS; … … 123 146 124 147 if (threshold < 0) 125 threshold = PJMEDIA_SILENCE_DET_THRESHOLD;148 threshold = DEF_THRESHOLD; 126 149 127 150 sd->mode = 
VAD_MODE_FIXED; 128 sd-> cur_threshold = threshold;151 sd->threshold = threshold; 129 152 130 153 return PJ_SUCCESS; … … 132 155 133 156 PJ_DEF(pj_status_t) pjmedia_silence_det_set_params( pjmedia_silence_det *sd, 134 int min_silence,135 int min_signal,136 int recalc_time )157 int before_silence, 158 int recalc_time1, 159 int recalc_time2) 137 160 { 138 161 PJ_ASSERT_RETURN(sd, PJ_EINVAL); 139 162 140 if ( min_silence == -1)141 min_silence = 500;142 if ( min_signal< 0)143 min_signal = sd->ptime;144 if ( recalc_time < 0)145 recalc_time = 2000;146 147 sd-> min_signal_cnt = min_signal / sd->ptime;148 sd-> min_silence_cnt = min_silence / sd->ptime;149 sd-> recalc_cnt = recalc_time / sd->ptime;163 if (recalc_time1 < 0) 164 recalc_time1 = DEF_RECALC_ON_VOICED; 165 if (recalc_time2 < 0) 166 recalc_time2 = DEF_RECALC_ON_SILENCE; 167 if (before_silence < 0) 168 before_silence = DEF_BEFORE_SILENCE; 169 170 sd->recalc_on_voiced = recalc_time1; 171 sd->recalc_on_silence = recalc_time2; 172 sd->before_silence = before_silence; 150 173 151 174 return PJ_SUCCESS; … … 187 210 pj_uint32_t level) 188 211 { 189 pj_bool_t have_signal; 190 191 /* Always return false if VAD is disabled */ 212 int avg_recent_level; 213 192 214 if (sd->mode == VAD_MODE_NONE) 193 215 return PJ_FALSE; 194 216 195 /* Convert PCM level to ulaw */ 196 level = pjmedia_linear2ulaw(level) ^ 0xff; 197 198 /* Do we have signal? */ 199 have_signal = level > sd->cur_threshold; 200 201 /* We we're in transition between silence and signel, increment the 202 * current frame counter. We will only switch mode when we have enough 203 * frames. 204 */ 205 if (sd->in_talk != have_signal) { 206 unsigned limit; 207 208 sd->cur_cnt++; 209 210 limit = (sd->in_talk ? 
sd->min_silence_cnt : 211 sd->min_signal_cnt); 212 213 if (sd->cur_cnt > limit) { 214 215 /* Swap mode */ 216 sd->in_talk = !sd->in_talk; 217 218 /* Restart adaptive cur_threshold measurements */ 219 sd->weakest_signal = 0xFFFFFFFFUL; 220 sd->loudest_silence = 0; 221 sd->signal_cnt = 0; 222 sd->silence_cnt = 0; 223 sd->cur_cnt = 0; 217 if (sd->mode == VAD_MODE_FIXED) 218 return (level < sd->threshold); 219 220 /* Calculating recent level */ 221 sd->sum_level += level; 222 ++sd->sum_cnt; 223 avg_recent_level = (sd->sum_level / sd->sum_cnt); 224 225 if (level > sd->threshold) { 226 sd->silence_timer = 0; 227 sd->voiced_timer += sd->ptime; 228 229 switch(sd->state) { 230 case STATE_VOICED: 231 if (sd->voiced_timer > sd->recalc_on_voiced) { 232 /* Voiced for long time (>recalc_on_voiced), current 233 * threshold seems to be too low. 234 */ 235 sd->threshold = (avg_recent_level + sd->threshold) >> 1; 236 TRACE_((THIS_FILE,"Re-adjust threshold (in talk burst)" 237 "to %d", sd->threshold)); 238 239 sd->voiced_timer = 0; 240 241 /* Reset sig_level */ 242 sd->sum_level = avg_recent_level; 243 sd->sum_cnt = 1; 244 } 245 break; 246 247 case STATE_SILENCE: 248 TRACE_((THIS_FILE,"Starting talk burst (level=%d threshold=%d)", 249 level, sd->threshold)); 250 251 case STATE_START_SILENCE: 252 sd->state = STATE_VOICED; 253 254 /* Reset sig_level */ 255 sd->sum_level = level; 256 sd->sum_cnt = 1; 257 258 break; 259 260 default: 261 pj_assert(0); 262 break; 224 263 } 225 226 264 } else { 227 /* Reset frame count */ 228 sd->cur_cnt = 0; 265 sd->voiced_timer = 0; 266 sd->silence_timer += sd->ptime; 267 268 switch(sd->state) { 269 case STATE_SILENCE: 270 if (sd->silence_timer >= sd->recalc_on_silence) { 271 sd->threshold = avg_recent_level << 1; 272 TRACE_((THIS_FILE,"Re-adjust threshold (in silence)" 273 "to %d", sd->threshold)); 274 275 sd->silence_timer = 0; 276 277 /* Reset sig_level */ 278 sd->sum_level = avg_recent_level; 279 sd->sum_cnt = 1; 280 } 281 break; 282 283 case 
STATE_VOICED: 284 sd->state = STATE_START_SILENCE; 285 286 /* Reset sig_level */ 287 sd->sum_level = level; 288 sd->sum_cnt = 1; 289 290 case STATE_START_SILENCE: 291 if (sd->silence_timer >= sd->before_silence) { 292 sd->state = STATE_SILENCE; 293 sd->threshold = avg_recent_level << 1; 294 TRACE_((THIS_FILE,"Starting silence (level=%d " 295 "threshold=%d)", level, sd->threshold)); 296 297 /* Reset sig_level */ 298 sd->sum_level = avg_recent_level; 299 sd->sum_cnt = 1; 300 } 301 break; 302 303 default: 304 pj_assert(0); 305 break; 306 } 229 307 } 230 231 232 /* Count the number of silent and signal frames and calculate min/max */ 233 if (have_signal) { 234 if (level < sd->weakest_signal) 235 sd->weakest_signal = level; 236 sd->signal_cnt++; 237 } 238 else { 239 if (level > sd->loudest_silence) 240 sd->loudest_silence = level; 241 sd->silence_cnt++; 242 } 243 244 /* See if we have had enough frames to look at proportions of 245 * silence/signal frames. 246 */ 247 if ((sd->signal_cnt + sd->silence_cnt) > sd->recalc_cnt) { 248 249 if (sd->mode == VAD_MODE_ADAPTIVE) { 250 pj_bool_t updated = PJ_TRUE; 251 unsigned pct_signal, new_threshold = sd->cur_threshold; 252 253 /* Get percentage of signal */ 254 pct_signal = sd->signal_cnt * 100 / 255 (sd->signal_cnt + sd->silence_cnt); 256 257 /* Adjust according to signal/silence proportions. */ 258 if (pct_signal > 95) { 259 new_threshold += (sd->weakest_signal+1 - sd->cur_threshold)/2; 260 } else if (pct_signal < 5) { 261 new_threshold = (sd->cur_threshold+sd->loudest_silence)/2+1; 262 } else if (pct_signal > 80) { 263 new_threshold++; 264 } else if (pct_signal < 10) { 265 new_threshold--; 266 } else { 267 updated = PJ_FALSE; 268 } 269 270 if (new_threshold > PJMEDIA_SILENCE_DET_MAX_THRESHOLD) 271 new_threshold = PJMEDIA_SILENCE_DET_MAX_THRESHOLD; 272 273 if (updated && sd->cur_threshold != new_threshold) { 274 PJ_LOG(5,(sd->objname, 275 "Vad cur_threshold updated %d-->%d. 
" 276 "Signal lo=%d", 277 sd->cur_threshold, new_threshold, 278 sd->weakest_signal)); 279 sd->cur_threshold = new_threshold; 280 } 281 } 282 283 /* Reset. */ 284 sd->weakest_signal = 0xFFFFFFFFUL; 285 sd->loudest_silence = 0; 286 sd->signal_cnt = 0; 287 sd->silence_cnt = 0; 288 } 289 290 return !sd->in_talk; 291 308 309 return (sd->state == STATE_SILENCE); 292 310 } 293 311
Note: See TracChangeset for help on using the changeset viewer.