Changeset 5326


Ignore:
Timestamp:
May 31, 2016 4:28:00 AM (8 years ago)
Author:
nanang
Message:

Re #1918:

  • Fixed an issue where calls could not be made or received after a previous call's initialization failed due to a STUN error. Steps to reproduce:
    1. Configure an account with acc->cfg.media_stun_use set to PJSUA_STUN_RETRY_ON_FAILURE.
    2. Start pjsua with STUN servers A and B configured. On startup, both STUN servers A and B are available, so PJSIP will use STUN server A
    3. Both STUN servers A and B become unavailable.
    4. Make an outgoing call.
    5. Pjsua first tries STUN server A, which fails.
    6. Pjsua then retries with STUN server B, which also fails.
    7. PJSIP then aborts the call with error (which is desired!)
    8. Both STUN server A and B become available again
    9. The user tries to dial or receive an incoming call, but cannot, because the last STUN server status is stored and reused when making/receiving calls without retrying to resolve the STUN server.
  • Fixed deadlock issues.
Location:
pjproject/trunk/pjsip
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • pjproject/trunk/pjsip/include/pjsua-lib/pjsua.h

    r5307 r5326  
    33263326     * Control the use of STUN for the media transports. 
    33273327     * 
    3328      * Default: PJSUA_STUN_USE_DEFAULT 
     3328     * Default: PJSUA_STUN_RETRY_ON_FAILURE 
    33293329     */ 
    33303330    pjsua_stun_use              media_stun_use; 
  • pjproject/trunk/pjsip/include/pjsua-lib/pjsua_internal.h

    r5323 r5326  
    368368    pj_stun_resolve_cb   cb;        /**< App callback       */ 
    369369    pj_bool_t            blocking;  /**< Blocking?          */ 
     370    pj_thread_t         *waiter;    /**< Waiting thread     */ 
     371    pj_timer_entry       timer;     /**< Destroy timer      */ 
    370372    pj_status_t          status;    /**< Session status     */ 
    371373    pj_sockaddr          addr;      /**< Result             */ 
     
    609611 */ 
    610612/* Resolve the STUN server */ 
    611 pj_status_t resolve_stun_server(pj_bool_t wait); 
     613pj_status_t resolve_stun_server(pj_bool_t wait, pj_bool_t retry_if_cur_error); 
    612614 
    613615/**  
  • pjproject/trunk/pjsip/src/pjsua-lib/pjsua_core.c

    r5322 r5326  
    303303    cfg->register_on_acc_add = PJ_TRUE; 
    304304    cfg->mwi_expires = PJSIP_MWI_DEFAULT_EXPIRES; 
     305 
     306    cfg->media_stun_use = PJSUA_STUN_RETRY_ON_FAILURE; 
    305307} 
    306308 
     
    10501052 
    10511053    /* Start resolving STUN server */ 
    1052     status = resolve_stun_server(PJ_FALSE); 
     1054    status = resolve_stun_server(PJ_FALSE, PJ_FALSE); 
    10531055    if (status != PJ_SUCCESS && status != PJ_EPENDING) { 
    10541056        pjsua_perror(THIS_FILE, "Error resolving STUN server", status); 
     
    11571159} 
    11581160 
     1161 
     1162static void destroy_stun_resolve_cb(pj_timer_heap_t *t, pj_timer_entry *e) 
     1163{ 
     1164    pjsua_stun_resolve *sess = (pjsua_stun_resolve*)e->user_data; 
     1165    PJ_UNUSED_ARG(t); 
     1166 
     1167    PJSUA_LOCK(); 
     1168    pj_list_erase(sess); 
     1169    PJSUA_UNLOCK(); 
     1170 
     1171    pj_assert(sess->stun_sock==NULL); 
     1172    pj_pool_release(sess->pool); 
     1173} 
     1174 
     1175 
    11591176static void destroy_stun_resolve(pjsua_stun_resolve *sess) 
    11601177{ 
     1178    pj_time_val timeout = {0, 0}; 
     1179 
    11611180    sess->destroy_flag = PJ_TRUE; 
    1162     if (sess->ref_cnt > 0) 
     1181 
     1182    /* If the STUN resolution session is blocking, only the waiting thread 
     1183     * is allowed to destroy the session, otherwise it may cause deadlock. 
     1184     */ 
     1185    if (sess->blocking) { 
     1186        if (sess->waiter != pj_thread_this()) 
     1187            return; 
     1188 
     1189        /* Before destroying, make sure ref count is zero. */ 
     1190        while (sess->ref_cnt > 0) 
     1191            pj_thread_sleep(10); 
     1192 
     1193    } else if (sess->ref_cnt > 0) 
    11631194        return; 
    1164  
    1165     PJSUA_LOCK(); 
    11661195 
    11671196    if (sess->stun_sock) { 
     
    11761205    } 
    11771206     
    1178     pj_list_erase(sess); 
    1179  
    1180     PJSUA_UNLOCK(); 
    1181  
    1182     pj_assert(sess->stun_sock==NULL); 
    1183     pj_pool_release(sess->pool); 
     1207    /* Schedule session clean up, it needs PJSUA lock and locking it here 
     1208     * may cause deadlock as this function may be called by STUN socket 
     1209     * while holding STUN socket lock, while application may wait for STUN 
     1210     * resolution while holding PJSUA lock. 
     1211     */ 
     1212    pj_timer_entry_init(&sess->timer, 0, (void*)sess, 
     1213                        &destroy_stun_resolve_cb); 
     1214    pjsua_schedule_timer(&sess->timer, &timeout); 
    11841215} 
    11851216 
     
    12241255    } 
    12251256 
    1226     stun_resolve_add_ref(sess); 
    12271257    sess->cb(&result); 
    1228     stun_resolve_dec_ref(sess); 
    12291258 
    12301259on_return: 
     
    12581287        sess->stun_sock = NULL; 
    12591288 
     1289        stun_resolve_add_ref(sess); 
     1290 
    12601291        ++sess->idx; 
    12611292        if (sess->idx >= sess->count) 
     
    12641295        resolve_stun_entry(sess); 
    12651296 
     1297        stun_resolve_dec_ref(sess); 
     1298 
    12661299        return PJ_FALSE; 
    12671300 
     
    12711304        pj_stun_sock_get_info(stun_sock, &ssi); 
    12721305        pj_memcpy(&sess->addr, &ssi.srv_addr, sizeof(sess->addr)); 
     1306 
     1307        stun_resolve_add_ref(sess); 
    12731308 
    12741309        sess->status = PJ_SUCCESS; 
     
    12781313        stun_resolve_complete(sess); 
    12791314 
     1315        stun_resolve_dec_ref(sess); 
     1316 
    12801317        return PJ_FALSE; 
    12811318 
     
    12921329{ 
    12931330    pj_status_t status = PJ_EUNKNOWN; 
    1294  
    1295     stun_resolve_add_ref(sess); 
    12961331 
    12971332    /* Loop while we have entry to try */ 
     
    13611396         * stun_sock_cb() 
    13621397         */ 
    1363         goto on_return; 
     1398        return; 
    13641399    } 
    13651400 
    13661401    if (sess->idx >= sess->count) { 
    13671402        /* No more entries to try */ 
     1403        stun_resolve_add_ref(sess); 
    13681404        pj_assert(status != PJ_SUCCESS || sess->status != PJ_EPENDING); 
    13691405        if (sess->status == PJ_EPENDING) 
    13701406            sess->status = status; 
    13711407        stun_resolve_complete(sess); 
    1372     } 
    1373  
    1374 on_return: 
    1375     stun_resolve_dec_ref(sess); 
     1408        stun_resolve_dec_ref(sess); 
     1409    } 
    13761410} 
    13771411 
     
    13971431    pjsua_var.stun_status = PJ_EUNKNOWN; 
    13981432 
    1399     status = resolve_stun_server(wait); 
     1433    PJSUA_UNLOCK(); 
     1434     
     1435    status = resolve_stun_server(wait, PJ_FALSE); 
    14001436    if (wait == PJ_FALSE && status == PJ_EPENDING) 
    14011437        status = PJ_SUCCESS; 
    14021438 
    1403     PJSUA_UNLOCK(); 
    1404      
    14051439    return status; 
    14061440} 
     
    14191453    pjsua_stun_resolve *sess; 
    14201454    pj_status_t status; 
    1421     unsigned i; 
     1455    unsigned i, max_wait_ms; 
     1456    pj_timestamp start, now; 
    14221457 
    14231458    PJ_ASSERT_RETURN(count && srv && cb, PJ_EINVAL); 
     
    14331468    sess->count = count; 
    14341469    sess->blocking = wait; 
     1470    sess->waiter = pj_thread_this(); 
    14351471    sess->status = PJ_EPENDING; 
    14361472    sess->srv = (pj_str_t*) pj_pool_calloc(pool, count, sizeof(pj_str_t)); 
     
    14471483    if (!wait) 
    14481484        return PJ_SUCCESS; 
     1485 
     1486    /* Should limit the wait time to avoid deadlock. For example, 
     1487     * if app holds dlg/tsx lock, pjsua worker thread will block on 
     1488     * any dlg/tsx state change. 
     1489     */ 
     1490    max_wait_ms = count * pjsua_var.stun_cfg.rto_msec * (1 << 7); 
     1491    pj_get_timestamp(&start); 
    14491492 
    14501493    while (sess->status == PJ_EPENDING) { 
     
    14611504            pj_thread_sleep(20); 
    14621505        } 
     1506 
     1507        pj_get_timestamp(&now); 
     1508        if (pj_elapsed_msec(&start, &now) > max_wait_ms) 
     1509            sess->status = PJ_ETIMEDOUT; 
    14631510    } 
    14641511 
     
    15281575 * Resolve STUN server. 
    15291576 */ 
    1530 pj_status_t resolve_stun_server(pj_bool_t wait) 
    1531 { 
     1577pj_status_t resolve_stun_server(pj_bool_t wait, pj_bool_t retry_if_cur_error) 
     1578{ 
     1579    /* Retry resolving if currently the STUN status is error */ 
     1580    if (pjsua_var.stun_status != PJ_EPENDING && 
     1581        pjsua_var.stun_status != PJ_SUCCESS && 
     1582        retry_if_cur_error) 
     1583    { 
     1584        pjsua_var.stun_status = PJ_EUNKNOWN; 
     1585    } 
     1586 
    15321587    if (pjsua_var.stun_status == PJ_EUNKNOWN) { 
    15331588        pj_status_t status; 
     
    15571612         */ 
    15581613        if (wait) { 
    1559             pj_bool_t has_pjsua_lock = PJSUA_LOCK_IS_LOCKED(); 
    1560  
    1561             if (has_pjsua_lock) 
    1562                 PJSUA_UNLOCK(); 
     1614            unsigned max_wait_ms; 
     1615            pj_timestamp start, now; 
     1616 
     1617            /* Should limit the wait time to avoid deadlock. For example, 
     1618             * if app holds dlg/tsx lock, pjsua worker thread will block on 
     1619             * any dlg/tsx state change. 
     1620             */ 
     1621            max_wait_ms = pjsua_var.ua_cfg.stun_srv_cnt * 
     1622                          pjsua_var.stun_cfg.rto_msec * (1 << 7); 
     1623            pj_get_timestamp(&start); 
    15631624 
    15641625            while (pjsua_var.stun_status == PJ_EPENDING) {               
     
    15751636                    pj_thread_sleep(10); 
    15761637                } 
     1638 
     1639                pj_get_timestamp(&now); 
     1640                if (pj_elapsed_msec(&start, &now) > max_wait_ms) 
     1641                    return PJ_ETIMEDOUT; 
    15771642            } 
    1578             if (has_pjsua_lock) 
    1579                 PJSUA_LOCK(); 
    15801643        } 
    15811644    } 
     
    19982061 
    19992062    /* Make sure STUN server resolution has completed */ 
    2000     status = resolve_stun_server(PJ_TRUE); 
     2063    status = resolve_stun_server(PJ_TRUE, PJ_TRUE); 
    20012064    if (status != PJ_SUCCESS) { 
    20022065        pjsua_perror(THIS_FILE, "Error resolving STUN server", status); 
     
    27852848 
    27862849    /* Make sure STUN server resolution has completed */ 
    2787     status = resolve_stun_server(PJ_TRUE); 
     2850    status = resolve_stun_server(PJ_TRUE, PJ_TRUE); 
    27882851    if (status != PJ_SUCCESS) { 
    27892852        pjsua_var.nat_status = status; 
  • pjproject/trunk/pjsip/src/pjsua-lib/pjsua_media.c

    r5311 r5326  
    255255    /* Make sure STUN server resolution has completed */ 
    256256    if (!use_ipv6 && pjsua_media_acc_is_using_stun(call_med->call->acc_id)) { 
    257         status = resolve_stun_server(PJ_TRUE); 
     257        pj_bool_t retry_stun = (acc->cfg.media_stun_use & 
     258                                PJSUA_STUN_RETRY_ON_FAILURE) == 
     259                                PJSUA_STUN_RETRY_ON_FAILURE; 
     260        status = resolve_stun_server(PJ_TRUE, retry_stun); 
    258261        if (status != PJ_SUCCESS) { 
    259262            pjsua_perror(THIS_FILE, "Error resolving STUN server", status); 
     
    391394 
    392395            if (status != PJ_SUCCESS && pjsua_var.ua_cfg.stun_srv_cnt > 1 && 
    393                 ((acc->cfg.media_stun_use & PJSUA_STUN_RETRY_ON_FAILURE)!=0)) 
     396                ((acc->cfg.media_stun_use & PJSUA_STUN_RETRY_ON_FAILURE)== 
     397                  PJSUA_STUN_RETRY_ON_FAILURE)) 
    394398            { 
    395399                pj_str_t srv =  
     
    419423                status=pjsua_update_stun_servers(pjsua_var.ua_cfg.stun_srv_cnt, 
    420424                                                 pjsua_var.ua_cfg.stun_srv, 
    421                                                  PJ_FALSE); 
    422  
    423                 if (status == PJ_SUCCESS) 
    424                     status = resolve_stun_server(PJ_TRUE); 
    425  
     425                                                 PJ_TRUE); 
    426426                if (status == PJ_SUCCESS) { 
    427427                    if (pjsua_var.stun_srv.addr.sa_family != 0) { 
     
    829829    /* Make sure STUN server resolution has completed */ 
    830830    if (pjsua_media_acc_is_using_stun(call_med->call->acc_id)) { 
    831         status = resolve_stun_server(PJ_TRUE); 
     831        pj_bool_t retry_stun = (acc_cfg->media_stun_use & 
     832                                PJSUA_STUN_RETRY_ON_FAILURE) == 
     833                                PJSUA_STUN_RETRY_ON_FAILURE; 
     834        status = resolve_stun_server(PJ_TRUE, retry_stun); 
    832835        if (status != PJ_SUCCESS) { 
    833836            pjsua_perror(THIS_FILE, "Error resolving STUN server", status); 
Note: See TracChangeset for help on using the changeset viewer.