- Timestamp:
- Jul 26, 2006 5:04:54 PM (18 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
pjproject/trunk/pjmedia/src/pjmedia-codec/speex/filters_bfin.h
r278 r628 33 33 */ 34 34 35 #include <stdio.h>36 37 35 #define OVERRIDE_NORMALIZE16 38 36 int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int len) … … 40 38 spx_sig_t max_val=1; 41 39 int sig_shift; 42 43 40 __asm__ 44 41 ( … … 68 65 "I0 = %0;\n\t" 69 66 "L0 = 0;\n\t" 70 "I1 = %1;\n\t" 71 "L1 = 0;\n\t" 67 "P1 = %1;\n\t" 72 68 "R0 = [I0++];\n\t" 73 "LOOP norm_shift%= LC0 = %3 >> 1;\n\t"69 "LOOP norm_shift%= LC0 = %3;\n\t" 74 70 "LOOP_BEGIN norm_shift%=;\n\t" 75 "R1 = ASHIFT R0 by %2.L || R2 = [I0++];\n\t" 76 "R3 = ASHIFT R2 by %2.L || R0 = [I0++];\n\t" 77 "R3 = PACK(R3.L, R1.L);\n\t" 78 "[I1++] = R3;\n\t" 71 "R1 = ASHIFT R0 by %2.L || R0 = [I0++];\n\t" 72 "W[P1++] = R1;\n\t" 79 73 "LOOP_END norm_shift%=;\n\t" 80 : : "a" (x), "a" (y), "d" (-sig_shift), "a" (len) 81 : "I0", "L0", "I1", "L1", "R0", "R1", "R2", "R3", "memory" 74 "R1 = ASHIFT R0 by %2.L;\n\t" 75 "W[P1++] = R1;\n\t" 76 : : "a" (x), "a" (y), "d" (-sig_shift), "a" (len-1) 77 : "I0", "L0", "P1", "R0", "R1", "memory" 82 78 ); 83 79 return sig_shift; … … 104 100 "P0 = %3;\n\t" 105 101 "I0 = P0;\n\t" 106 "B0 = P0;\n\t" 102 "B0 = P0;\n\t" /* numden */ 107 103 "L0 = 0;\n\t" 108 104 109 "P2 = %0;\n\t" 105 "P2 = %0;\n\t" /* Fused xy */ 110 106 "I2 = P2;\n\t" 111 107 "L2 = 0;\n\t" 112 108 113 "P4 = %6;\n\t" 114 "P0 = %1;\n\t" 115 "P1 = %2;\n\t" 109 "P4 = %6;\n\t" /* mem */ 110 "P0 = %1;\n\t" /* _x */ 111 "P1 = %2;\n\t" /* _y */ 116 112 117 113 /* First sample */ 118 114 "R1 = [P4++];\n\t" 119 "R1 <<= 1;\n\t" 120 "R2 = [P0++];\n\t" 115 "R1 <<= 1;\n\t" /* shift mem */ 116 "R2 = [P0++];\n\t" /* load x[0] */ 121 117 "R1 = R1 + R2;\n\t" 122 "[P1++] = R1;\n\t" 118 "[P1++] = R1;\n\t" /* store y[0] */ 123 119 "R1 <<= 2;\n\t" 124 120 "R2 <<= 2;\n\t" 125 "R2 = PACK(R1.H, R2.H);\n\t" 121 "R2 = PACK(R1.H, R2.H);\n\t" /* pack x16 and y16 */ 126 122 "[P2] = R2;\n\t" 127 123 … … 148 144 "A0 += A1;\n\t" 149 145 "R4 = A0;\n\t" 150 "R4 <<= 1;\n\t" 151 "R2 = [P0++];\n\t" 146 "R4 <<= 1;\n\t" /* shift mem */ 147 "R2 = [P0++];\n\t" /* load x */ 152 148 "R4 = R4 + R2;\n\t" 153 "[P1++] = R4;\n\t" 149 "[P1++] = R4;\n\t" /* store y */ 154 150 "R4 <<= 2;\n\t" 155 151 "R2 <<= 2;\n\t" 156 "R2 = PACK(R4.H, R2.H);\n\t" 152 "R2 = PACK(R4.H, R2.H);\n\t" /* pack x16 and y16 */ 157 153 "[P2] = R2;\n\t" 158 154 … … 162 158 "R0 = %5;\n\t" 163 159 "R0 <<= 1;\n\t" 164 "I0 = B0;\n\t" 160 "I0 = B0;\n\t" /* numden */ 165 161 "R0 <<= 1;\n\t" 166 162 "L0 = R0;\n\t" 167 163 168 "R0 = %5;\n\t" 169 "R2 = %4;\n\t" 164 "R0 = %5;\n\t" /* org */ 165 "R2 = %4;\n\t" /* N */ 170 166 "R2 = R2 - R0;\n\t" 171 "R4 = [I0++];\n\t" 167 "R4 = [I0++];\n\t" /* numden */ 172 168 "LC0 = R2;\n\t" 173 169 "P3 = R0;\n\t" … … 177 173 "M0 = R0;\n\t" 178 174 "A1 = A0 = 0;\n\t" 179 "R5 = [I2--];\n\t" 175 "R5 = [I2--];\n\t" /* load xy */ 180 176 "LOOP filter_mid%= LC0;\n\t" 181 177 "LOOP_BEGIN filter_mid%=;\n\t" … … 185 181 "LOOP_END filter_mid_inner%=;\n\t" 186 182 "R0 = (A0 += A1) || I2 += M0;\n\t" 187 "R0 = R0 << 1 || R5 = [P0++];\n\t" 183 "R0 = R0 << 1 || R5 = [P0++];\n\t" /* load x */ 188 184 "R0 = R0 + R5;\n\t" 189 "R0 = R0 << 2 || [P1++] = R0;\n\t" 185 "R0 = R0 << 2 || [P1++] = R0;\n\t" /* shift y | store y */ 190 186 "R5 = R5 << 2;\n\t" 191 187 "R5 = PACK(R0.H, R5.H);\n\t" 188 "A1 = A0 = 0 || [I2--] = R5\n\t" 189 "LOOP_END filter_mid%=;\n\t" 190 "I2 += 4;\n\t" 191 "P2 = I2;\n\t" 192 /* Update memory */ 193 "P4 = %6;\n\t" 194 "R0 = %5;\n\t" 195 "LC0 = R0;\n\t" 196 "P0 = B0;\n\t" 197 "A1 = A0 = 0;\n\t" 198 "LOOP mem_update%= LC0;\n\t" 199 "LOOP_BEGIN mem_update%=;\n\t" 200 "I2 = P2;\n\t" 201 "I0 = P0;\n\t" 202 "P0 += 4;\n\t" 203 "R0 = LC0;\n\t" 204 "LC1 = R0;\n\t" 205 "R5 = [I2--] || R4 = [I0++];\n\t" 206 "LOOP mem_accum%= LC1;\n\t" 207 "LOOP_BEGIN mem_accum%=;\n\t" 208 "A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t" 209 "LOOP_END mem_accum%=;\n\t" 210 "R0 = (A0 += A1);\n\t" 211 "A1 = A0 = 0 || [P4++] = R0;\n\t" 212 "LOOP_END mem_update%=;\n\t" 213 "L0 = 0;\n\t" 214 : : "m" (xy), "m" (_x), "m" (_y), "m" (numden), "m" (N), "m" (ord), "m" (mem) 215 : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B0", "I0", "I2", "L0", "L2", "M0", "memory" 216 ); 217 218 } 219 220 221 #define OVERRIDE_FILTER_MEM16 222 void filter_mem16(const spx_word16_t *_x, const spx_coef_t *num, const spx_coef_t *den, spx_word16_t *_y, int N, int ord, spx_mem_t *mem, char *stack) 223 { 224 VARDECL(spx_word32_t *xy2); 225 VARDECL(spx_word32_t *numden_a); 226 spx_word32_t *xy; 227 spx_word16_t *numden; 228 int i; 229 230 ALLOC(xy2, (N+1), spx_word32_t); 231 ALLOC(numden_a, (2*ord+2), spx_word32_t); 232 xy = xy2+1; 233 numden = (spx_word16_t*) numden_a; 234 235 for (i=0;i<ord;i++) 236 { 237 numden[2*i] = num[i]; 238 numden[2*i+1] = den[i]; 239 } 240 __asm__ __volatile__ 241 ( 242 /* Register setup */ 243 "R0 = %5;\n\t" /*ord */ 244 245 "P0 = %3;\n\t" 246 "I0 = P0;\n\t" 247 "B0 = P0;\n\t" /* numden */ 248 "L0 = 0;\n\t" 249 250 "P2 = %0;\n\t" /* Fused xy */ 251 "I2 = P2;\n\t" 252 "L2 = 0;\n\t" 253 254 "P4 = %6;\n\t" /* mem */ 255 "P0 = %1;\n\t" /* _x */ 256 "P1 = %2;\n\t" /* _y */ 257 258 /* First sample */ 259 "R1 = [P4++];\n\t" 260 "R1 <<= 3;\n\t" /* shift mem */ 261 "R1.L = R1 (RND);\n\t" 262 "R2 = W[P0++];\n\t" /* load x[0] */ 263 "R1.L = R1.L + R2.L;\n\t" 264 "W[P1++] = R1;\n\t" /* store y[0] */ 265 "R2 = PACK(R1.L, R2.L);\n\t" /* pack x16 and y16 */ 266 "[P2] = R2;\n\t" 267 268 /* Samples 1 to ord-1 (using memory) */ 269 "R0 += -1;\n\t" 270 "R3 = 0;\n\t" 271 "LC0 = R0;\n\t" 272 "LOOP filter_start%= LC0;\n\t" 273 "LOOP_BEGIN filter_start%=;\n\t" 274 "R3 += 1;\n\t" 275 "LC1 = R3;\n\t" 276 277 "R1 = [P4++];\n\t" 278 "A1 = R1;\n\t" 279 "A0 = 0;\n\t" 280 "I0 = B0;\n\t" 281 "I2 = P2;\n\t" 282 "P2 += 4;\n\t" 283 "R4 = [I0++] || R5 = [I2--];\n\t" 284 "LOOP filter_start_inner%= LC1;\n\t" 285 "LOOP_BEGIN filter_start_inner%=;\n\t" 286 "A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t" 287 "LOOP_END filter_start_inner%=;\n\t" 288 "A0 += A1;\n\t" 289 "R4 = A0;\n\t" 290 "R4 <<= 3;\n\t" /* shift mem */ 291 "R4.L = R4 (RND);\n\t" 292 "R2 = W[P0++];\n\t" /* load x */ 293 "R4.L = R4.L + R2.L;\n\t" 294 "W[P1++] = R4;\n\t" /* store y */ 295 //"R4 <<= 2;\n\t" 296 //"R2 <<= 2;\n\t" 297 "R2 = PACK(R4.L, R2.L);\n\t" /* pack x16 and y16 */ 298 "[P2] = R2;\n\t" 299 300 "LOOP_END filter_start%=;\n\t" 301 302 /* Samples ord to N*/ 303 "R0 = %5;\n\t" 304 "R0 <<= 1;\n\t" 305 "I0 = B0;\n\t" /* numden */ 306 "R0 <<= 1;\n\t" 307 "L0 = R0;\n\t" 308 309 "R0 = %5;\n\t" /* org */ 310 "R2 = %4;\n\t" /* N */ 311 "R2 = R2 - R0;\n\t" 312 "R4 = [I0++];\n\t" /* numden */ 313 "LC0 = R2;\n\t" 314 "P3 = R0;\n\t" 315 "R0 <<= 2;\n\t" 316 "R0 += 8;\n\t" 317 "I2 = P2;\n\t" 318 "M0 = R0;\n\t" 319 "A1 = A0 = 0;\n\t" 320 "R5 = [I2--];\n\t" /* load xy */ 321 "LOOP filter_mid%= LC0;\n\t" 322 "LOOP_BEGIN filter_mid%=;\n\t" 323 "LOOP filter_mid_inner%= LC1=P3;\n\t" 324 "LOOP_BEGIN filter_mid_inner%=;\n\t" 325 "A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t" 326 "LOOP_END filter_mid_inner%=;\n\t" 327 "R0 = (A0 += A1) || I2 += M0;\n\t" 328 "R0 = R0 << 3 || R5 = W[P0++];\n\t" /* load x */ 329 "R0.L = R0 (RND);\n\t" 330 "R0.L = R0.L + R5.L;\n\t" 331 "R5 = PACK(R0.L, R5.L) || W[P1++] = R0;\n\t" /* shift y | store y */ 192 332 "A1 = A0 = 0 || [I2--] = R5\n\t" 193 333 "LOOP_END filter_mid%=;\n\t" … … 347 487 } 348 488 489 490 #define OVERRIDE_IIR_MEM16 491 void iir_mem16(const spx_word16_t *_x, const spx_coef_t *den, spx_word16_t *_y, int N, int ord, spx_mem_t *mem, char *stack) 492 { 493 VARDECL(spx_word16_t *y); 494 spx_word16_t *yy; 495 496 ALLOC(y, (N+2), spx_word16_t); 497 yy = y+2; 498 499 __asm__ __volatile__ 500 ( 501 /* Register setup */ 502 "R0 = %5;\n\t" /*ord */ 503 504 "P1 = %3;\n\t" 505 "I1 = P1;\n\t" 506 "B1 = P1;\n\t" 507 "L1 = 0;\n\t" 508 509 "P3 = %0;\n\t" 510 "I3 = P3;\n\t" 511 "L3 = 0;\n\t" 512 513 "P4 = %6;\n\t" 514 "P0 = %1;\n\t" 515 "P1 = %2;\n\t" 516 517 /* First sample */ 518 "R1 = [P4++];\n\t" 519 "R1 = R1 << 3 (S);\n\t" 520 "R1.L = R1 (RND);\n\t" 521 "R2 = W[P0++];\n\t" 522 "R1 = R1 + R2;\n\t" 523 "W[P1++] = R1;\n\t" 524 "W[P3] = R1;\n\t" 525 526 /* Samples 1 to ord-1 (using memory) */ 527 "R0 += -1;\n\t" 528 "R3 = 0;\n\t" 529 "LC0 = R0;\n\t" 530 "LOOP filter_start%= LC0;\n\t" 531 "LOOP_BEGIN filter_start%=;\n\t" 532 "R3 += 1;\n\t" 533 "LC1 = R3;\n\t" 534 535 "R1 = [P4++];\n\t" 536 "A1 = R1;\n\t" 537 "I1 = B1;\n\t" 538 "I3 = P3;\n\t" 539 "P3 += 2;\n\t" 540 "LOOP filter_start_inner%= LC1;\n\t" 541 "LOOP_BEGIN filter_start_inner%=;\n\t" 542 "R4.L = W[I1++];\n\t" 543 "R5.L = W[I3--];\n\t" 544 "A1 -= R4.L*R5.L (IS);\n\t" 545 "LOOP_END filter_start_inner%=;\n\t" 546 547 "R1 = A1;\n\t" 548 "R1 <<= 3;\n\t" 549 "R1.L = R1 (RND);\n\t" 550 "R2 = W[P0++];\n\t" 551 "R1 = R1 + R2;\n\t" 552 "W[P1++] = R1;\n\t" 553 "W[P3] = R1;\n\t" 554 "LOOP_END filter_start%=;\n\t" 555 556 /* Samples ord to N*/ 557 "R0 = %5;\n\t" 558 "R0 <<= 1;\n\t" 559 "I1 = B1;\n\t" 560 "L1 = R0;\n\t" 561 562 "R0 = %5;\n\t" 563 "R2 = %4;\n\t" 564 "R2 = R2 - R0;\n\t" 565 "R4.L = W[I1++];\n\t" 566 "LC0 = R2;\n\t" 567 "LOOP filter_mid%= LC0;\n\t" 568 "LOOP_BEGIN filter_mid%=;\n\t" 569 "LC1 = R0;\n\t" 570 "A1 = 0;\n\t" 571 "I3 = P3;\n\t" 572 "P3 += 2;\n\t" 573 "R5.L = W[I3--];\n\t" 574 "LOOP filter_mid_inner%= LC1;\n\t" 575 "LOOP_BEGIN filter_mid_inner%=;\n\t" 576 "A1 -= R4.L*R5.L (IS) || R4.L = W[I1++] || R5.L = W[I3--];\n\t" 577 "LOOP_END filter_mid_inner%=;\n\t" 578 "R1 = A1;\n\t" 579 "R1 = R1 << 3 || R2 = W[P0++];\n\t" 580 "R1.L = R1 (RND);\n\t" 581 "R1 = R1 + R2;\n\t" 582 "W[P1++] = R1;\n\t" 583 "W[P3] = R1;\n\t" 584 "LOOP_END filter_mid%=;\n\t" 585 586 /* Update memory */ 587 "P4 = %6;\n\t" 588 "R0 = %5;\n\t" 589 "LC0 = R0;\n\t" 590 "P1 = B1;\n\t" 591 "LOOP mem_update%= LC0;\n\t" 592 "LOOP_BEGIN mem_update%=;\n\t" 593 "A0 = 0;\n\t" 594 "I3 = P3;\n\t" 595 "I1 = P1;\n\t" 596 "P1 += 2;\n\t" 597 "R0 = LC0;\n\t" 598 "LC1=R0;\n\t" 599 "R5.L = W[I3--] || R4.L = W[I1++];\n\t" 600 "LOOP mem_accum%= LC1;\n\t" 601 "LOOP_BEGIN mem_accum%=;\n\t" 602 "A0 -= R4.L*R5.L (IS) || R4.L = W[I1++] || R5.L = W[I3--];\n\t" 603 "LOOP_END mem_accum%=;\n\t" 604 "R0 = A0;\n\t" 605 "[P4++] = R0;\n\t" 606 "LOOP_END mem_update%=;\n\t" 607 "L1 = 0;\n\t" 608 : : "m" (yy), "m" (_x), "m" (_y), "m" (den), "m" (N), "m" (ord), "m" (mem) 609 : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B1", "I1", "I3", "L1", "L3", "memory" 610 ); 611 612 } 613 614 349 615 #define OVERRIDE_FIR_MEM2 350 616 void fir_mem2(const spx_sig_t *x, const spx_coef_t *num, spx_sig_t *y, int N, int ord, spx_mem_t *mem) … … 357 623 den[i] = 0; 358 624 filter_mem2(x, num, den, y, N, ord, mem); 625 } 626 627 #define OVERRIDE_FIR_MEM16 628 void fir_mem16(const spx_word16_t *x, const spx_coef_t *num, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack) 629 { 630 int i; 631 spx_coef_t den2[12]; 632 spx_coef_t *den; 633 den = (spx_coef_t*)((((int)den2)+4)&0xfffffffc); 634 for (i=0;i<10;i++) 635 den[i] = 0; 636 filter_mem16(x, num, den, y, N, ord, mem, stack); 359 637 } 360 638
Note: See TracChangeset
for help on using the changeset viewer.