Changeset 5699 for pjproject/trunk/third_party/yuv/source/compare_neon64.cc
- Timestamp: Nov 21, 2017 9:25:11 AM (6 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
pjproject/trunk/third_party/yuv/source/compare_neon64.cc
r5633 r5699 25 25 uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count) { 26 26 uint32 diff; 27 asm volatile 28 "movi v4.8h, #0 \n"27 asm volatile( 28 "movi v4.8h, #0 \n" 29 29 30 "1:\n"31 "ld1 {v0.16b, v1.16b}, [%0], #32 \n"32 "ld1 {v2.16b, v3.16b}, [%1], #32 \n"33 "eor v0.16b, v0.16b, v2.16b \n"34 "eor v1.16b, v1.16b, v3.16b \n"35 "cnt v0.16b, v0.16b \n"36 "cnt v1.16b, v1.16b \n"37 "subs %w2, %w2, #32 \n"38 "add v0.16b, v0.16b, v1.16b \n"39 "uadalp v4.8h, v0.16b \n"40 "b.gt 1b \n"30 "1: \n" 31 "ld1 {v0.16b, v1.16b}, [%0], #32 \n" 32 "ld1 {v2.16b, v3.16b}, [%1], #32 \n" 33 "eor v0.16b, v0.16b, v2.16b \n" 34 "eor v1.16b, v1.16b, v3.16b \n" 35 "cnt v0.16b, v0.16b \n" 36 "cnt v1.16b, v1.16b \n" 37 "subs %w2, %w2, #32 \n" 38 "add v0.16b, v0.16b, v1.16b \n" 39 "uadalp v4.8h, v0.16b \n" 40 "b.gt 1b \n" 41 41 42 "uaddlv s4, v4.8h \n" 43 "fmov %w3, s4 \n" 44 : "+r"(src_a), 45 "+r"(src_b), 46 "+r"(count), 47 "=r"(diff) 48 : 49 : "cc", "v0", "v1", "v2", "v3", "v4"); 42 "uaddlv s4, v4.8h \n" 43 "fmov %w3, s4 \n" 44 : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(diff) 45 : 46 : "cc", "v0", "v1", "v2", "v3", "v4"); 50 47 return diff; 51 48 } … … 53 50 uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) { 54 51 uint32 sse; 55 asm volatile 56 "eor v16.16b, v16.16b, v16.16b \n"57 "eor v18.16b, v18.16b, v18.16b \n"58 "eor v17.16b, v17.16b, v17.16b \n"59 "eor v19.16b, v19.16b, v19.16b \n"52 asm volatile( 53 "eor v16.16b, v16.16b, v16.16b \n" 54 "eor v18.16b, v18.16b, v18.16b \n" 55 "eor v17.16b, v17.16b, v17.16b \n" 56 "eor v19.16b, v19.16b, v19.16b \n" 60 57 61 "1:\n"62 "ld1 {v0.16b}, [%0], #16 \n"63 "ld1 {v1.16b}, [%1], #16 \n"64 "subs %w2, %w2, #16 \n"65 "usubl v2.8h, v0.8b, v1.8b \n"66 "usubl2 v3.8h, v0.16b, v1.16b \n"67 "smlal v16.4s, v2.4h, v2.4h \n"68 "smlal v17.4s, v3.4h, v3.4h \n"69 "smlal2 v18.4s, v2.8h, v2.8h \n"70 "smlal2 v19.4s, v3.8h, v3.8h \n"71 "b.gt 1b \n"58 "1: \n" 59 "ld1 {v0.16b}, [%0], #16 \n" 60 "ld1 {v1.16b}, 
[%1], #16 \n" 61 "subs %w2, %w2, #16 \n" 62 "usubl v2.8h, v0.8b, v1.8b \n" 63 "usubl2 v3.8h, v0.16b, v1.16b \n" 64 "smlal v16.4s, v2.4h, v2.4h \n" 65 "smlal v17.4s, v3.4h, v3.4h \n" 66 "smlal2 v18.4s, v2.8h, v2.8h \n" 67 "smlal2 v19.4s, v3.8h, v3.8h \n" 68 "b.gt 1b \n" 72 69 73 "add v16.4s, v16.4s, v17.4s \n" 74 "add v18.4s, v18.4s, v19.4s \n" 75 "add v19.4s, v16.4s, v18.4s \n" 76 "addv s0, v19.4s \n" 77 "fmov %w3, s0 \n" 78 : "+r"(src_a), 79 "+r"(src_b), 80 "+r"(count), 81 "=r"(sse) 82 : 83 : "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19"); 70 "add v16.4s, v16.4s, v17.4s \n" 71 "add v18.4s, v18.4s, v19.4s \n" 72 "add v19.4s, v16.4s, v18.4s \n" 73 "addv s0, v19.4s \n" 74 "fmov %w3, s0 \n" 75 : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(sse) 76 : 77 : "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19"); 84 78 return sse; 85 79 }
Note: See TracChangeset for help on using the changeset viewer.