Changeset 5633 for pjproject/trunk/third_party/yuv/source/compare_neon.cc
- Timestamp: Jul 28, 2017 2:51:44 AM (7 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
pjproject/trunk/third_party/yuv/source/compare_neon.cc
r5358 r5633 22 22 !defined(__aarch64__) 23 23 24 // 256 bits at a time 25 // uses short accumulator which restricts count to 131 KB 26 uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count) { 27 uint32 diff; 28 29 asm volatile ( 30 "vmov.u16 q4, #0 \n" // accumulator 31 32 "1: \n" 33 "vld1.8 {q0, q1}, [%0]! \n" 34 "vld1.8 {q2, q3}, [%1]! \n" 35 "veor.32 q0, q0, q2 \n" 36 "veor.32 q1, q1, q3 \n" 37 "vcnt.i8 q0, q0 \n" 38 "vcnt.i8 q1, q1 \n" 39 "subs %2, %2, #32 \n" 40 "vadd.u8 q0, q0, q1 \n" // 16 byte counts 41 "vpadal.u8 q4, q0 \n" // 8 shorts 42 "bgt 1b \n" 43 44 "vpaddl.u16 q0, q4 \n" // 4 ints 45 "vpadd.u32 d0, d0, d1 \n" 46 "vpadd.u32 d0, d0, d0 \n" 47 "vmov.32 %3, d0[0] \n" 48 49 : "+r"(src_a), 50 "+r"(src_b), 51 "+r"(count), 52 "=r"(diff) 53 : 54 : "cc", "q0", "q1", "q2", "q3", "q4"); 55 return diff; 56 } 57 24 58 uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) { 25 volatileuint32 sse;59 uint32 sse; 26 60 asm volatile ( 27 61 "vmov.u8 q8, #0 \n" … … 31 65 32 66 "1: \n" 33 MEMACCESS(0)34 67 "vld1.8 {q0}, [%0]! \n" 35 MEMACCESS(1)36 68 "vld1.8 {q1}, [%1]! \n" 37 69 "subs %2, %2, #16 \n"
Note: See TracChangeset for help on using the changeset viewer.