Changeset 5699 for pjproject/trunk/third_party/yuv/source/compare_neon.cc
- Timestamp: Nov 21, 2017 9:25:11 AM (6 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
pjproject/trunk/third_party/yuv/source/compare_neon.cc
r5633 r5699 27 27 uint32 diff; 28 28 29 asm volatile 30 "vmov.u16 q4, #0 \n" // accumulator29 asm volatile( 30 "vmov.u16 q4, #0 \n" // accumulator 31 31 32 "1:\n"33 "vld1.8 {q0, q1}, [%0]! \n"34 "vld1.8 {q2, q3}, [%1]! \n"35 "veor.32 q0, q0, q2 \n"36 "veor.32 q1, q1, q3 \n"37 "vcnt.i8 q0, q0 \n"38 "vcnt.i8 q1, q1 \n"39 "subs %2, %2, #32 \n"40 "vadd.u8 q0, q0, q1 \n" // 16 byte counts41 "vpadal.u8 q4, q0 \n" // 8 shorts42 "bgt 1b \n"32 "1: \n" 33 "vld1.8 {q0, q1}, [%0]! \n" 34 "vld1.8 {q2, q3}, [%1]! \n" 35 "veor.32 q0, q0, q2 \n" 36 "veor.32 q1, q1, q3 \n" 37 "vcnt.i8 q0, q0 \n" 38 "vcnt.i8 q1, q1 \n" 39 "subs %2, %2, #32 \n" 40 "vadd.u8 q0, q0, q1 \n" // 16 byte counts 41 "vpadal.u8 q4, q0 \n" // 8 shorts 42 "bgt 1b \n" 43 43 44 "vpaddl.u16 q0, q4 \n" // 4 ints 45 "vpadd.u32 d0, d0, d1 \n" 46 "vpadd.u32 d0, d0, d0 \n" 47 "vmov.32 %3, d0[0] \n" 48 49 : "+r"(src_a), 50 "+r"(src_b), 51 "+r"(count), 52 "=r"(diff) 53 : 54 : "cc", "q0", "q1", "q2", "q3", "q4"); 44 "vpaddl.u16 q0, q4 \n" // 4 ints 45 "vpadd.u32 d0, d0, d1 \n" 46 "vpadd.u32 d0, d0, d0 \n" 47 "vmov.32 %3, d0[0] \n" 48 49 : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(diff) 50 : 51 : "cc", "q0", "q1", "q2", "q3", "q4"); 55 52 return diff; 56 53 } … … 58 55 uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) { 59 56 uint32 sse; 60 asm volatile 61 "vmov.u8 q8, #0 \n"62 "vmov.u8 q10, #0 \n"63 "vmov.u8 q9, #0 \n"64 "vmov.u8 q11, #0 \n"57 asm volatile( 58 "vmov.u8 q8, #0 \n" 59 "vmov.u8 q10, #0 \n" 60 "vmov.u8 q9, #0 \n" 61 "vmov.u8 q11, #0 \n" 65 62 66 "1:\n"67 "vld1.8 {q0}, [%0]! \n"68 "vld1.8 {q1}, [%1]! \n"69 "subs %2, %2, #16 \n"70 "vsubl.u8 q2, d0, d2 \n"71 "vsubl.u8 q3, d1, d3 \n"72 "vmlal.s16 q8, d4, d4 \n"73 "vmlal.s16 q9, d6, d6 \n"74 "vmlal.s16 q10, d5, d5 \n"75 "vmlal.s16 q11, d7, d7 \n"76 "bgt 1b \n"63 "1: \n" 64 "vld1.8 {q0}, [%0]! \n" 65 "vld1.8 {q1}, [%1]! 
\n" 66 "subs %2, %2, #16 \n" 67 "vsubl.u8 q2, d0, d2 \n" 68 "vsubl.u8 q3, d1, d3 \n" 69 "vmlal.s16 q8, d4, d4 \n" 70 "vmlal.s16 q9, d6, d6 \n" 71 "vmlal.s16 q10, d5, d5 \n" 72 "vmlal.s16 q11, d7, d7 \n" 73 "bgt 1b \n" 77 74 78 "vadd.u32 q8, q8, q9 \n" 79 "vadd.u32 q10, q10, q11 \n" 80 "vadd.u32 q11, q8, q10 \n" 81 "vpaddl.u32 q1, q11 \n" 82 "vadd.u64 d0, d2, d3 \n" 83 "vmov.32 %3, d0[0] \n" 84 : "+r"(src_a), 85 "+r"(src_b), 86 "+r"(count), 87 "=r"(sse) 88 : 89 : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"); 75 "vadd.u32 q8, q8, q9 \n" 76 "vadd.u32 q10, q10, q11 \n" 77 "vadd.u32 q11, q8, q10 \n" 78 "vpaddl.u32 q1, q11 \n" 79 "vadd.u64 d0, d2, d3 \n" 80 "vmov.32 %3, d0[0] \n" 81 : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(sse) 82 : 83 : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"); 90 84 return sse; 91 85 }
Note: See TracChangeset for help on using the changeset viewer.