Changeset 5633 for pjproject/trunk/third_party/yuv/source/compare_neon64.cc
- Timestamp: Jul 28, 2017 2:51:44 AM (7 years ago)
- Files: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
pjproject/trunk/third_party/yuv/source/compare_neon64.cc
r5358 r5633
   21    21     #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
   22    23
         23   + // 256 bits at a time
         24   + // uses short accumulator which restricts count to 131 KB
         25   + uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count) {
         26   +   uint32 diff;
         27   +   asm volatile (
         28   +     "movi v4.8h, #0 \n"
         29   +
         30   +     "1: \n"
         31   +     "ld1 {v0.16b, v1.16b}, [%0], #32 \n"
         32   +     "ld1 {v2.16b, v3.16b}, [%1], #32 \n"
         33   +     "eor v0.16b, v0.16b, v2.16b \n"
         34   +     "eor v1.16b, v1.16b, v3.16b \n"
         35   +     "cnt v0.16b, v0.16b \n"
         36   +     "cnt v1.16b, v1.16b \n"
         37   +     "subs %w2, %w2, #32 \n"
         38   +     "add v0.16b, v0.16b, v1.16b \n"
         39   +     "uadalp v4.8h, v0.16b \n"
         40   +     "b.gt 1b \n"
         41   +
         42   +     "uaddlv s4, v4.8h \n"
         43   +     "fmov %w3, s4 \n"
         44   +     : "+r"(src_a),
         45   +       "+r"(src_b),
         46   +       "+r"(count),
         47   +       "=r"(diff)
         48   +     :
         49   +     : "cc", "v0", "v1", "v2", "v3", "v4");
         50   +   return diff;
         51   + }
         52   +
   23    53     uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
   24         -   volatile uint32 sse;
         54   +   uint32 sse;
   25    55       asm volatile (
   26    56         "eor v16.16b, v16.16b, v16.16b \n"
    …     …
   30    60
   31    61         "1: \n"
   32         -     MEMACCESS(0)
   33    62         "ld1 {v0.16b}, [%0], #16 \n"
   34         -     MEMACCESS(1)
   35    63         "ld1 {v1.16b}, [%1], #16 \n"
   36    64         "subs %w2, %w2, #16 \n"
Note: See TracChangeset for help on using the changeset viewer.