Changeset 5699 for pjproject/trunk/third_party/yuv/source/row_gcc.cc
- Timestamp:
- Nov 21, 2017 9:25:11 AM (6 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
pjproject/trunk/third_party/yuv/source/row_gcc.cc
r5633 r5699 39 39 127, -84, -43, 0, 127, -84, -43, 0}; 40 40 41 static vec8 kARGBToV = { 42 -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, 43 }; 41 static vec8 kARGBToV = {-18, -94, 112, 0, -18, -94, 112, 0, 42 -18, -94, 112, 0, -18, -94, 112, 0}; 44 43 45 44 static vec8 kARGBToVJ = {-20, -107, 127, 0, -20, -107, 127, 0, … … 2754 2753 } 2755 2754 #endif // HAS_MERGEUVROW_SSE2 2755 2756 // Use scale to convert lsb formats to msb, depending how many bits there are: 2757 // 128 = 9 bits 2758 // 64 = 10 bits 2759 // 16 = 12 bits 2760 // 1 = 16 bits 2761 #ifdef HAS_MERGEUVROW_16_AVX2 2762 void MergeUVRow_16_AVX2(const uint16* src_u, 2763 const uint16* src_v, 2764 uint16* dst_uv, 2765 int scale, 2766 int width) { 2767 // clang-format off 2768 asm volatile ( 2769 "vmovd %4,%%xmm3 \n" 2770 "vpunpcklwd %%xmm3,%%xmm3,%%xmm3 \n" 2771 "vbroadcastss %%xmm3,%%ymm3 \n" 2772 "sub %0,%1 \n" 2773 2774 // 16 pixels per loop. 2775 LABELALIGN 2776 "1: \n" 2777 "vmovdqu (%0),%%ymm0 \n" 2778 "vmovdqu (%0,%1,1),%%ymm1 \n" 2779 "add $0x20,%0 \n" 2780 2781 "vpmullw %%ymm3,%%ymm0,%%ymm0 \n" 2782 "vpmullw %%ymm3,%%ymm1,%%ymm1 \n" 2783 "vpunpcklwd %%ymm1,%%ymm0,%%ymm2 \n" // mutates 2784 "vpunpckhwd %%ymm1,%%ymm0,%%ymm0 \n" 2785 "vextractf128 $0x0,%%ymm2,(%2) \n" 2786 "vextractf128 $0x0,%%ymm0,0x10(%2) \n" 2787 "vextractf128 $0x1,%%ymm2,0x20(%2) \n" 2788 "vextractf128 $0x1,%%ymm0,0x30(%2) \n" 2789 "add $0x40,%2 \n" 2790 "sub $0x10,%3 \n" 2791 "jg 1b \n" 2792 "vzeroupper \n" 2793 : "+r"(src_u), // %0 2794 "+r"(src_v), // %1 2795 "+r"(dst_uv), // %2 2796 "+r"(width) // %3 2797 : "r"(scale) // %4 2798 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3"); 2799 // clang-format on 2800 } 2801 #endif // HAS_MERGEUVROW_AVX2 2802 2803 #ifdef HAS_MULTIPLYROW_16_AVX2 2804 void MultiplyRow_16_AVX2(const uint16* src_y, 2805 uint16* dst_y, 2806 int scale, 2807 int width) { 2808 // clang-format off 2809 asm volatile ( 2810 "vmovd %3,%%xmm3 \n" 2811 "vpunpcklwd %%xmm3,%%xmm3,%%xmm3 \n" 2812 "vbroadcastss %%xmm3,%%ymm3 \n" 2813 "sub %0,%1 \n" 2814 2815 // 16 pixels per loop. 2816 LABELALIGN 2817 "1: \n" 2818 "vmovdqu (%0),%%ymm0 \n" 2819 "vmovdqu 0x20(%0),%%ymm1 \n" 2820 "vpmullw %%ymm3,%%ymm0,%%ymm0 \n" 2821 "vpmullw %%ymm3,%%ymm1,%%ymm1 \n" 2822 "vmovdqu %%ymm0,(%0,%1) \n" 2823 "vmovdqu %%ymm1,0x20(%0,%1) \n" 2824 "add $0x40,%0 \n" 2825 "sub $0x20,%2 \n" 2826 "jg 1b \n" 2827 "vzeroupper \n" 2828 : "+r"(src_y), // %0 2829 "+r"(dst_y), // %1 2830 "+r"(width) // %2 2831 : "r"(scale) // %3 2832 : "memory", "cc", "xmm0", "xmm1", "xmm3"); 2833 // clang-format on 2834 } 2835 #endif // HAS_MULTIPLYROW_16_AVX2 2836 2837 #ifdef HAS_SPLITRGBROW_SSSE3 2838 2839 // Shuffle table for converting RGB to Planar. 2840 static uvec8 kShuffleMaskRGBToR0 = {0u, 3u, 6u, 9u, 12u, 15u, 2841 128u, 128u, 128u, 128u, 128u, 128u, 2842 128u, 128u, 128u, 128u}; 2843 static uvec8 kShuffleMaskRGBToR1 = {128u, 128u, 128u, 128u, 128u, 128u, 2844 2u, 5u, 8u, 11u, 14u, 128u, 2845 128u, 128u, 128u, 128u}; 2846 static uvec8 kShuffleMaskRGBToR2 = {128u, 128u, 128u, 128u, 128u, 128u, 2847 128u, 128u, 128u, 128u, 128u, 1u, 2848 4u, 7u, 10u, 13u}; 2849 2850 static uvec8 kShuffleMaskRGBToG0 = {1u, 4u, 7u, 10u, 13u, 128u, 2851 128u, 128u, 128u, 128u, 128u, 128u, 2852 128u, 128u, 128u, 128u}; 2853 static uvec8 kShuffleMaskRGBToG1 = {128u, 128u, 128u, 128u, 128u, 0u, 2854 3u, 6u, 9u, 12u, 15u, 128u, 2855 128u, 128u, 128u, 128u}; 2856 static uvec8 kShuffleMaskRGBToG2 = {128u, 128u, 128u, 128u, 128u, 128u, 2857 128u, 128u, 128u, 128u, 128u, 2u, 2858 5u, 8u, 11u, 14u}; 2859 2860 static uvec8 kShuffleMaskRGBToB0 = {2u, 5u, 8u, 11u, 14u, 128u, 2861 128u, 128u, 128u, 128u, 128u, 128u, 2862 128u, 128u, 128u, 128u}; 2863 static uvec8 kShuffleMaskRGBToB1 = {128u, 128u, 128u, 128u, 128u, 1u, 2864 4u, 7u, 10u, 13u, 128u, 128u, 2865 128u, 128u, 128u, 128u}; 2866 static uvec8 kShuffleMaskRGBToB2 = {128u, 128u, 128u, 128u, 128u, 128u, 2867 128u, 128u, 128u, 128u, 0u, 3u, 2868 6u, 9u, 12u, 15u}; 2869 2870 void SplitRGBRow_SSSE3(const uint8* src_rgb, 2871 uint8* dst_r, 2872 uint8* dst_g, 2873 uint8* dst_b, 2874 int width) { 2875 asm volatile ( 2876 LABELALIGN 2877 "1: \n" 2878 "movdqu " MEMACCESS(0) ",%%xmm0 \n" 2879 "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" 2880 "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" 2881 "pshufb %5, %%xmm0 \n" 2882 "pshufb %6, %%xmm1 \n" 2883 "pshufb %7, %%xmm2 \n" 2884 "por %%xmm1,%%xmm0 \n" 2885 "por %%xmm2,%%xmm0 \n" 2886 "movdqu %%xmm0," MEMACCESS(1) " \n" 2887 "lea " MEMLEA(0x10,1) ",%1 \n" 2888 2889 "movdqu " MEMACCESS(0) ",%%xmm0 \n" 2890 "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" 2891 "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" 2892 "pshufb %8, %%xmm0 \n" 2893 "pshufb %9, %%xmm1 \n" 2894 "pshufb %10, %%xmm2 \n" 2895 "por %%xmm1,%%xmm0 \n" 2896 "por %%xmm2,%%xmm0 \n" 2897 "movdqu %%xmm0," MEMACCESS(2) " \n" 2898 "lea " MEMLEA(0x10,2) ",%2 \n" 2899 2900 "movdqu " MEMACCESS(0) ",%%xmm0 \n" 2901 "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" 2902 "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" 2903 "pshufb %11, %%xmm0 \n" 2904 "pshufb %12, %%xmm1 \n" 2905 "pshufb %13, %%xmm2 \n" 2906 "por %%xmm1,%%xmm0 \n" 2907 "por %%xmm2,%%xmm0 \n" 2908 "movdqu %%xmm0," MEMACCESS(3) " \n" 2909 "lea " MEMLEA(0x10,3) ",%3 \n" 2910 "lea " MEMLEA(0x30,0) ",%0 \n" 2911 "sub $0x10,%4 \n" 2912 "jg 1b \n" 2913 : "+r"(src_rgb), // %0 2914 "+r"(dst_r), // %1 2915 "+r"(dst_g), // %2 2916 "+r"(dst_b), // %3 2917 "+r"(width) // %4 2918 : "m"(kShuffleMaskRGBToR0), // %5 2919 "m"(kShuffleMaskRGBToR1), // %6 2920 "m"(kShuffleMaskRGBToR2), // %7 2921 "m"(kShuffleMaskRGBToG0), // %8 2922 "m"(kShuffleMaskRGBToG1), // %9 2923 "m"(kShuffleMaskRGBToG2), // %10 2924 "m"(kShuffleMaskRGBToB0), // %11 2925 "m"(kShuffleMaskRGBToB1), // %12 2926 "m"(kShuffleMaskRGBToB2) // %13 2927 : "memory", "cc", NACL_R14 2928 "xmm0", "xmm1", "xmm2" 2929 ); 2930 } 2931 #endif // HAS_SPLITRGBROW_SSSE3 2932 2933 #ifdef HAS_MERGERGBROW_SSSE3 2934 2935 // Shuffle table for converting RGB to Planar. 2936 static uvec8 kShuffleMaskRToRGB0 = {0u, 128u, 128u, 1u, 128u, 128u, 2937 2u, 128u, 128u, 3u, 128u, 128u, 2938 4u, 128u, 128u, 5u}; 2939 static uvec8 kShuffleMaskGToRGB0 = {128u, 0u, 128u, 128u, 1u, 128u, 2940 128u, 2u, 128u, 128u, 3u, 128u, 2941 128u, 4u, 128u, 128u}; 2942 static uvec8 kShuffleMaskBToRGB0 = {128u, 128u, 0u, 128u, 128u, 1u, 2943 128u, 128u, 2u, 128u, 128u, 3u, 2944 128u, 128u, 4u, 128u}; 2945 2946 static uvec8 kShuffleMaskGToRGB1 = {5u, 128u, 128u, 6u, 128u, 128u, 2947 7u, 128u, 128u, 8u, 128u, 128u, 2948 9u, 128u, 128u, 10u}; 2949 static uvec8 kShuffleMaskBToRGB1 = {128u, 5u, 128u, 128u, 6u, 128u, 2950 128u, 7u, 128u, 128u, 8u, 128u, 2951 128u, 9u, 128u, 128u}; 2952 static uvec8 kShuffleMaskRToRGB1 = {128u, 128u, 6u, 128u, 128u, 7u, 2953 128u, 128u, 8u, 128u, 128u, 9u, 2954 128u, 128u, 10u, 128u}; 2955 2956 static uvec8 kShuffleMaskBToRGB2 = {10u, 128u, 128u, 11u, 128u, 128u, 2957 12u, 128u, 128u, 13u, 128u, 128u, 2958 14u, 128u, 128u, 15u}; 2959 static uvec8 kShuffleMaskRToRGB2 = {128u, 11u, 128u, 128u, 12u, 128u, 2960 128u, 13u, 128u, 128u, 14u, 128u, 2961 128u, 15u, 128u, 128u}; 2962 static uvec8 kShuffleMaskGToRGB2 = {128u, 128u, 11u, 128u, 128u, 12u, 2963 128u, 128u, 13u, 128u, 128u, 14u, 2964 128u, 128u, 15u, 128u}; 2965 2966 void MergeRGBRow_SSSE3(const uint8* src_r, 2967 const uint8* src_g, 2968 const uint8* src_b, 2969 uint8* dst_rgb, 2970 int width) { 2971 asm volatile ( 2972 LABELALIGN 2973 "1: \n" 2974 "movdqu " MEMACCESS(0) ",%%xmm0 \n" 2975 "movdqu " MEMACCESS(1) ",%%xmm1 \n" 2976 "movdqu " MEMACCESS(2) ",%%xmm2 \n" 2977 "pshufb %5, %%xmm0 \n" 2978 "pshufb %6, %%xmm1 \n" 2979 "pshufb %7, %%xmm2 \n" 2980 "por %%xmm1,%%xmm0 \n" 2981 "por %%xmm2,%%xmm0 \n" 2982 "movdqu %%xmm0," MEMACCESS(3) " \n" 2983 2984 "movdqu " MEMACCESS(0) ",%%xmm0 \n" 2985 "movdqu " MEMACCESS(1) ",%%xmm1 \n" 2986 "movdqu " MEMACCESS(2) ",%%xmm2 \n" 2987 "pshufb %8, %%xmm0 \n" 2988 "pshufb %9, %%xmm1 \n" 2989 "pshufb %10, %%xmm2 \n" 2990 "por %%xmm1,%%xmm0 \n" 2991 "por %%xmm2,%%xmm0 \n" 2992 "movdqu %%xmm0," MEMACCESS2(16, 3) " \n" 2993 2994 "movdqu " MEMACCESS(0) ",%%xmm0 \n" 2995 "movdqu " MEMACCESS(1) ",%%xmm1 \n" 2996 "movdqu " MEMACCESS(2) ",%%xmm2 \n" 2997 "pshufb %11, %%xmm0 \n" 2998 "pshufb %12, %%xmm1 \n" 2999 "pshufb %13, %%xmm2 \n" 3000 "por %%xmm1,%%xmm0 \n" 3001 "por %%xmm2,%%xmm0 \n" 3002 "movdqu %%xmm0," MEMACCESS2(32, 3) " \n" 3003 3004 "lea " MEMLEA(0x10,0) ",%0 \n" 3005 "lea " MEMLEA(0x10,1) ",%1 \n" 3006 "lea " MEMLEA(0x10,2) ",%2 \n" 3007 "lea " MEMLEA(0x30,3) ",%3 \n" 3008 "sub $0x10,%4 \n" 3009 "jg 1b \n" 3010 : "+r"(src_r), // %0 3011 "+r"(src_g), // %1 3012 "+r"(src_b), // %2 3013 "+r"(dst_rgb), // %3 3014 "+r"(width) // %4 3015 : "m"(kShuffleMaskRToRGB0), // %5 3016 "m"(kShuffleMaskGToRGB0), // %6 3017 "m"(kShuffleMaskBToRGB0), // %7 3018 "m"(kShuffleMaskRToRGB1), // %8 3019 "m"(kShuffleMaskGToRGB1), // %9 3020 "m"(kShuffleMaskBToRGB1), // %10 3021 "m"(kShuffleMaskRToRGB2), // %11 3022 "m"(kShuffleMaskGToRGB2), // %12 3023 "m"(kShuffleMaskBToRGB2) // %13 3024 : "memory", "cc", NACL_R14 3025 "xmm0", "xmm1", "xmm2" 3026 ); 3027 } 3028 #endif // HAS_MERGERGBROW_SSSE3 2756 3029 2757 3030 #ifdef HAS_COPYROW_SSE2 … … 5454 5727 static float kScaleBias = 1.9259299444e-34f; 5455 5728 void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) { 5729 scale *= kScaleBias; 5456 5730 asm volatile ( 5457 5731 "pshufd $0x0,%3,%%xmm4 \n" … … 5480 5754 "+r"(dst), // %1 5481 5755 "+r"(width) // %2 5482 : "x"(scale * kScaleBias) // %3 5756 #if defined(__x86_64__) 5757 : "x"(scale) // %3 5758 #else 5759 : "m"(scale) // %3 5760 #endif 5483 5761 : "memory", "cc", 5484 5762 "xmm2", "xmm3", "xmm4", "xmm5" … … 5489 5767 #ifdef HAS_HALFFLOATROW_AVX2 5490 5768 void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) { 5769 scale *= kScaleBias; 5491 5770 asm volatile ( 5492 5771 "vbroadcastss %3, %%ymm4 \n" … … 5516 5795 "+r"(dst), // %1 5517 5796 "+r"(width) // %2 5518 : "x"(scale * kScaleBias) // %3 5797 #if defined(__x86_64__) 5798 : "x"(scale) // %3 5799 #else 5800 : "m"(scale) // %3 5801 #endif 5519 5802 : "memory", "cc", 5520 5803 "xmm2", "xmm3", "xmm4", "xmm5" … … 5549 5832 "+r"(dst), // %1 5550 5833 "+r"(width) // %2 5834 #if defined(__x86_64__) 5551 5835 : "x"(scale) // %3 5836 #else 5837 : "m"(scale) // %3 5838 #endif 5552 5839 : "memory", "cc", 5553 5840 "xmm2", "xmm3", "xmm4"
Note: See TracChangeset
for help on using the changeset viewer.