Context Navigation

← Previous Change
Next Change →

row_win.cc

Timestamp:

Nov 21, 2017 9:25:11 AM (6 years ago)

Author:

riza

Message:

Close #2065: Update libyuv to fix linker error when building libyuv as dll on Visual Studio 2015.

File:

1 edited

pjproject/trunk/third_party/yuv/source/row_win.cc (modified) (39 diffs)

Legend:

: Unmodified
: Added
: Removed

pjproject/trunk/third_party/yuv/source/row_win.cc

-                      r5633
+                      r5699
     pavgb      xmm2, xmm4
     // step 2 - convert to U and V
     // from here down is very similar to Y code except
     // instead of 16 different pixels, its 8 pixels of U and 8 of V
+        // step 2 - convert to U and V
+        // from here down is very similar to Y code except
+        // instead of 16 different pixels, its 8 pixels of U and 8 of V
     movdqa     xmm1, xmm0
     movdqa     xmm3, xmm2
 …
     paddb      xmm0, xmm5  // -> unsigned
     // step 3 - store 8 U and 8 V values
+        // step 3 - store 8 U and 8 V values
     movlps     qword ptr [edx], xmm0  // U
     movhps     qword ptr [edx + edi], xmm0  // V
 …
     pavgb      xmm2, xmm4
     // step 2 - convert to U and V
     // from here down is very similar to Y code except
     // instead of 16 different pixels, its 8 pixels of U and 8 of V
+        // step 2 - convert to U and V
+        // from here down is very similar to Y code except
+        // instead of 16 different pixels, its 8 pixels of U and 8 of V
     movdqa     xmm1, xmm0
     movdqa     xmm3, xmm2
 …
     packsswb   xmm0, xmm1
     // step 3 - store 8 U and 8 V values
+        // step 3 - store 8 U and 8 V values
     movlps     qword ptr [edx], xmm0  // U
     movhps     qword ptr [edx + edi], xmm0  // V
 …
     vpavgb     ymm2, ymm2, ymm4  // mutated by vshufps
     // step 2 - convert to U and V
     // from here down is very similar to Y code except
     // instead of 32 different pixels, its 16 pixels of U and 16 of V
+        // step 2 - convert to U and V
+        // from here down is very similar to Y code except
+        // instead of 32 different pixels, its 16 pixels of U and 16 of V
     vpmaddubsw ymm1, ymm0, ymm7  // U
     vpmaddubsw ymm3, ymm2, ymm7
 …
     vpaddb     ymm0, ymm0, ymm5  // -> unsigned
     // step 3 - store 16 U and 16 V values
+        // step 3 - store 16 U and 16 V values
     vextractf128 [edx], ymm0, 0  // U
     vextractf128 [edx + edi], ymm0, 1  // V
 …
     vpavgb     ymm2, ymm2, ymm4  // mutated by vshufps
     // step 2 - convert to U and V
     // from here down is very similar to Y code except
     // instead of 32 different pixels, its 16 pixels of U and 16 of V
+        // step 2 - convert to U and V
+        // from here down is very similar to Y code except
+        // instead of 32 different pixels, its 16 pixels of U and 16 of V
     vpmaddubsw ymm1, ymm0, ymm7  // U
     vpmaddubsw ymm3, ymm2, ymm7
 …
     vpshufb    ymm0, ymm0, ymmword ptr kShufARGBToUV_AVX  // for vshufps/vphaddw
     // step 3 - store 16 U and 16 V values
+        // step 3 - store 16 U and 16 V values
     vextractf128 [edx], ymm0, 0  // U
     vextractf128 [edx + edi], ymm0, 1  // V
 …
     pavgb      xmm2, xmm4
     // step 2 - convert to U and V
     // from here down is very similar to Y code except
     // instead of 16 different pixels, its 8 pixels of U and 8 of V
+        // step 2 - convert to U and V
+        // from here down is very similar to Y code except
+        // instead of 16 different pixels, its 8 pixels of U and 8 of V
     movdqa     xmm1, xmm0
     movdqa     xmm3, xmm2
 …
     paddb      xmm0, xmm5  // -> unsigned
     // step 3 - store 8 U and 8 V values
+        // step 3 - store 8 U and 8 V values
     movlps     qword ptr [edx], xmm0  // U
     movhps     qword ptr [edx + edi], xmm0  // V
 …
     pavgb      xmm2, xmm4
     // step 2 - convert to U and V
     // from here down is very similar to Y code except
     // instead of 16 different pixels, its 8 pixels of U and 8 of V
+        // step 2 - convert to U and V
+        // from here down is very similar to Y code except
+        // instead of 16 different pixels, its 8 pixels of U and 8 of V
     movdqa     xmm1, xmm0
     movdqa     xmm3, xmm2
 …
     paddb      xmm0, xmm5  // -> unsigned
     // step 3 - store 8 U and 8 V values
+        // step 3 - store 8 U and 8 V values
     movlps     qword ptr [edx], xmm0  // U
     movhps     qword ptr [edx + edi], xmm0  // V
 …
     pavgb      xmm2, xmm4
     // step 2 - convert to U and V
     // from here down is very similar to Y code except
     // instead of 16 different pixels, its 8 pixels of U and 8 of V
+        // step 2 - convert to U and V
+        // from here down is very similar to Y code except
+        // instead of 16 different pixels, its 8 pixels of U and 8 of V
     movdqa     xmm1, xmm0
     movdqa     xmm3, xmm2
 …
     paddb      xmm0, xmm5  // -> unsigned
     // step 3 - store 8 U and 8 V values
+        // step 3 - store 8 U and 8 V values
     movlps     qword ptr [edx], xmm0  // U
     movhps     qword ptr [edx + edi], xmm0  // V
 …
     packuswb   xmm0, xmm0        // G
     // Step 2: Weave into ARGB
+        // Step 2: Weave into ARGB
     punpcklbw  xmm0, xmm0  // GG
     movdqa     xmm1, xmm0
 …
     vpackuswb  ymm0, ymm0, ymm0        // G.  still mutated: 3120
     // TODO(fbarchard): Weave alpha with unpack.
     // Step 2: Weave into ARGB
+        // TODO(fbarchard): Weave alpha with unpack.
+        // Step 2: Weave into ARGB
     vpunpcklbw ymm1, ymm0, ymm0  // GG - mutates
     vpermq     ymm1, ymm1, 0xd8
 …
     sub        edi, esi
     // 8 pixel loop.
+        // 8 pixel loop.
   convertloop8:
     movq       xmm0, qword ptr [esi]  // alpha
 …
     sub         edi, esi
     // 32 pixel loop.
+        // 32 pixel loop.
   convertloop32:
     vmovdqu     ymm0, [esi]  // alpha
 …
     jl         convertloop4b  // less than 4 pixels?
     // 4 pixel loop.
+        // 4 pixel loop.
   convertloop4:
     movdqu     xmm3, [eax]  // src argb
 …
     jl         convertloop1b
     // 1 pixel loop.
+        // 1 pixel loop.
   convertloop1:
     movd       xmm3, [eax]  // src argb
 …
     packssdw   xmm5, xmm5  // 16 bit shorts
     // 4 pixel loop small blocks.
+        // 4 pixel loop small blocks.
   s4:
         // top left
 …
     jmp        l4b
     // 4 pixel loop
+            // 4 pixel loop
   l4:
         // top left
 …
     jl         l1b
     // 1 pixel loop
+        // 1 pixel loop
   l1:
     movdqu     xmm0, [eax]
 …
     jne        l4b
     // 4 pixel loop
+        // 4 pixel loop
   l4:
     movdqu     xmm2, [eax]  // 4 argb pixels 16 bytes.
 …
     jl         l1b
     // 1 pixel loop
+        // 1 pixel loop
   l1:
     movd       xmm2, dword ptr [eax]  // 1 argb pixel 4 bytes.
 …
     jl         l4b
     // setup for 4 pixel loop
+        // setup for 4 pixel loop
     pshufd     xmm7, xmm7, 0x44  // dup dudv
     pshufd     xmm5, xmm5, 0  // dup 4, stride
 …
     addps      xmm4, xmm4  // dudv *= 4
     // 4 pixel loop
+        // 4 pixel loop
   l4:
     cvttps2dq  xmm0, xmm2  // x, y float to int first 2
 …
     jl         l1b
     // 1 pixel loop
+        // 1 pixel loop
   l1:
     cvttps2dq  xmm0, xmm2  // x, y float to int
 …
     jmp        xloop99
     // Blend 50 / 50.
+        // Blend 50 / 50.
  xloop50:
    vmovdqu    ymm0, [esi]
 …
    jmp        xloop99
     // Blend 100 / 0 - Copy row unchanged.
+        // Blend 100 / 0 - Copy row unchanged.
  xloop100:
    rep movsb
 …
     mov        eax, [esp + 8 + 20]  // source_y_fraction (0..255)
     sub        edi, esi
     // Dispatch to specialized filters if applicable.
+        // Dispatch to specialized filters if applicable.
     cmp        eax, 0
     je         xloop100  // 0 /256.  Blend 100 / 0.
 …
     jmp        xloop99
     // Blend 50 / 50.
+        // Blend 50 / 50.
   xloop50:
     movdqu     xmm0, [esi]
 …
     jmp        xloop99
     // Blend 100 / 0 - Copy row unchanged.
+        // Blend 100 / 0 - Copy row unchanged.
   xloop100:
     movdqu     xmm0, [esi]
 …
     je         shuf_2103
     // TODO(fbarchard): Use one source pointer and 3 offsets.
+        // TODO(fbarchard): Use one source pointer and 3 offsets.
   shuf_any1:
     movzx      ebx, byte ptr [esi]
 …
     pxor       xmm3, xmm3  // 0 constant for zero extending bytes to ints.
     // 2 pixel loop.
+        // 2 pixel loop.
  convertloop:
         //    pmovzxbd  xmm0, dword ptr [eax]  // BGRA pixel
 …
     sub        edx, eax
     // 8 pixel loop.
+        // 8 pixel loop.
  convertloop:
     movdqu      xmm2, xmmword ptr [eax]  // 8 shorts
 …
     sub        edx, eax
     // 16 pixel loop.
+        // 16 pixel loop.
  convertloop:
     vmovdqu     ymm2, [eax]  // 16 shorts
 …
     sub        edx, eax
     // 16 pixel loop.
+        // 16 pixel loop.
  convertloop:
     vpmovzxwd   ymm2, xmmword ptr [eax]  // 8 shorts -> 8 ints
 …
     pxor       xmm5, xmm5
     // 4 pixel loop.
+        // 4 pixel loop.
   convertloop:
     movdqu     xmm0, xmmword ptr [eax]  // generate luma ptr

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 5699 for pjproject/trunk/third_party/yuv/source/row_win.cc

Legend:

pjproject/trunk/third_party/yuv/source/row_win.cc

Download in other formats: