swscale/x86/output: Remove obsolete MMXEXT function

This is possible now that the SSE2 function is available
even when the stack is not aligned.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
Andreas Rheinhardt
2026-04-10 04:23:23 +02:00
parent 8a7c1f7fb8
commit 761b6f2359
2 changed files with 11 additions and 61 deletions
+11 -52
View File
@@ -112,23 +112,10 @@ SECTION .text
;-----------------------------------------------------------------------------
%macro yuv2planeX_mainloop 2
.pixelloop_%2:
%assign %%i 0
; the rep here is for the 8-bit output MMX case, where dither covers
; 8 pixels but we can only handle 2 pixels per register, and thus 4
; pixels per iteration. In order to not have to keep track of where
; we are w.r.t. dithering, we unroll the MMX/8-bit loop x2.
%if %1 == 8
%assign %%repcnt 16/mmsize
%else
%assign %%repcnt 1
%endif
%rep %%repcnt
%if %1 == 8
%if ARCH_X86_32
mova m2, [rsp+mmsize*(0+%%i)]
mova m1, [rsp+mmsize*(1+%%i)]
mova m2, [rsp]
mova m1, [rsp+mmsize]
%else ; x86-64
mova m2, m8
mova m1, m_dith
@@ -142,7 +129,7 @@ SECTION .text
%else
movsx cntr_reg, fltsizem
%endif
.filterloop_%2_ %+ %%i:
.filterloop_%2:
; input pixels
mov r6, [srcq+gprsize*cntr_reg-2*gprsize]
%if %1 == 16
@@ -189,7 +176,7 @@ SECTION .text
%endif ; %1 == 8/9/10/16
sub cntr_reg, 2
jg .filterloop_%2_ %+ %%i
jg .filterloop_%2
%if %1 == 16
psrad m2, 31 - %1
@@ -210,10 +197,10 @@ SECTION .text
%else ; %1 == 9/10
%if cpuflag(sse4)
packusdw m2, m1
%else ; mmxext/sse2
%else ; sse2
packssdw m2, m1
pmaxsw m2, m6
%endif ; mmxext/sse2/sse4/avx
%endif ; sse2/sse4/avx
pminsw m2, [yuv2yuvX_%1_upper]
%endif ; %1 == 9/10/16
mov%2 [dstq+r5*2], m2
@@ -222,8 +209,6 @@ SECTION .text
add r5, mmsize/2
sub wd, mmsize/2
%assign %%i %%i+2
%endrep
jg .pixelloop_%2
%endmacro
@@ -267,14 +252,9 @@ cglobal yuv2planeX_%1, %3, 8, %2, -STACK_SIZE, filter, fltsize, src, dst, w, dit
movq m_dith, [ditherq] ; dither
test offsetd, offsetd
jz .no_rot
%if mmsize == 16
punpcklqdq m_dith, m_dith
psrldq m_dith, 3
%else
PALIGNR m_dith, m_dith, 3, m0
%endif ; mmsize == 16
.no_rot:
%if mmsize == 16
punpcklbw m_dith, m6
%if ARCH_X86_64
punpcklwd m8, m_dith, m6
@@ -289,45 +269,24 @@ cglobal yuv2planeX_%1, %3, 8, %2, -STACK_SIZE, filter, fltsize, src, dst, w, dit
mova [rsp+ 0], m5
mova [rsp+16], m_dith
%endif
%else ; mmsize == 8
punpcklbw m5, m_dith, m6
punpckhbw m_dith, m6
punpcklwd m4, m5, m6
punpckhwd m5, m6
punpcklwd m3, m_dith, m6
punpckhwd m_dith, m6
pslld m4, 12
pslld m5, 12
pslld m3, 12
pslld m_dith, 12
mova [rsp+ 0], m4
mova [rsp+ 8], m5
mova [rsp+16], m3
mova [rsp+24], m_dith
%endif ; mmsize == 8/16
%endif ; %1 == 8
xor r5, r5
%if mmsize == 8 || %1 == 8
%if %1 == 8
yuv2planeX_mainloop %1, a
%else ; mmsize == 16
%else ; %1 != 8
test dstq, 15
jnz .unaligned
yuv2planeX_mainloop %1, a
RET
.unaligned:
yuv2planeX_mainloop %1, u
%endif ; mmsize == 8/16
%endif ; %1 == 8
RET
%endmacro
%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
INIT_MMX mmxext
yuv2planeX_fn 8, 0, 7
%endif
INIT_XMM sse2
yuv2planeX_fn 8, 10, 7
yuv2planeX_fn 9, 7, 5
@@ -368,12 +327,12 @@ yuv2planeX_fn 10, 7, 5
%if cpuflag(sse4) ; avx/sse4
packusdw m0, m1
packusdw m2, m3
%else ; mmx/sse2
%else ; sse2
packssdw m0, m1
packssdw m2, m3
paddw m0, m5
paddw m2, m5
%endif ; mmx/sse2/sse4/avx
%endif ; sse2/sse4/avx
mov%2 [dstq+wq*2+mmsize*0], m0
mov%2 [dstq+wq*2+mmsize*1], m2
%else ; %1 == 9/10
-9
View File
@@ -267,7 +267,6 @@ void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
VSCALEX_FUNC(9, opt); \
VSCALEX_FUNC(10, opt)
VSCALEX_FUNC(8, mmxext);
VSCALEX_FUNCS(sse2);
VSCALEX_FUNCS(sse4);
VSCALEX_FUNC(16, sse4);
@@ -509,14 +508,6 @@ av_cold void ff_sws_init_swscale_x86(SwsInternal *c)
c->yuv2planeX = yuv2yuvX_avx2;
#endif
}
#if ARCH_X86_32 && !HAVE_ALIGNED_STACK
// The better yuv2planeX_8 functions need aligned stack on x86-32,
// so we use MMXEXT in this case if they are not available.
if (EXTERNAL_MMXEXT(cpu_flags)) {
if (c->dstBpc == 8 && !c->use_mmx_vfilter)
c->yuv2planeX = ff_yuv2planeX_8_mmxext;
}
#endif /* ARCH_X86_32 && !HAVE_ALIGNED_STACK */
#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
if (c->srcBpc == 8) { \