x86: move XOP emulation code back to x86inc
Only two functions that use XOP multiply-accumulate instructions where the first operand is the same as the fourth actually took advantage of the macros. This further reduces differences with x264's x86inc.

Reviewed-by: Ronald S. Bultje <rsbultje@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
@@ -176,7 +176,12 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_shift, dst, frac, \
 .inner_loop:
 movu m1, [srcq+min_filter_count_x4q*1]
 %ifidn %1, int16
-PMADCSWD m0, m1, [filterq+min_filter_count_x4q*1], m0, m1
+%if cpuflag(xop)
+vpmadcswd m0, m1, [filterq+min_filter_count_x4q*1], m0
+%else
+pmaddwd m1, [filterq+min_filter_count_x4q*1]
+paddd m0, m1
+%endif
 %else ; float/double
 %if cpuflag(fma4) || cpuflag(fma3)
 fmaddp%4 m0, m1, [filterq+min_filter_count_x4q*1], m0
Reference in New Issue
Block a user