x86: move XOP emulation code back to x86inc

Only two functions, both of which use XOP multiply-accumulate instructions
where the first operand is the same as the fourth, actually took advantage of
the macros.

This further reduces differences with x264's x86inc.

Reviewed-by: Ronald S. Bultje <rsbultje@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
James Almer
2015-08-03 03:28:37 -03:00
parent 2ca0ed9cfd
commit 5750d6c5e9
4 changed files with 31 additions and 20 deletions
+6 -1
View File
@@ -176,7 +176,12 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_shift, dst, frac, \
.inner_loop:
movu m1, [srcq+min_filter_count_x4q*1]
%ifidn %1, int16
PMADCSWD m0, m1, [filterq+min_filter_count_x4q*1], m0, m1
%if cpuflag(xop)
vpmadcswd m0, m1, [filterq+min_filter_count_x4q*1], m0
%else
pmaddwd m1, [filterq+min_filter_count_x4q*1]
paddd m0, m1
%endif
%else ; float/double
%if cpuflag(fma4) || cpuflag(fma3)
fmaddp%4 m0, m1, [filterq+min_filter_count_x4q*1], m0