Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| c361fa9e21 |
@@ -1,23 +0,0 @@
|
||||
exclude: ^tests/ref/
|
||||
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v5.0.0
|
||||
hooks:
|
||||
- id: check-case-conflict
|
||||
- id: check-executables-have-shebangs
|
||||
- id: check-illegal-windows-names
|
||||
- id: check-shebang-scripts-are-executable
|
||||
- id: check-yaml
|
||||
- id: end-of-file-fixer
|
||||
- id: fix-byte-order-marker
|
||||
- id: mixed-line-ending
|
||||
- id: trailing-whitespace
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: aarch64-asm-indent
|
||||
name: fix aarch64 assembly indentation
|
||||
files: ^.*/aarch64/.*\.S$
|
||||
language: script
|
||||
entry: ./tools/check_arm_indent.sh --apply
|
||||
pass_filenames: false
|
||||
@@ -1,29 +0,0 @@
|
||||
name: Lint
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- release/4.4
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
name: Pre-Commit
|
||||
runs-on: utilities
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Install pre-commit CI
|
||||
id: install
|
||||
run: |
|
||||
python3 -m venv ~/pre-commit
|
||||
~/pre-commit/bin/pip install --upgrade pip setuptools
|
||||
~/pre-commit/bin/pip install pre-commit
|
||||
echo "envhash=$({ python3 --version && cat .forgejo/pre-commit/config.yaml; } | sha256sum | cut -d' ' -f1)" >> $FORGEJO_OUTPUT
|
||||
- name: Cache
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cache/pre-commit
|
||||
key: pre-commit-${{ steps.install.outputs.envhash }}
|
||||
- name: Run pre-commit CI
|
||||
run: ~/pre-commit/bin/pre-commit run -c .forgejo/pre-commit/config.yaml --show-diff-on-failure --color=always --all-files
|
||||
@@ -1,80 +0,0 @@
|
||||
name: Test
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- release/4.4
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
run_fate:
|
||||
name: Fate (${{ matrix.runner }}, ${{ matrix.shared }}, ${{ matrix.bits }} bit)
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
runner: [linux-aarch64]
|
||||
shared: ['static']
|
||||
bits: ['64']
|
||||
include:
|
||||
- runner: linux-amd64
|
||||
shared: 'static'
|
||||
bits: '32'
|
||||
- runner: linux-amd64
|
||||
shared: 'shared'
|
||||
bits: '64'
|
||||
runs-on: ${{ matrix.runner }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Configure
|
||||
run: |
|
||||
./configure --enable-gpl --enable-nonfree --enable-memory-poisoning --assert-level=2 \
|
||||
$([ "${{ matrix.bits }}" != "32" ] || echo --arch=x86_32 --extra-cflags=-m32 --extra-cxxflags=-m32 --extra-ldflags=-m32) \
|
||||
$([ "${{ matrix.shared }}" != "shared" ] || echo --enable-shared --disable-static) \
|
||||
|| CFGRES=$? && CFGRES=$?
|
||||
cat ffbuild/config.log
|
||||
exit $CFGRES
|
||||
- name: Build
|
||||
run: make -j$(nproc)
|
||||
- name: Restore Cached Fate-Suite
|
||||
id: cache
|
||||
uses: actions/cache/restore@v4
|
||||
with:
|
||||
path: fate-suite
|
||||
key: fate-suite
|
||||
restore-keys: |
|
||||
fate-suite-
|
||||
- name: Sync Fate-Suite
|
||||
id: fate
|
||||
run: |
|
||||
make fate-rsync SAMPLES=$PWD/fate-suite
|
||||
echo "hash=$(find fate-suite -type f -printf "%P %s %T@\n" | sort | sha256sum | cut -d' ' -f1)" >> $FORGEJO_OUTPUT
|
||||
- name: Cache Fate-Suite
|
||||
uses: actions/cache/save@v4
|
||||
if: ${{ format('fate-suite-{0}', steps.fate.outputs.hash) != steps.cache.outputs.cache-matched-key }}
|
||||
with:
|
||||
path: fate-suite
|
||||
key: fate-suite-${{ steps.fate.outputs.hash }}
|
||||
- name: Run Fate
|
||||
run: LD_LIBRARY_PATH="$(printf "%s:" "$PWD"/lib*)$PWD" make fate fate-build SAMPLES=$PWD/fate-suite -j$(nproc)
|
||||
compile_only:
|
||||
name: Fate (Win64, Build-Only)
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
image: ["ghcr.io/btbn/ffmpeg-builds/win64-gpl-4.4:latest"]
|
||||
runs-on: linux-amd64
|
||||
container: ${{ matrix.image }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Configure
|
||||
run: |
|
||||
./configure --pkg-config-flags="--static" $FFBUILD_TARGET_FLAGS $FF_CONFIGURE \
|
||||
--cc="$CC" --cxx="$CXX" --ar="$AR" --ranlib="$RANLIB" --nm="$NM" \
|
||||
--extra-cflags="$FF_CFLAGS" --extra-cxxflags="$FF_CXXFLAGS" \
|
||||
--extra-libs="$FF_LIBS" --extra-ldflags="$FF_LDFLAGS" --extra-ldexeflags="$FF_LDEXEFLAGS"
|
||||
- name: Build
|
||||
run: make -j$(nproc)
|
||||
- name: Run Fate
|
||||
run: make -j$(nproc) fate-build
|
||||
+9
-9
@@ -55,7 +55,7 @@ modified by someone else and passed on, the recipients should know
|
||||
that what they have is not the original version, so that the original
|
||||
author's reputation will not be affected by problems that might be
|
||||
introduced by others.
|
||||
|
||||
|
||||
Finally, software patents pose a constant threat to the existence of
|
||||
any free program. We wish to make sure that a company cannot
|
||||
effectively restrict the users of a free program by obtaining a
|
||||
@@ -111,7 +111,7 @@ modification follow. Pay close attention to the difference between a
|
||||
"work based on the library" and a "work that uses the library". The
|
||||
former contains code derived from the library, whereas the latter must
|
||||
be combined with the library in order to run.
|
||||
|
||||
|
||||
GNU LESSER GENERAL PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
@@ -158,7 +158,7 @@ Library.
|
||||
You may charge a fee for the physical act of transferring a copy,
|
||||
and you may at your option offer warranty protection in exchange for a
|
||||
fee.
|
||||
|
||||
|
||||
2. You may modify your copy or copies of the Library or any portion
|
||||
of it, thus forming a work based on the Library, and copy and
|
||||
distribute such modifications or work under the terms of Section 1
|
||||
@@ -216,7 +216,7 @@ instead of to this License. (If a newer version than version 2 of the
|
||||
ordinary GNU General Public License has appeared, then you can specify
|
||||
that version instead if you wish.) Do not make any other change in
|
||||
these notices.
|
||||
|
||||
|
||||
Once this change is made in a given copy, it is irreversible for
|
||||
that copy, so the ordinary GNU General Public License applies to all
|
||||
subsequent copies and derivative works made from that copy.
|
||||
@@ -267,7 +267,7 @@ Library will still fall under Section 6.)
|
||||
distribute the object code for the work under the terms of Section 6.
|
||||
Any executables containing that work also fall under Section 6,
|
||||
whether or not they are linked directly with the Library itself.
|
||||
|
||||
|
||||
6. As an exception to the Sections above, you may also combine or
|
||||
link a "work that uses the Library" with the Library to produce a
|
||||
work containing portions of the Library, and distribute that work
|
||||
@@ -329,7 +329,7 @@ restrictions of other proprietary libraries that do not normally
|
||||
accompany the operating system. Such a contradiction means you cannot
|
||||
use both them and the Library together in an executable that you
|
||||
distribute.
|
||||
|
||||
|
||||
7. You may place library facilities that are a work based on the
|
||||
Library side-by-side in a single library together with other library
|
||||
facilities not covered by this License, and distribute such a combined
|
||||
@@ -370,7 +370,7 @@ subject to these terms and conditions. You may not impose any further
|
||||
restrictions on the recipients' exercise of the rights granted herein.
|
||||
You are not responsible for enforcing compliance by third parties with
|
||||
this License.
|
||||
|
||||
|
||||
11. If, as a consequence of a court judgment or allegation of patent
|
||||
infringement or for any other reason (not limited to patent issues),
|
||||
conditions are imposed on you (whether by court order, agreement or
|
||||
@@ -422,7 +422,7 @@ conditions either of that version or of any later version published by
|
||||
the Free Software Foundation. If the Library does not specify a
|
||||
license version number, you may choose any version ever published by
|
||||
the Free Software Foundation.
|
||||
|
||||
|
||||
14. If you wish to incorporate parts of the Library into other free
|
||||
programs whose distribution conditions are incompatible with these,
|
||||
write to the author to ask for permission. For software which is
|
||||
@@ -456,7 +456,7 @@ SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
||||
DAMAGES.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
|
||||
How to Apply These Terms to Your New Libraries
|
||||
|
||||
If you develop a new library, and you want it to be of the greatest
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
See the Git history of the project (https://git.ffmpeg.org/ffmpeg) to
|
||||
See the Git history of the project (git://source.ffmpeg.org/ffmpeg) to
|
||||
get the names of people who have contributed to FFmpeg.
|
||||
|
||||
To check the log, you can type the command "git log" in the FFmpeg
|
||||
source directory, or browse the online repository at
|
||||
https://git.ffmpeg.org/ffmpeg
|
||||
http://source.ffmpeg.org.
|
||||
|
||||
@@ -15,11 +15,3 @@ NOTICE
|
||||
------
|
||||
|
||||
- Non system dependencies (e.g. libx264, libvpx) are disabled by default.
|
||||
|
||||
NOTICE for Package Maintainers
|
||||
------------------------------
|
||||
|
||||
- It is recommended to build FFmpeg twice, first with minimal external dependencies so
|
||||
that 3rd party packages, which depend on FFmpegs libavutil/libavfilter/libavcodec/libavformat
|
||||
can then be built. And last build FFmpeg with full dependancies (which may in turn depend on
|
||||
some of these 3rd party packages). This avoids circular dependencies during build.
|
||||
|
||||
+3
-6
@@ -583,12 +583,10 @@ wm4
|
||||
Releases
|
||||
========
|
||||
|
||||
7.0 Michael Niedermayer
|
||||
6.1 Michael Niedermayer
|
||||
5.1 Michael Niedermayer
|
||||
4.4 Michael Niedermayer
|
||||
3.4 Michael Niedermayer
|
||||
2.8 Michael Niedermayer
|
||||
2.7 Michael Niedermayer
|
||||
2.6 Michael Niedermayer
|
||||
2.5 Michael Niedermayer
|
||||
|
||||
If you want to maintain an older release, please contact us
|
||||
|
||||
@@ -617,7 +615,6 @@ Jean Delvare 7CA6 9F44 60F1 BDC4 1FD2 C858 A552 6B9B B3CD 4E6A
|
||||
Loren Merritt ABD9 08F4 C920 3F65 D8BE 35D7 1540 DAA7 060F 56DE
|
||||
Lynne FE50 139C 6805 72CA FD52 1F8D A2FE A5F0 3F03 4464
|
||||
Michael Niedermayer 9FF2 128B 147E F673 0BAD F133 611E C787 040B 0FAB
|
||||
DD1E C9E8 DE08 5C62 9B3E 1846 B18E 8928 B394 8D64
|
||||
Nicolas George 24CE 01CE 9ACC 5CEB 74D8 8D9D B063 D997 36E5 4C93
|
||||
Nikolay Aleksandrov 8978 1D8C FB71 588E 4B27 EAA8 C4F0 B5FC E011 13B1
|
||||
Panagiotis Issaris 6571 13A3 33D9 3726 F728 AA98 F643 B12E ECF3 E029
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
|
||||
┌────────────────────────────────────┐
|
||||
│ RELEASE NOTES for FFmpeg 4.4 "Rao" │
|
||||
└────────────────────────────────────┘
|
||||
|
||||
The FFmpeg Project proudly presents FFmpeg 4.4 "Rao", about 10
|
||||
months after the release of FFmpeg 4.3.
|
||||
|
||||
A complete Changelog is available at the root of the project, and the
|
||||
complete Git history on https://git.ffmpeg.org/gitweb/ffmpeg.git
|
||||
|
||||
We hope you will like this release as much as we enjoyed working on it, and
|
||||
as usual, if you have any questions about it, or any FFmpeg related topic,
|
||||
feel free to join us on the #ffmpeg IRC channel (on irc.libera.chat) or ask
|
||||
on the mailing-lists.
|
||||
@@ -416,9 +416,7 @@ Advanced options (experts only):
|
||||
--enable-hardcoded-tables use hardcoded tables instead of runtime generation
|
||||
--disable-safe-bitstream-reader
|
||||
disable buffer boundary checking in bitreaders
|
||||
(This disables some security checks and can cause undefined behavior,
|
||||
crashes and arbitrary code execution, it may be faster, but
|
||||
should only be used with trusted input)
|
||||
(faster, but may crash)
|
||||
--sws-max-filter-size=N the max filter size swscale uses [$sws_max_filter_size_default]
|
||||
|
||||
Optimization options (experts only):
|
||||
@@ -538,7 +536,7 @@ die(){
|
||||
|
||||
If you think configure made a mistake, make sure you are using the latest
|
||||
version from Git. If the latest version fails, report the problem to the
|
||||
ffmpeg-user@ffmpeg.org mailing list or IRC #ffmpeg on irc.libera.chat.
|
||||
ffmpeg-user@ffmpeg.org mailing list or IRC #ffmpeg on irc.freenode.net.
|
||||
EOF
|
||||
if disabled logging; then
|
||||
cat <<EOF
|
||||
@@ -1737,6 +1735,7 @@ EXTERNAL_LIBRARY_GPL_LIST="
|
||||
EXTERNAL_LIBRARY_NONFREE_LIST="
|
||||
decklink
|
||||
libfdk_aac
|
||||
openssl
|
||||
libtls
|
||||
"
|
||||
|
||||
@@ -1828,7 +1827,6 @@ EXTERNAL_LIBRARY_LIST="
|
||||
mediacodec
|
||||
openal
|
||||
opengl
|
||||
openssl
|
||||
pocketsphinx
|
||||
vapoursynth
|
||||
"
|
||||
@@ -2062,7 +2060,6 @@ ARCH_EXT_LIST_PPC="
|
||||
ldbrx
|
||||
power8
|
||||
ppc4xx
|
||||
vec_xl
|
||||
vsx
|
||||
"
|
||||
|
||||
@@ -2343,7 +2340,6 @@ HAVE_LIST="
|
||||
opencl_vaapi_intel_media
|
||||
perl
|
||||
pod2man
|
||||
posix_ioctl
|
||||
texi2html
|
||||
"
|
||||
|
||||
@@ -2553,7 +2549,6 @@ altivec_deps="ppc"
|
||||
dcbzl_deps="ppc"
|
||||
ldbrx_deps="ppc"
|
||||
ppc4xx_deps="ppc"
|
||||
vec_xl_deps="altivec"
|
||||
vsx_deps="altivec"
|
||||
power8_deps="vsx"
|
||||
|
||||
@@ -2766,7 +2761,6 @@ indeo3_decoder_select="hpeldsp"
|
||||
indeo4_decoder_select="ividsp"
|
||||
indeo5_decoder_select="ividsp"
|
||||
interplay_video_decoder_select="hpeldsp"
|
||||
ipu_decoder_select="mpegvideo"
|
||||
jpegls_decoder_select="mjpeg_decoder"
|
||||
jv_decoder_select="blockdsp"
|
||||
lagarith_decoder_select="llviddsp"
|
||||
@@ -3273,7 +3267,7 @@ librav1e_encoder_deps="librav1e"
|
||||
librav1e_encoder_select="extract_extradata_bsf"
|
||||
librsvg_decoder_deps="librsvg"
|
||||
libshine_encoder_deps="libshine"
|
||||
libshine_encoder_select="audio_frame_queue mpegaudioheader"
|
||||
libshine_encoder_select="audio_frame_queue"
|
||||
libspeex_decoder_deps="libspeex"
|
||||
libspeex_encoder_deps="libspeex"
|
||||
libspeex_encoder_select="audio_frame_queue"
|
||||
@@ -3370,7 +3364,6 @@ opus_muxer_select="ogg_muxer"
|
||||
psp_muxer_select="mov_muxer"
|
||||
rtp_demuxer_select="sdp_demuxer"
|
||||
rtp_muxer_select="golomb jpegtables"
|
||||
rtp_mpegts_muxer_select="mpegts_muxer rtp_muxer"
|
||||
rtpdec_select="asf_demuxer jpegtables mov_demuxer mpegts_demuxer rm_demuxer rtp_protocol srtp"
|
||||
rtsp_demuxer_select="http_protocol rtpdec"
|
||||
rtsp_muxer_select="rtp_muxer http_protocol rtp_protocol rtpenc_chain"
|
||||
@@ -3711,23 +3704,23 @@ cws2fws_extralibs="zlib_extralibs"
|
||||
|
||||
# libraries, in any order
|
||||
avcodec_deps="avutil"
|
||||
avcodec_suggest="libm stdatomic"
|
||||
avcodec_suggest="libm"
|
||||
avcodec_select="null_bsf"
|
||||
avdevice_deps="avformat avcodec avutil"
|
||||
avdevice_suggest="libm stdatomic"
|
||||
avdevice_suggest="libm"
|
||||
avfilter_deps="avutil"
|
||||
avfilter_suggest="libm stdatomic"
|
||||
avfilter_suggest="libm"
|
||||
avformat_deps="avcodec avutil"
|
||||
avformat_suggest="libm network zlib stdatomic"
|
||||
avformat_suggest="libm network zlib"
|
||||
avresample_deps="avutil"
|
||||
avresample_suggest="libm"
|
||||
avutil_suggest="clock_gettime ffnvcodec libm libdrm libmfx opencl user32 vaapi vulkan videotoolbox corefoundation corevideo coremedia bcrypt stdatomic"
|
||||
avutil_suggest="clock_gettime ffnvcodec libm libdrm libmfx opencl user32 vaapi vulkan videotoolbox corefoundation corevideo coremedia bcrypt"
|
||||
postproc_deps="avutil gpl"
|
||||
postproc_suggest="libm stdatomic"
|
||||
postproc_suggest="libm"
|
||||
swresample_deps="avutil"
|
||||
swresample_suggest="libm libsoxr stdatomic"
|
||||
swresample_suggest="libm libsoxr"
|
||||
swscale_deps="avutil"
|
||||
swscale_suggest="libm stdatomic"
|
||||
swscale_suggest="libm"
|
||||
|
||||
avcodec_extralibs="pthreads_extralibs iconv_extralibs dxva2_extralibs"
|
||||
avfilter_extralibs="pthreads_extralibs"
|
||||
@@ -5374,7 +5367,6 @@ case $target_os in
|
||||
;;
|
||||
netbsd)
|
||||
disable symver
|
||||
enable section_data_rel_ro
|
||||
oss_indev_extralibs="-lossaudio"
|
||||
oss_outdev_extralibs="-lossaudio"
|
||||
enabled gcc || check_ldflags -Wl,-zmuldefs
|
||||
@@ -5393,7 +5385,6 @@ case $target_os in
|
||||
disable symver
|
||||
;;
|
||||
freebsd)
|
||||
enable section_data_rel_ro
|
||||
;;
|
||||
bsd/os)
|
||||
add_extralibs -lpoll -lgnugetopt
|
||||
@@ -5970,11 +5961,6 @@ elif enabled ppc; then
|
||||
check_cpp_condition power8 "altivec.h" "defined(_ARCH_PWR8)"
|
||||
fi
|
||||
|
||||
if enabled altivec; then
|
||||
check_cc vec_xl altivec.h "const unsigned char *y1i = { 0 };
|
||||
vector unsigned char y0 = vec_xl(0, y1i);"
|
||||
fi
|
||||
|
||||
elif enabled x86; then
|
||||
|
||||
check_builtin rdtsc intrin.h "__rdtsc()"
|
||||
@@ -6198,14 +6184,7 @@ check_headers asm/types.h
|
||||
# it seems there are versions of clang in some distros that try to use the
|
||||
# gcc headers, which explodes for stdatomic
|
||||
# so we also check that atomics actually work here
|
||||
#
|
||||
# some configurations also require linking to libatomic, so try
|
||||
# both with -latomic and without
|
||||
for LATOMIC in "-latomic" ""; do
|
||||
check_builtin stdatomic stdatomic.h \
|
||||
"atomic_int foo, bar = ATOMIC_VAR_INIT(-1); atomic_store(&foo, 0); foo += bar" \
|
||||
$LATOMIC && eval stdatomic_extralibs="\$LATOMIC" && break
|
||||
done
|
||||
check_builtin stdatomic stdatomic.h "atomic_int foo, bar = ATOMIC_VAR_INIT(-1); atomic_store(&foo, 0); foo += bar"
|
||||
|
||||
check_lib advapi32 "windows.h" RegCloseKey -ladvapi32
|
||||
check_lib bcrypt "windows.h bcrypt.h" BCryptGenRandom -lbcrypt &&
|
||||
@@ -6543,10 +6522,7 @@ enabled omx_rpi && { test_code cc OMX_Core.h OMX_IndexConfigBrcmVideoR
|
||||
die "ERROR: OpenMAX IL headers from raspberrypi/firmware not found"; } &&
|
||||
enable omx
|
||||
enabled omx && require_headers OMX_Core.h
|
||||
enabled openssl && { { check_pkg_config openssl "openssl >= 3.0.0" openssl/ssl.h OPENSSL_init_ssl &&
|
||||
{ enabled gplv3 || ! enabled gpl || enabled nonfree || die "ERROR: OpenSSL >=3.0.0 requires --enable-version3"; }; } ||
|
||||
{ enabled gpl && ! enabled nonfree && die "ERROR: OpenSSL <3.0.0 is incompatible with the gpl"; } ||
|
||||
check_pkg_config openssl openssl openssl/ssl.h OPENSSL_init_ssl ||
|
||||
enabled openssl && { check_pkg_config openssl openssl openssl/ssl.h OPENSSL_init_ssl ||
|
||||
check_pkg_config openssl openssl openssl/ssl.h SSL_library_init ||
|
||||
check_lib openssl openssl/ssl.h OPENSSL_init_ssl -lssl -lcrypto ||
|
||||
check_lib openssl openssl/ssl.h SSL_library_init -lssl -lcrypto ||
|
||||
@@ -6577,7 +6553,7 @@ fi
|
||||
|
||||
if enabled sdl2; then
|
||||
SDL2_CONFIG="${cross_prefix}sdl2-config"
|
||||
test_pkg_config sdl2 "sdl2 >= 2.0.1 sdl2 < 3.0.0" SDL_events.h SDL_PollEvent
|
||||
test_pkg_config sdl2 "sdl2 >= 2.0.1 sdl2 < 2.1.0" SDL_events.h SDL_PollEvent
|
||||
if disabled sdl2 && "${SDL2_CONFIG}" --version > /dev/null 2>&1; then
|
||||
sdl2_cflags=$("${SDL2_CONFIG}" --cflags)
|
||||
sdl2_extralibs=$("${SDL2_CONFIG}" --libs)
|
||||
@@ -6621,15 +6597,13 @@ enabled makeinfo \
|
||||
disabled makeinfo_html && texi2html --help 2> /dev/null | grep -q 'init-file' && enable texi2html || disable texi2html
|
||||
perl -v > /dev/null 2>&1 && enable perl || disable perl
|
||||
pod2man --help > /dev/null 2>&1 && enable pod2man || disable pod2man
|
||||
rsync --help 2> /dev/null | grep -q 'contimeout=' && enable rsync_contimeout || disable rsync_contimeout
|
||||
|
||||
check_headers linux/fb.h
|
||||
check_headers linux/videodev2.h
|
||||
test_code cc linux/videodev2.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_sanitized struct_v4l2_frmivalenum_discrete
|
||||
test_code cc sys/ioctl.h "int ioctl(int, int, ...)" && enable posix_ioctl
|
||||
rsync --help 2> /dev/null | grep -q 'contimeout' && enable rsync_contimeout || disable rsync_contimeout
|
||||
|
||||
# check V4L2 codecs available in the API
|
||||
if enabled v4l2_m2m; then
|
||||
check_headers linux/fb.h
|
||||
check_headers linux/videodev2.h
|
||||
test_code cc linux/videodev2.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_sanitized struct_v4l2_frmivalenum_discrete
|
||||
check_cc v4l2_m2m linux/videodev2.h "int i = V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_VIDEO_M2M | V4L2_BUF_FLAG_LAST;"
|
||||
check_cc vc1_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_VC1_ANNEX_G;"
|
||||
check_cc mpeg1_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_MPEG1;"
|
||||
@@ -6674,7 +6648,7 @@ enabled alsa && { check_pkg_config alsa alsa "alsa/asoundlib.h" snd_pcm_htimesta
|
||||
enabled libjack &&
|
||||
require_pkg_config libjack jack jack/jack.h jack_port_get_latency_range
|
||||
|
||||
enabled sndio && check_pkg_config sndio sndio sndio.h sio_open
|
||||
enabled sndio && check_lib sndio sndio.h sio_open -lsndio
|
||||
|
||||
if enabled libcdio; then
|
||||
check_pkg_config libcdio libcdio_paranoia "cdio/cdda.h cdio/paranoia.h" cdio_cddap_open ||
|
||||
@@ -6775,7 +6749,7 @@ enabled vulkan &&
|
||||
|
||||
if enabled x86; then
|
||||
case $target_os in
|
||||
freebsd|mingw32*|mingw64*|win32|win64|linux|cygwin*)
|
||||
mingw32*|mingw64*|win32|win64|linux|cygwin*)
|
||||
;;
|
||||
*)
|
||||
disable ffnvcodec cuvid nvdec nvenc
|
||||
@@ -7374,7 +7348,6 @@ if enabled ppc; then
|
||||
echo "POWER8 enabled ${power8-no}"
|
||||
echo "PPC 4xx optimizations ${ppc4xx-no}"
|
||||
echo "dcbzl available ${dcbzl-no}"
|
||||
echo "vec_xl available ${vec_xl-no}"
|
||||
fi
|
||||
echo "debug symbols ${debug-no}"
|
||||
echo "strip symbols ${stripping-no}"
|
||||
@@ -7527,6 +7500,7 @@ LD_LIB=$LD_LIB
|
||||
LD_PATH=$LD_PATH
|
||||
DLLTOOL=$dlltool
|
||||
WINDRES=$windres
|
||||
DEPWINDRES=$dep_cc
|
||||
DOXYGEN=$doxygen
|
||||
LDFLAGS=$LDFLAGS
|
||||
LDEXEFLAGS=$LDEXEFLAGS
|
||||
@@ -7609,7 +7583,7 @@ cat > $TMPH <<EOF
|
||||
#define FFMPEG_CONFIG_H
|
||||
#define FFMPEG_CONFIGURATION "$(c_escape $FFMPEG_CONFIGURATION)"
|
||||
#define FFMPEG_LICENSE "$(c_escape $license)"
|
||||
#define CONFIG_THIS_YEAR 2026
|
||||
#define CONFIG_THIS_YEAR 2021
|
||||
#define FFMPEG_DATADIR "$(eval c_escape $datadir)"
|
||||
#define AVCONV_DATADIR "$(eval c_escape $datadir)"
|
||||
#define CC_IDENT "$(c_escape ${cc_ident:-Unknown compiler})"
|
||||
|
||||
+1
-1
@@ -38,7 +38,7 @@ PROJECT_NAME = FFmpeg
|
||||
# could be handy for archiving the generated documentation or if some version
|
||||
# control system is used.
|
||||
|
||||
PROJECT_NUMBER = 4.4.7
|
||||
PROJECT_NUMBER =
|
||||
|
||||
# Using the PROJECT_BRIEF tag one can provide an optional one line description
|
||||
# for a project that appears at the top of each page and should give viewer a
|
||||
|
||||
+2
-2
@@ -3,9 +3,9 @@
|
||||
The FFmpeg developers.
|
||||
|
||||
For details about the authorship, see the Git history of the project
|
||||
(https://git.ffmpeg.org/ffmpeg), e.g. by typing the command
|
||||
(git://source.ffmpeg.org/ffmpeg), e.g. by typing the command
|
||||
@command{git log} in the FFmpeg source directory, or browsing the
|
||||
online repository at @url{https://git.ffmpeg.org/ffmpeg}.
|
||||
online repository at @url{http://source.ffmpeg.org}.
|
||||
|
||||
Maintainers for the specific components are listed in the file
|
||||
@file{MAINTAINERS} in the source code tree.
|
||||
|
||||
Vendored
+1
-1
File diff suppressed because one or more lines are too long
@@ -63,3 +63,4 @@ make -j<num>
|
||||
make -k
|
||||
Continue build in case of errors, this is useful for the regression tests
|
||||
sometimes but note that it will still not run all reg tests.
|
||||
|
||||
|
||||
@@ -327,13 +327,6 @@ segment index to start live streams at (negative values are from the end).
|
||||
@item allowed_extensions
|
||||
',' separated list of file extensions that hls is allowed to access.
|
||||
|
||||
@item extension_picky
|
||||
This blocks disallowed extensions from probing
|
||||
It also requires all available segments to have matching extensions to the format
|
||||
except mpegts, which is always allowed.
|
||||
It is recommended to set the whitelists correctly instead of depending on extensions
|
||||
Enabled by default.
|
||||
|
||||
@item max_reload
|
||||
Maximum number of times a insufficient list is attempted to be reloaded.
|
||||
Default value is 1000.
|
||||
|
||||
@@ -88,3 +88,4 @@ lead to this decision.
|
||||
|
||||
The decisions from the TC are final, until the matters are reopened after
|
||||
no less than one year.
|
||||
|
||||
|
||||
@@ -762,25 +762,6 @@ In case you need finer control over how valgrind is invoked, use the
|
||||
@code{--target-exec='valgrind <your_custom_valgrind_options>} option in
|
||||
your configure line instead.
|
||||
|
||||
@anchor{Maintenance}
|
||||
@chapter Maintenance process
|
||||
|
||||
@anchor{MAINTAINERS}
|
||||
@section MAINTAINERS
|
||||
|
||||
The developers maintaining each part of the codebase are listed in @file{MAINTAINERS}.
|
||||
Being listed in @file{MAINTAINERS}, gives one the right to have git write access to
|
||||
the specific repository.
|
||||
|
||||
@anchor{Becoming a maintainer}
|
||||
@section Becoming a maintainer
|
||||
|
||||
People add themselves to @file{MAINTAINERS} by sending a patch like any other code
|
||||
change. These get reviewed by the community like any other patch. It is expected
|
||||
that, if someone has an objection to a new maintainer, she is willing to object
|
||||
in public with her full name and is willing to take over maintainership for the area.
|
||||
|
||||
|
||||
@anchor{Release process}
|
||||
@chapter Release process
|
||||
|
||||
|
||||
@@ -96,7 +96,6 @@ int main(int argc, char *argv[])
|
||||
avio_ctx = avio_alloc_context(avio_ctx_buffer, avio_ctx_buffer_size,
|
||||
0, &bd, &read_packet, NULL, NULL);
|
||||
if (!avio_ctx) {
|
||||
av_freep(&avio_ctx_buffer);
|
||||
ret = AVERROR(ENOMEM);
|
||||
goto end;
|
||||
}
|
||||
|
||||
@@ -127,10 +127,6 @@ int main(int argc, char **argv)
|
||||
outfilename = argv[2];
|
||||
|
||||
pkt = av_packet_alloc();
|
||||
if (!pkt) {
|
||||
fprintf(stderr, "Could not allocate AVPacket\n");
|
||||
exit(1); /* or proper cleanup and returning */
|
||||
}
|
||||
|
||||
/* find the MPEG audio decoder */
|
||||
codec = avcodec_find_decoder(AV_CODEC_ID_MP2);
|
||||
@@ -164,7 +160,7 @@ int main(int argc, char **argv)
|
||||
}
|
||||
outfile = fopen(outfilename, "wb");
|
||||
if (!outfile) {
|
||||
fprintf(stderr, "Could not open %s\n", outfilename);
|
||||
av_free(c);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
@@ -137,9 +137,11 @@ static int decode_packet(AVCodecContext *dec, const AVPacket *pkt)
|
||||
ret = output_audio_frame(frame);
|
||||
|
||||
av_frame_unref(frame);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int open_codec_context(int *stream_idx,
|
||||
|
||||
@@ -350,7 +350,8 @@ static int write_audio_frame(AVFormatContext *oc, OutputStream *ost)
|
||||
if (frame) {
|
||||
/* convert samples from native format to destination codec format, using the resampler */
|
||||
/* compute destination number of samples */
|
||||
dst_nb_samples = swr_get_delay(ost->swr_ctx, c->sample_rate) + frame->nb_samples;
|
||||
dst_nb_samples = av_rescale_rnd(swr_get_delay(ost->swr_ctx, c->sample_rate) + frame->nb_samples,
|
||||
c->sample_rate, c->sample_rate, AV_ROUND_UP);
|
||||
av_assert0(dst_nb_samples == frame->nb_samples);
|
||||
|
||||
/* when we pass a frame to the encoder, it may keep a reference to it
|
||||
|
||||
@@ -91,10 +91,6 @@ static int encode_write(AVCodecContext *avctx, AVFrame *frame, FILE *fout)
|
||||
enc_pkt->stream_index = 0;
|
||||
ret = fwrite(enc_pkt->data, enc_pkt->size, 1, fout);
|
||||
av_packet_unref(enc_pkt);
|
||||
if (ret != enc_pkt->size) {
|
||||
ret = AVERROR(errno);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
end:
|
||||
|
||||
@@ -218,8 +218,10 @@ static int dec_enc(AVPacket *pkt, AVCodec *enc_codec)
|
||||
|
||||
fail:
|
||||
av_frame_free(&frame);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
slot= # some unique identifier
|
||||
repo=https://git.ffmpeg.org/ffmpeg.git # the source repository
|
||||
repo=git://source.ffmpeg.org/ffmpeg.git # the source repository
|
||||
#branch=release/2.6 # the branch to test
|
||||
samples= # path to samples directory
|
||||
workdir= # directory in which to do all the work
|
||||
@@ -11,21 +11,16 @@ ignore_tests=
|
||||
# the following are optional and map to configure options
|
||||
arch=
|
||||
cpu=
|
||||
toolchain=
|
||||
cross_prefix=
|
||||
as=
|
||||
cc=
|
||||
cxx=
|
||||
ld=
|
||||
nm=
|
||||
target_os=
|
||||
sysroot=
|
||||
target_exec=
|
||||
target_path=
|
||||
target_samples=
|
||||
extra_cflags=
|
||||
extra_cxxflags=
|
||||
extra_objcflags=
|
||||
extra_ldflags=
|
||||
extra_libs=
|
||||
extra_conf= # extra configure options not covered above
|
||||
|
||||
+3
-23
@@ -53,7 +53,7 @@ Most distribution and operating system provide a package for it.
|
||||
@section Cloning the source tree
|
||||
|
||||
@example
|
||||
git clone https://git.ffmpeg.org/ffmpeg.git <target>
|
||||
git clone git://source.ffmpeg.org/ffmpeg <target>
|
||||
@end example
|
||||
|
||||
This will put the FFmpeg sources into the directory @var{<target>}.
|
||||
@@ -143,7 +143,7 @@ git log <filename(s)>
|
||||
@end example
|
||||
|
||||
You may also use the graphical tools like @command{gitview} or @command{gitk}
|
||||
or the web interface available at @url{https://git.ffmpeg.org/ffmpeg.git}.
|
||||
or the web interface available at @url{http://source.ffmpeg.org/}.
|
||||
|
||||
@section Checking source tree status
|
||||
|
||||
@@ -187,18 +187,11 @@ to make sure you don't have untracked files or deletions.
|
||||
git add [-i|-p|-A] <filenames/dirnames>
|
||||
@end example
|
||||
|
||||
Make sure you have told Git your name, email address and GPG key
|
||||
Make sure you have told Git your name and email address
|
||||
|
||||
@example
|
||||
git config --global user.name "My Name"
|
||||
git config --global user.email my@@email.invalid
|
||||
git config --global user.signingkey ABCDEF0123245
|
||||
@end example
|
||||
|
||||
Enable signing all commits or use -S
|
||||
|
||||
@example
|
||||
git config --global commit.gpgsign true
|
||||
@end example
|
||||
|
||||
Use @option{--global} to set the global configuration for all your Git checkouts.
|
||||
@@ -400,19 +393,6 @@ git checkout -b svn_23456 $SHA1
|
||||
where @var{$SHA1} is the commit hash from the @command{git log} output.
|
||||
|
||||
|
||||
@chapter gpg key generation
|
||||
|
||||
If you have no gpg key yet, we recommend that you create a ed25519 based key as it
|
||||
is small, fast and secure. Especially it results in small signatures in git.
|
||||
|
||||
@example
|
||||
gpg --default-new-key-algo "ed25519/cert,sign+cv25519/encr" --quick-generate-key "human@@server.com"
|
||||
@end example
|
||||
|
||||
When generating a key, make sure the email specified matches the email used in git as some sites like
|
||||
github consider mismatches a reason to declare such commits unverified. After generating a key you
|
||||
can add it to the MAINTAINER file and upload it to a keyserver.
|
||||
|
||||
@chapter Pre-push checklist
|
||||
|
||||
Once you have a set of commits that you feel are ready for pushing,
|
||||
|
||||
@@ -157,3 +157,4 @@ PFD[32] would for example be signed 32 bit little-endian IEEE float
|
||||
@item XVID @tab non-compliant MPEG-4 generated by old Xvid
|
||||
@item XVIX @tab non-compliant MPEG-4 generated by old Xvid with interlacing bug
|
||||
@end multitable
|
||||
|
||||
|
||||
+19
-103
@@ -20,45 +20,8 @@
|
||||
# License along with FFmpeg; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
# Texinfo 7.0 changed the syntax of various functions.
|
||||
# Provide a shim for older versions.
|
||||
sub ff_set_from_init_file($$) {
|
||||
my $key = shift;
|
||||
my $value = shift;
|
||||
if (exists &{'texinfo_set_from_init_file'}) {
|
||||
texinfo_set_from_init_file($key, $value);
|
||||
} else {
|
||||
set_from_init_file($key, $value);
|
||||
}
|
||||
}
|
||||
|
||||
sub ff_get_conf($) {
|
||||
my $key = shift;
|
||||
if (exists &{'texinfo_get_conf'}) {
|
||||
texinfo_get_conf($key);
|
||||
} else {
|
||||
get_conf($key);
|
||||
}
|
||||
}
|
||||
|
||||
sub get_formatting_function($$) {
|
||||
my $obj = shift;
|
||||
my $func = shift;
|
||||
|
||||
my $sub = $obj->can('formatting_function');
|
||||
if ($sub) {
|
||||
return $obj->formatting_function($func);
|
||||
} else {
|
||||
return $obj->{$func};
|
||||
}
|
||||
}
|
||||
|
||||
# determine texinfo version
|
||||
my $program_version_num = version->declare(ff_get_conf('PACKAGE_VERSION'))->numify;
|
||||
my $program_version_6_8 = $program_version_num >= 6.008000;
|
||||
|
||||
# no navigation elements
|
||||
ff_set_from_init_file('HEADERS', 0);
|
||||
set_from_init_file('HEADERS', 0);
|
||||
|
||||
sub ffmpeg_heading_command($$$$$)
|
||||
{
|
||||
@@ -92,7 +55,7 @@ sub ffmpeg_heading_command($$$$$)
|
||||
$element = $command->{'parent'};
|
||||
}
|
||||
if ($element) {
|
||||
$result .= &{get_formatting_function($self, 'format_element_header')}($self, $cmdname,
|
||||
$result .= &{$self->{'format_element_header'}}($self, $cmdname,
|
||||
$command, $element);
|
||||
}
|
||||
|
||||
@@ -149,11 +112,7 @@ sub ffmpeg_heading_command($$$$$)
|
||||
$cmdname
|
||||
= $Texinfo::Common::level_to_structuring_command{$cmdname}->[$heading_level];
|
||||
}
|
||||
# format_heading_text expects an array of headings for texinfo >= 7.0
|
||||
if ($program_version_num >= 7.000000) {
|
||||
$heading = [$heading];
|
||||
}
|
||||
$result .= &{get_formatting_function($self,'format_heading_text')}(
|
||||
$result .= &{$self->{'format_heading_text'}}(
|
||||
$self, $cmdname, $heading,
|
||||
$heading_level +
|
||||
$self->get_conf('CHAPTER_HEADER_LEVEL') - 1, $command);
|
||||
@@ -168,18 +127,14 @@ foreach my $command (keys(%Texinfo::Common::sectioning_commands), 'node') {
|
||||
}
|
||||
|
||||
# print the TOC where @contents is used
|
||||
if ($program_version_6_8) {
|
||||
ff_set_from_init_file('CONTENTS_OUTPUT_LOCATION', 'inline');
|
||||
} else {
|
||||
ff_set_from_init_file('INLINE_CONTENTS', 1);
|
||||
}
|
||||
set_from_init_file('INLINE_CONTENTS', 1);
|
||||
|
||||
# make chapters <h2>
|
||||
ff_set_from_init_file('CHAPTER_HEADER_LEVEL', 2);
|
||||
set_from_init_file('CHAPTER_HEADER_LEVEL', 2);
|
||||
|
||||
# Do not add <hr>
|
||||
ff_set_from_init_file('DEFAULT_RULE', '');
|
||||
ff_set_from_init_file('BIG_RULE', '');
|
||||
set_from_init_file('DEFAULT_RULE', '');
|
||||
set_from_init_file('BIG_RULE', '');
|
||||
|
||||
# Customized file beginning
|
||||
sub ffmpeg_begin_file($$$)
|
||||
@@ -196,18 +151,7 @@ sub ffmpeg_begin_file($$$)
|
||||
my ($title, $description, $encoding, $date, $css_lines,
|
||||
$doctype, $bodytext, $copying_comment, $after_body_open,
|
||||
$extra_head, $program_and_version, $program_homepage,
|
||||
$program, $generator);
|
||||
if ($program_version_num >= 7.000000) {
|
||||
($title, $description, $encoding, $date, $css_lines,
|
||||
$doctype, $bodytext, $copying_comment, $after_body_open,
|
||||
$extra_head, $program_and_version, $program_homepage,
|
||||
$program, $generator) = $self->_file_header_information($command);
|
||||
} else {
|
||||
($title, $description, $encoding, $date, $css_lines,
|
||||
$doctype, $bodytext, $copying_comment, $after_body_open,
|
||||
$extra_head, $program_and_version, $program_homepage,
|
||||
$program, $generator) = $self->_file_header_informations($command);
|
||||
}
|
||||
$program, $generator) = $self->_file_header_informations($command);
|
||||
|
||||
my $links = $self->_get_links ($filename, $element);
|
||||
|
||||
@@ -240,11 +184,7 @@ EOT
|
||||
|
||||
return $head1 . $head_title . $head2 . $head_title . $head3;
|
||||
}
|
||||
if ($program_version_6_8) {
|
||||
texinfo_register_formatting_function('format_begin_file', \&ffmpeg_begin_file);
|
||||
} else {
|
||||
texinfo_register_formatting_function('begin_file', \&ffmpeg_begin_file);
|
||||
}
|
||||
texinfo_register_formatting_function('begin_file', \&ffmpeg_begin_file);
|
||||
|
||||
sub ffmpeg_program_string($)
|
||||
{
|
||||
@@ -261,17 +201,13 @@ sub ffmpeg_program_string($)
|
||||
$self->gdt('This document was generated automatically.'));
|
||||
}
|
||||
}
|
||||
if ($program_version_6_8) {
|
||||
texinfo_register_formatting_function('format_program_string', \&ffmpeg_program_string);
|
||||
} else {
|
||||
texinfo_register_formatting_function('program_string', \&ffmpeg_program_string);
|
||||
}
|
||||
texinfo_register_formatting_function('program_string', \&ffmpeg_program_string);
|
||||
|
||||
# Customized file ending
|
||||
sub ffmpeg_end_file($)
|
||||
{
|
||||
my $self = shift;
|
||||
my $program_string = &{get_formatting_function($self,'format_program_string')}($self);
|
||||
my $program_string = &{$self->{'format_program_string'}}($self);
|
||||
my $program_text = <<EOT;
|
||||
<p style="font-size: small;">
|
||||
$program_string
|
||||
@@ -284,15 +220,11 @@ EOT
|
||||
EOT
|
||||
return $program_text . $footer;
|
||||
}
|
||||
if ($program_version_6_8) {
|
||||
texinfo_register_formatting_function('format_end_file', \&ffmpeg_end_file);
|
||||
} else {
|
||||
texinfo_register_formatting_function('end_file', \&ffmpeg_end_file);
|
||||
}
|
||||
texinfo_register_formatting_function('end_file', \&ffmpeg_end_file);
|
||||
|
||||
# Dummy title command
|
||||
# Ignore title. Title is handled through ffmpeg_begin_file().
|
||||
ff_set_from_init_file('USE_TITLEPAGE_FOR_TITLE', 1);
|
||||
set_from_init_file('USE_TITLEPAGE_FOR_TITLE', 1);
|
||||
sub ffmpeg_title($$$$)
|
||||
{
|
||||
return '';
|
||||
@@ -310,14 +242,8 @@ sub ffmpeg_float($$$$$)
|
||||
my $args = shift;
|
||||
my $content = shift;
|
||||
|
||||
my ($caption, $prepended);
|
||||
if ($program_version_num >= 7.000000) {
|
||||
($caption, $prepended) = Texinfo::Convert::Converter::float_name_caption($self,
|
||||
$command);
|
||||
} else {
|
||||
($caption, $prepended) = Texinfo::Common::float_name_caption($self,
|
||||
$command);
|
||||
}
|
||||
my ($caption, $prepended) = Texinfo::Common::float_name_caption($self,
|
||||
$command);
|
||||
my $caption_text = '';
|
||||
my $prepended_text;
|
||||
my $prepended_save = '';
|
||||
@@ -389,13 +315,8 @@ sub ffmpeg_float($$$$$)
|
||||
$caption->{'args'}->[0], 'float caption');
|
||||
}
|
||||
if ($prepended_text.$caption_text ne '') {
|
||||
if ($program_version_num >= 7.000000) {
|
||||
$prepended_text = $self->html_attribute_class('div',['float-caption']). '>'
|
||||
. $prepended_text;
|
||||
} else {
|
||||
$prepended_text = $self->_attribute_class('div','float-caption'). '>'
|
||||
. $prepended_text;
|
||||
}
|
||||
$prepended_text = $self->_attribute_class('div','float-caption'). '>'
|
||||
. $prepended_text;
|
||||
$caption_text .= '</div>';
|
||||
}
|
||||
my $html_class = '';
|
||||
@@ -408,13 +329,8 @@ sub ffmpeg_float($$$$$)
|
||||
$prepended_text = '';
|
||||
$caption_text = '';
|
||||
}
|
||||
if ($program_version_num >= 7.000000) {
|
||||
return $self->html_attribute_class('div', [$html_class]). '>' . "\n" .
|
||||
$prepended_text . $caption_text . $content . '</div>';
|
||||
} else {
|
||||
return $self->_attribute_class('div', $html_class). '>' . "\n" .
|
||||
$prepended_text . $caption_text . $content . '</div>';
|
||||
}
|
||||
return $self->_attribute_class('div', $html_class). '>' . "\n" .
|
||||
$prepended_text . $caption_text . $content . '</div>';
|
||||
}
|
||||
|
||||
texinfo_register_command_formatting('float',
|
||||
|
||||
Executable → Regular
Executable → Regular
@@ -44,3 +44,4 @@ a+b*c;
|
||||
here the reader knows that a,b,c are meant to be signed integers but for C
|
||||
standard compliance / to avoid undefined behavior they are stored in unsigned
|
||||
ints.
|
||||
|
||||
|
||||
@@ -418,4 +418,4 @@ done:
|
||||
|
||||
When all of this is done, you can submit your patch to the ffmpeg-devel
|
||||
mailing-list for review. If you need any help, feel free to come on our IRC
|
||||
channel, #ffmpeg-devel on irc.libera.chat.
|
||||
channel, #ffmpeg-devel on irc.freenode.net.
|
||||
|
||||
+1
-1
@@ -90,7 +90,7 @@ COMPILE_MSA = $(call COMPILE,CC,MSAFLAGS)
|
||||
-$(if $(ASMSTRIPFLAGS), $(STRIP) $(ASMSTRIPFLAGS) $@)
|
||||
|
||||
%.o: %.rc
|
||||
$(WINDRES) $(IFLAGS) $(foreach ARG,$(CC_DEPFLAGS),--preprocessor-arg "$(ARG)") -o $@ $<
|
||||
$(WINDRES) $(IFLAGS) --preprocessor "$(DEPWINDRES) -E -xc-header -DRC_INVOKED $(CC_DEPFLAGS)" -o $@ $<
|
||||
|
||||
%.i: %.c
|
||||
$(CC) $(CCFLAGS) $(CC_E) $<
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
#!/bin/sh
|
||||
|
||||
toupper(){
|
||||
echo "$@" | tr abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
||||
}
|
||||
|
||||
+1
-1
@@ -537,7 +537,7 @@ static const AVOption *opt_find(void *obj, const char *name, const char *unit,
|
||||
return o;
|
||||
}
|
||||
|
||||
#define FLAGS ((o->type == AV_OPT_TYPE_FLAGS && (arg[0]=='-' || arg[0]=='+')) ? AV_DICT_APPEND : 0)
|
||||
#define FLAGS (o->type == AV_OPT_TYPE_FLAGS && (arg[0]=='-' || arg[0]=='+')) ? AV_DICT_APPEND : 0
|
||||
int opt_default(void *optctx, const char *opt, const char *arg)
|
||||
{
|
||||
const AVOption *o;
|
||||
|
||||
+9
-15
@@ -492,9 +492,8 @@ static int read_key(void)
|
||||
}
|
||||
//Read it
|
||||
if(nchars != 0) {
|
||||
if (read(0, &ch, 1) == 1)
|
||||
return ch;
|
||||
return 0;
|
||||
read(0, &ch, 1);
|
||||
return ch;
|
||||
}else{
|
||||
return -1;
|
||||
}
|
||||
@@ -1975,9 +1974,6 @@ static void flush_encoders(void)
|
||||
AVPacket *pkt = ost->pkt;
|
||||
int pkt_size;
|
||||
|
||||
if (!pkt)
|
||||
break;
|
||||
|
||||
switch (enc->codec_type) {
|
||||
case AVMEDIA_TYPE_AUDIO:
|
||||
desc = "audio";
|
||||
@@ -3467,7 +3463,12 @@ static int init_output_stream_encode(OutputStream *ost, AVFrame *frame)
|
||||
enc_ctx->bits_per_raw_sample = frame_bits_per_raw_sample;
|
||||
}
|
||||
|
||||
// Field order: autodetection
|
||||
if (ost->top_field_first == 0) {
|
||||
enc_ctx->field_order = AV_FIELD_BB;
|
||||
} else if (ost->top_field_first == 1) {
|
||||
enc_ctx->field_order = AV_FIELD_TT;
|
||||
}
|
||||
|
||||
if (frame) {
|
||||
if (enc_ctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME) &&
|
||||
ost->top_field_first >= 0)
|
||||
@@ -3482,13 +3483,6 @@ static int init_output_stream_encode(OutputStream *ost, AVFrame *frame)
|
||||
enc_ctx->field_order = AV_FIELD_PROGRESSIVE;
|
||||
}
|
||||
|
||||
// Field order: override
|
||||
if (ost->top_field_first == 0) {
|
||||
enc_ctx->field_order = AV_FIELD_BB;
|
||||
} else if (ost->top_field_first == 1) {
|
||||
enc_ctx->field_order = AV_FIELD_TT;
|
||||
}
|
||||
|
||||
if (ost->forced_keyframes) {
|
||||
if (!strncmp(ost->forced_keyframes, "expr:", 5)) {
|
||||
ret = av_expr_parse(&ost->forced_keyframes_pexpr, ost->forced_keyframes+5,
|
||||
@@ -3956,7 +3950,7 @@ static OutputStream *choose_output(void)
|
||||
ost->st->index, ost->st->id, ost->initialized, ost->inputs_done, ost->finished);
|
||||
|
||||
if (!ost->initialized && !ost->inputs_done)
|
||||
return ost->unavailable ? NULL : ost;
|
||||
return ost;
|
||||
|
||||
if (!ost->finished && opts < opts_min) {
|
||||
opts_min = opts;
|
||||
|
||||
@@ -93,7 +93,6 @@ typedef struct {
|
||||
|
||||
typedef struct OptionsContext {
|
||||
OptionGroup *g;
|
||||
int depth;
|
||||
|
||||
/* input/output options */
|
||||
int64_t start_time;
|
||||
|
||||
+2
-17
@@ -414,8 +414,6 @@ static int opt_map(void *optctx, const char *opt, const char *arg)
|
||||
for (i = 0; i < o->nb_stream_maps; i++) {
|
||||
m = &o->stream_maps[i];
|
||||
if (file_idx == m->file_index &&
|
||||
m->stream_index >= 0 &&
|
||||
m->stream_index < input_files[m->file_index]->nb_streams &&
|
||||
check_stream_specifier(input_files[m->file_index]->ctx,
|
||||
input_files[m->file_index]->ctx->streams[m->stream_index],
|
||||
*p == ':' ? p + 1 : p) > 0)
|
||||
@@ -1918,7 +1916,6 @@ static OutputStream *new_audio_stream(OptionsContext *o, AVFormatContext *oc, in
|
||||
|
||||
if (!ost->stream_copy) {
|
||||
char *sample_fmt = NULL;
|
||||
const char *apad = NULL;
|
||||
|
||||
MATCH_PER_STREAM_OPT(audio_channels, i, audio_enc->channels, oc, st);
|
||||
|
||||
@@ -1931,12 +1928,8 @@ static OutputStream *new_audio_stream(OptionsContext *o, AVFormatContext *oc, in
|
||||
|
||||
MATCH_PER_STREAM_OPT(audio_sample_rate, i, audio_enc->sample_rate, oc, st);
|
||||
|
||||
MATCH_PER_STREAM_OPT(apad, str, apad, oc, st);
|
||||
if (apad) {
|
||||
ost->apad = av_strdup(apad);
|
||||
if (!ost->apad)
|
||||
exit_program(1);
|
||||
}
|
||||
MATCH_PER_STREAM_OPT(apad, str, ost->apad, oc, st);
|
||||
ost->apad = av_strdup(ost->apad);
|
||||
|
||||
ost->avfilter = get_ost_filters(o, oc, ost);
|
||||
if (!ost->avfilter)
|
||||
@@ -3021,12 +3014,6 @@ static int opt_preset(void *optctx, const char *opt, const char *arg)
|
||||
FILE *f=NULL;
|
||||
char filename[1000], line[1000], tmp_line[1000];
|
||||
const char *codec_name = NULL;
|
||||
int depth = o->depth;
|
||||
|
||||
if (depth > 2) {
|
||||
av_log(NULL, AV_LOG_ERROR, "too deep recursion\n");
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
|
||||
tmp_line[0] = *opt;
|
||||
tmp_line[1] = 0;
|
||||
@@ -3040,7 +3027,6 @@ static int opt_preset(void *optctx, const char *opt, const char *arg)
|
||||
exit_program(1);
|
||||
}
|
||||
|
||||
o->depth ++;
|
||||
while (fgets(line, sizeof(line), f)) {
|
||||
char *key = tmp_line, *value, *endptr;
|
||||
|
||||
@@ -3065,7 +3051,6 @@ static int opt_preset(void *optctx, const char *opt, const char *arg)
|
||||
}
|
||||
}
|
||||
|
||||
o->depth = depth;
|
||||
fclose(f);
|
||||
|
||||
return 0;
|
||||
|
||||
+2
-2
@@ -131,8 +131,8 @@ static int zero12v_decode_frame(AVCodecContext *avctx, void *data,
|
||||
u = x/2 + (uint16_t *)(pic->data[1] + line * pic->linesize[1]);
|
||||
v = x/2 + (uint16_t *)(pic->data[2] + line * pic->linesize[2]);
|
||||
memcpy(y, y_temp, sizeof(*y) * (width - x));
|
||||
memcpy(u, u_temp, sizeof(*u) * ((width - x + 1) / 2));
|
||||
memcpy(v, v_temp, sizeof(*v) * ((width - x + 1) / 2));
|
||||
memcpy(u, u_temp, sizeof(*u) * (width - x + 1) / 2);
|
||||
memcpy(v, v_temp, sizeof(*v) * (width - x + 1) / 2);
|
||||
}
|
||||
|
||||
line_end += stride;
|
||||
|
||||
@@ -886,8 +886,6 @@ static int decode_frame(AVCodecContext *avctx, void *data,
|
||||
}
|
||||
|
||||
if (i >= CFRAME_BUFFER_COUNT) {
|
||||
if (free_index < 0)
|
||||
return AVERROR_INVALIDDATA;
|
||||
i = free_index;
|
||||
f->cfrm[i].id = id;
|
||||
}
|
||||
|
||||
@@ -70,9 +70,6 @@ static int decode_frame(AVCodecContext *avctx, void *data,
|
||||
unsigned char *planemap = c->planemap;
|
||||
int ret;
|
||||
|
||||
if (buf_size < planes * height *2)
|
||||
return AVERROR_INVALIDDATA;
|
||||
|
||||
if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
|
||||
return ret;
|
||||
|
||||
|
||||
+6
-6
@@ -132,6 +132,7 @@ OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \
|
||||
motion_est.o ratecontrol.o \
|
||||
mpegvideoencdsp.o
|
||||
OBJS-$(CONFIG_MSS34DSP) += mss34dsp.o
|
||||
OBJS-$(CONFIG_NVENC) += nvenc.o
|
||||
OBJS-$(CONFIG_PIXBLOCKDSP) += pixblockdsp.o
|
||||
OBJS-$(CONFIG_QPELDSP) += qpeldsp.o
|
||||
OBJS-$(CONFIG_QSV) += qsv.o
|
||||
@@ -374,9 +375,9 @@ OBJS-$(CONFIG_H264_CUVID_DECODER) += cuviddec.o
|
||||
OBJS-$(CONFIG_H264_MEDIACODEC_DECODER) += mediacodecdec.o
|
||||
OBJS-$(CONFIG_H264_MF_ENCODER) += mfenc.o mf_utils.o
|
||||
OBJS-$(CONFIG_H264_MMAL_DECODER) += mmaldec.o
|
||||
OBJS-$(CONFIG_H264_NVENC_ENCODER) += nvenc.o nvenc_h264.o
|
||||
OBJS-$(CONFIG_NVENC_ENCODER) += nvenc.o nvenc_h264.o
|
||||
OBJS-$(CONFIG_NVENC_H264_ENCODER) += nvenc.o nvenc_h264.o
|
||||
OBJS-$(CONFIG_H264_NVENC_ENCODER) += nvenc_h264.o
|
||||
OBJS-$(CONFIG_NVENC_ENCODER) += nvenc_h264.o
|
||||
OBJS-$(CONFIG_NVENC_H264_ENCODER) += nvenc_h264.o
|
||||
OBJS-$(CONFIG_H264_OMX_ENCODER) += omx.o
|
||||
OBJS-$(CONFIG_H264_QSV_DECODER) += qsvdec.o
|
||||
OBJS-$(CONFIG_H264_QSV_ENCODER) += qsvenc_h264.o
|
||||
@@ -396,8 +397,8 @@ OBJS-$(CONFIG_HEVC_AMF_ENCODER) += amfenc_hevc.o
|
||||
OBJS-$(CONFIG_HEVC_CUVID_DECODER) += cuviddec.o
|
||||
OBJS-$(CONFIG_HEVC_MEDIACODEC_DECODER) += mediacodecdec.o
|
||||
OBJS-$(CONFIG_HEVC_MF_ENCODER) += mfenc.o mf_utils.o
|
||||
OBJS-$(CONFIG_HEVC_NVENC_ENCODER) += nvenc.o nvenc_hevc.o
|
||||
OBJS-$(CONFIG_NVENC_HEVC_ENCODER) += nvenc.o nvenc_hevc.o
|
||||
OBJS-$(CONFIG_HEVC_NVENC_ENCODER) += nvenc_hevc.o
|
||||
OBJS-$(CONFIG_NVENC_HEVC_ENCODER) += nvenc_hevc.o
|
||||
OBJS-$(CONFIG_HEVC_QSV_DECODER) += qsvdec.o
|
||||
OBJS-$(CONFIG_HEVC_QSV_ENCODER) += qsvenc_hevc.o hevc_ps_enc.o \
|
||||
hevc_data.o
|
||||
@@ -874,7 +875,6 @@ OBJS-$(CONFIG_ADPCM_G726_ENCODER) += g726.o
|
||||
OBJS-$(CONFIG_ADPCM_G726LE_DECODER) += g726.o
|
||||
OBJS-$(CONFIG_ADPCM_G726LE_ENCODER) += g726.o
|
||||
OBJS-$(CONFIG_ADPCM_IMA_AMV_DECODER) += adpcm.o adpcm_data.o
|
||||
OBJS-$(CONFIG_ADPCM_IMA_AMV_ENCODER) += adpcmenc.o adpcm_data.o
|
||||
OBJS-$(CONFIG_ADPCM_IMA_ALP_DECODER) += adpcm.o adpcm_data.o
|
||||
OBJS-$(CONFIG_ADPCM_IMA_ALP_ENCODER) += adpcmenc.o adpcm_data.o
|
||||
OBJS-$(CONFIG_ADPCM_IMA_APC_DECODER) += adpcm.o adpcm_data.o
|
||||
|
||||
@@ -107,16 +107,13 @@ static void render_charset(AVCodecContext *avctx, uint8_t *charset,
|
||||
uint8_t pix;
|
||||
int lowdiff, highdiff;
|
||||
int *best_cb = c->mc_best_cb;
|
||||
uint8_t index1[256];
|
||||
uint8_t index2[256];
|
||||
uint8_t dither[256];
|
||||
static uint8_t index1[256];
|
||||
static uint8_t index2[256];
|
||||
static uint8_t dither[256];
|
||||
int i;
|
||||
int distance;
|
||||
|
||||
/* Generate lookup-tables for dither and index before looping.
|
||||
* This code relies on c->mc_luma_vals[c->mc_pal_size - 1] being
|
||||
* the maximum of all the mc_luma_vals values and on the minimum
|
||||
* being zero; this ensures that dither is properly initialized. */
|
||||
/* generate lookup-tables for dither and index before looping */
|
||||
i = 0;
|
||||
for (a=0; a < 256; a++) {
|
||||
if(i < c->mc_pal_size -1 && a == c->mc_luma_vals[i + 1]) {
|
||||
|
||||
@@ -843,25 +843,25 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe)
|
||||
sce0->ics.swb_sizes[g],
|
||||
sce0->sf_idx[w*16+g],
|
||||
sce0->band_type[w*16+g],
|
||||
lambda / (band0->threshold + FLT_MIN), INFINITY, &b1, NULL, 0);
|
||||
lambda / band0->threshold, INFINITY, &b1, NULL, 0);
|
||||
dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
|
||||
R34,
|
||||
sce1->ics.swb_sizes[g],
|
||||
sce1->sf_idx[w*16+g],
|
||||
sce1->band_type[w*16+g],
|
||||
lambda / (band1->threshold + FLT_MIN), INFINITY, &b2, NULL, 0);
|
||||
lambda / band1->threshold, INFINITY, &b2, NULL, 0);
|
||||
dist2 += quantize_band_cost(s, M,
|
||||
M34,
|
||||
sce0->ics.swb_sizes[g],
|
||||
mididx,
|
||||
midcb,
|
||||
lambda / (minthr + FLT_MIN), INFINITY, &b3, NULL, 0);
|
||||
lambda / minthr, INFINITY, &b3, NULL, 0);
|
||||
dist2 += quantize_band_cost(s, S,
|
||||
S34,
|
||||
sce1->ics.swb_sizes[g],
|
||||
sididx,
|
||||
sidcb,
|
||||
mslambda / (minthr * bmax + FLT_MIN), INFINITY, &b4, NULL, 0);
|
||||
mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0);
|
||||
B0 += b1+b2;
|
||||
B1 += b3+b4;
|
||||
dist1 -= b1+b2;
|
||||
|
||||
@@ -539,9 +539,6 @@ static int output_configure(AACContext *ac,
|
||||
uint8_t id_map[TYPE_END][MAX_ELEM_ID] = {{ 0 }};
|
||||
uint8_t type_counts[TYPE_END] = { 0 };
|
||||
|
||||
if (get_new_frame && !ac->frame)
|
||||
return AVERROR_INVALIDDATA;
|
||||
|
||||
if (ac->oc[1].layout_map != layout_map) {
|
||||
memcpy(ac->oc[1].layout_map, layout_map, tags * sizeof(layout_map[0]));
|
||||
ac->oc[1].layout_map_tags = tags;
|
||||
@@ -1079,18 +1076,14 @@ static int decode_audio_specific_config_gb(AACContext *ac,
|
||||
{
|
||||
int i, ret;
|
||||
GetBitContext gbc = *gb;
|
||||
MPEG4AudioConfig m4ac_bak = *m4ac;
|
||||
|
||||
if ((i = ff_mpeg4audio_get_config_gb(m4ac, &gbc, sync_extension, avctx)) < 0) {
|
||||
*m4ac = m4ac_bak;
|
||||
if ((i = ff_mpeg4audio_get_config_gb(m4ac, &gbc, sync_extension, avctx)) < 0)
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
|
||||
if (m4ac->sampling_index > 12) {
|
||||
av_log(avctx, AV_LOG_ERROR,
|
||||
"invalid sampling rate index %d\n",
|
||||
m4ac->sampling_index);
|
||||
*m4ac = m4ac_bak;
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
if (m4ac->object_type == AOT_ER_AAC_LD &&
|
||||
@@ -1098,7 +1091,6 @@ static int decode_audio_specific_config_gb(AACContext *ac,
|
||||
av_log(avctx, AV_LOG_ERROR,
|
||||
"invalid low delay sampling rate index %d\n",
|
||||
m4ac->sampling_index);
|
||||
*m4ac = m4ac_bak;
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
|
||||
|
||||
+2
-3
@@ -28,7 +28,6 @@
|
||||
* TODOs:
|
||||
* add sane pulse detection
|
||||
***********************************/
|
||||
#include <float.h>
|
||||
|
||||
#include "libavutil/libm.h"
|
||||
#include "libavutil/float_dsp.h"
|
||||
@@ -853,7 +852,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
|
||||
/* Not so fast though */
|
||||
ratio = sqrtf(ratio);
|
||||
}
|
||||
s->lambda = av_clipf(s->lambda * ratio, FLT_EPSILON, 65536.f);
|
||||
s->lambda = FFMIN(s->lambda * ratio, 65536.f);
|
||||
|
||||
/* Keep iterating if we must reduce and lambda is in the sky */
|
||||
if (ratio > 0.9f && ratio < 1.1f) {
|
||||
@@ -898,7 +897,7 @@ static av_cold int aac_encode_end(AVCodecContext *avctx)
|
||||
{
|
||||
AACEncContext *s = avctx->priv_data;
|
||||
|
||||
av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_count ? s->lambda_sum / s->lambda_count : NAN);
|
||||
av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_sum / s->lambda_count);
|
||||
|
||||
ff_mdct_end(&s->mdct1024);
|
||||
ff_mdct_end(&s->mdct128);
|
||||
|
||||
+13
-27
@@ -173,7 +173,6 @@ void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
|
||||
sce->ics.window_sequence[0] == LONG_START_SEQUENCE ? 0 : 2;
|
||||
const int sfb_len = sfb_end - sfb_start;
|
||||
const int coef_len = sce->ics.swb_offset[sfb_end] - sce->ics.swb_offset[sfb_start];
|
||||
const int n_filt = is8 ? 1 : order != TNS_MAX_ORDER ? 2 : 3;
|
||||
|
||||
if (coef_len <= 0 || sfb_len <= 0) {
|
||||
sce->tns.present = 0;
|
||||
@@ -181,30 +180,16 @@ void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
|
||||
}
|
||||
|
||||
for (w = 0; w < sce->ics.num_windows; w++) {
|
||||
float en[4] = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
int oc_start = 0;
|
||||
float en[2] = {0.0f, 0.0f};
|
||||
int oc_start = 0, os_start = 0;
|
||||
int coef_start = sce->ics.swb_offset[sfb_start];
|
||||
|
||||
if (n_filt == 2) {
|
||||
for (g = sfb_start; g < sce->ics.num_swb && g <= sfb_end; g++) {
|
||||
FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[w*16+g];
|
||||
if (g > sfb_start + (sfb_len/2))
|
||||
en[1] += band->energy; /* End */
|
||||
else
|
||||
en[0] += band->energy; /* Start */
|
||||
}
|
||||
en[2] = en[0];
|
||||
} else {
|
||||
for (g = sfb_start; g < sce->ics.num_swb && g <= sfb_end; g++) {
|
||||
FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[w*16+g];
|
||||
if (g > sfb_start + (sfb_len/2) + (sfb_len/4))
|
||||
en[2] += band->energy; /* End */
|
||||
else if (g > sfb_start + (sfb_len/2) - (sfb_len/4))
|
||||
en[1] += band->energy; /* Middle */
|
||||
else
|
||||
en[0] += band->energy; /* Start */
|
||||
}
|
||||
en[3] = en[0];
|
||||
for (g = sfb_start; g < sce->ics.num_swb && g <= sfb_end; g++) {
|
||||
FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[w*16+g];
|
||||
if (g > sfb_start + (sfb_len/2))
|
||||
en[1] += band->energy;
|
||||
else
|
||||
en[0] += band->energy;
|
||||
}
|
||||
|
||||
/* LPC */
|
||||
@@ -214,14 +199,15 @@ void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
|
||||
if (!order || !isfinite(gain) || gain < TNS_GAIN_THRESHOLD_LOW || gain > TNS_GAIN_THRESHOLD_HIGH)
|
||||
continue;
|
||||
|
||||
tns->n_filt[w] = n_filt;
|
||||
tns->n_filt[w] = is8 ? 1 : order != TNS_MAX_ORDER ? 2 : 3;
|
||||
for (g = 0; g < tns->n_filt[w]; g++) {
|
||||
tns->direction[w][g] = slant != 2 ? slant : en[g] < en[g + 1];
|
||||
tns->order[w][g] = order/tns->n_filt[w];
|
||||
tns->length[w][g] = sfb_len/tns->n_filt[w];
|
||||
tns->direction[w][g] = slant != 2 ? slant : en[g] < en[!g];
|
||||
tns->order[w][g] = g < tns->n_filt[w] ? order/tns->n_filt[w] : order - oc_start;
|
||||
tns->length[w][g] = g < tns->n_filt[w] ? sfb_len/tns->n_filt[w] : sfb_len - os_start;
|
||||
quantize_coefs(&coefs[oc_start], tns->coef_idx[w][g], tns->coef[w][g],
|
||||
tns->order[w][g], c_bits);
|
||||
oc_start += tns->order[w][g];
|
||||
os_start += tns->length[w][g];
|
||||
}
|
||||
count++;
|
||||
}
|
||||
|
||||
+1
-4
@@ -308,9 +308,6 @@ static av_cold int psy_3gpp_init(FFPsyContext *ctx) {
|
||||
const int bandwidth = ctx->cutoff ? ctx->cutoff : AAC_CUTOFF(ctx->avctx);
|
||||
const float num_bark = calc_bark((float)bandwidth);
|
||||
|
||||
if (bandwidth <= 0)
|
||||
return AVERROR(EINVAL);
|
||||
|
||||
ctx->model_priv_data = av_mallocz(sizeof(AacPsyContext));
|
||||
if (!ctx->model_priv_data)
|
||||
return AVERROR(ENOMEM);
|
||||
@@ -797,7 +794,7 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel,
|
||||
|
||||
if (pe < 1.15f * desired_pe) {
|
||||
/* 6.6.1.3.6 "Final threshold modification by linearization" */
|
||||
norm_fac = norm_fac ? 1.0f / norm_fac : 0;
|
||||
norm_fac = 1.0f / norm_fac;
|
||||
for (w = 0; w < wi->num_windows*16; w += 16) {
|
||||
for (g = 0; g < num_bands; g++) {
|
||||
AacPsyBand *band = &pch->band[w+g];
|
||||
|
||||
@@ -588,7 +588,6 @@ static int sbr_make_f_derived(AACContext *ac, SpectralBandReplication *sbr)
|
||||
|
||||
if (sbr->n_q > 5) {
|
||||
av_log(ac->avctx, AV_LOG_ERROR, "Too many noise floor scale factors: %d\n", sbr->n_q);
|
||||
sbr->n_q = 1;
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -1430,9 +1429,6 @@ static void sbr_env_estimate(AAC_FLOAT (*e_curr)[48], INTFLOAT X_high[64][40][2]
|
||||
int ilb = ch_data->t_env[e] * 2 + ENVELOPE_ADJUSTMENT_OFFSET;
|
||||
int iub = ch_data->t_env[e + 1] * 2 + ENVELOPE_ADJUSTMENT_OFFSET;
|
||||
|
||||
if (ilb >= 40)
|
||||
return;
|
||||
|
||||
for (m = 0; m < sbr->m[1]; m++) {
|
||||
AAC_FLOAT sum = sbr->dsp.sum_square(X_high[m+kx1] + ilb, iub - ilb);
|
||||
#if USE_FIXED
|
||||
@@ -1451,9 +1447,6 @@ static void sbr_env_estimate(AAC_FLOAT (*e_curr)[48], INTFLOAT X_high[64][40][2]
|
||||
int iub = ch_data->t_env[e + 1] * 2 + ENVELOPE_ADJUSTMENT_OFFSET;
|
||||
const uint16_t *table = ch_data->bs_freq_res[e + 1] ? sbr->f_tablehigh : sbr->f_tablelow;
|
||||
|
||||
if (ilb >= 40)
|
||||
return;
|
||||
|
||||
for (p = 0; p < sbr->n[ch_data->bs_freq_res[e + 1]]; p++) {
|
||||
#if USE_FIXED
|
||||
SoftFloat sum = FLOAT_0;
|
||||
|
||||
+109
-109
@@ -19,130 +19,130 @@
|
||||
#include "libavutil/aarch64/asm.S"
|
||||
|
||||
function ff_ps_add_squares_neon, export=1
|
||||
1: ld1 {v0.4s,v1.4s}, [x1], #32
|
||||
fmul v0.4s, v0.4s, v0.4s
|
||||
fmul v1.4s, v1.4s, v1.4s
|
||||
faddp v2.4s, v0.4s, v1.4s
|
||||
ld1 {v3.4s}, [x0]
|
||||
fadd v3.4s, v3.4s, v2.4s
|
||||
st1 {v3.4s}, [x0], #16
|
||||
subs w2, w2, #4
|
||||
b.gt 1b
|
||||
1: ld1 {v0.4S,v1.4S}, [x1], #32
|
||||
fmul v0.4S, v0.4S, v0.4S
|
||||
fmul v1.4S, v1.4S, v1.4S
|
||||
faddp v2.4S, v0.4S, v1.4S
|
||||
ld1 {v3.4S}, [x0]
|
||||
fadd v3.4S, v3.4S, v2.4S
|
||||
st1 {v3.4S}, [x0], #16
|
||||
subs w2, w2, #4
|
||||
b.gt 1b
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_ps_mul_pair_single_neon, export=1
|
||||
1: ld1 {v0.4s,v1.4s}, [x1], #32
|
||||
ld1 {v2.4s}, [x2], #16
|
||||
zip1 v3.4s, v2.4s, v2.4s
|
||||
zip2 v4.4s, v2.4s, v2.4s
|
||||
fmul v0.4s, v0.4s, v3.4s
|
||||
fmul v1.4s, v1.4s, v4.4s
|
||||
st1 {v0.4s,v1.4s}, [x0], #32
|
||||
subs w3, w3, #4
|
||||
b.gt 1b
|
||||
1: ld1 {v0.4S,v1.4S}, [x1], #32
|
||||
ld1 {v2.4S}, [x2], #16
|
||||
zip1 v3.4S, v2.4S, v2.4S
|
||||
zip2 v4.4S, v2.4S, v2.4S
|
||||
fmul v0.4S, v0.4S, v3.4S
|
||||
fmul v1.4S, v1.4S, v4.4S
|
||||
st1 {v0.4S,v1.4S}, [x0], #32
|
||||
subs w3, w3, #4
|
||||
b.gt 1b
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_ps_stereo_interpolate_neon, export=1
|
||||
ld1 {v0.4s}, [x2]
|
||||
ld1 {v1.4s}, [x3]
|
||||
zip1 v4.4s, v0.4s, v0.4s
|
||||
zip2 v5.4s, v0.4s, v0.4s
|
||||
zip1 v6.4s, v1.4s, v1.4s
|
||||
zip2 v7.4s, v1.4s, v1.4s
|
||||
1: ld1 {v2.2s}, [x0]
|
||||
ld1 {v3.2s}, [x1]
|
||||
fadd v4.4s, v4.4s, v6.4s
|
||||
fadd v5.4s, v5.4s, v7.4s
|
||||
mov v2.d[1], v2.d[0]
|
||||
mov v3.d[1], v3.d[0]
|
||||
fmul v2.4s, v2.4s, v4.4s
|
||||
fmla v2.4s, v3.4s, v5.4s
|
||||
st1 {v2.d}[0], [x0], #8
|
||||
st1 {v2.d}[1], [x1], #8
|
||||
subs w4, w4, #1
|
||||
b.gt 1b
|
||||
ld1 {v0.4S}, [x2]
|
||||
ld1 {v1.4S}, [x3]
|
||||
zip1 v4.4S, v0.4S, v0.4S
|
||||
zip2 v5.4S, v0.4S, v0.4S
|
||||
zip1 v6.4S, v1.4S, v1.4S
|
||||
zip2 v7.4S, v1.4S, v1.4S
|
||||
1: ld1 {v2.2S}, [x0]
|
||||
ld1 {v3.2S}, [x1]
|
||||
fadd v4.4S, v4.4S, v6.4S
|
||||
fadd v5.4S, v5.4S, v7.4S
|
||||
mov v2.D[1], v2.D[0]
|
||||
mov v3.D[1], v3.D[0]
|
||||
fmul v2.4S, v2.4S, v4.4S
|
||||
fmla v2.4S, v3.4S, v5.4S
|
||||
st1 {v2.D}[0], [x0], #8
|
||||
st1 {v2.D}[1], [x1], #8
|
||||
subs w4, w4, #1
|
||||
b.gt 1b
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_ps_stereo_interpolate_ipdopd_neon, export=1
|
||||
ld1 {v0.4s,v1.4s}, [x2]
|
||||
ld1 {v6.4s,v7.4s}, [x3]
|
||||
fneg v2.4s, v1.4s
|
||||
fneg v3.4s, v7.4s
|
||||
zip1 v16.4s, v0.4s, v0.4s
|
||||
zip2 v17.4s, v0.4s, v0.4s
|
||||
zip1 v18.4s, v2.4s, v1.4s
|
||||
zip2 v19.4s, v2.4s, v1.4s
|
||||
zip1 v20.4s, v6.4s, v6.4s
|
||||
zip2 v21.4s, v6.4s, v6.4s
|
||||
zip1 v22.4s, v3.4s, v7.4s
|
||||
zip2 v23.4s, v3.4s, v7.4s
|
||||
1: ld1 {v2.2s}, [x0]
|
||||
ld1 {v3.2s}, [x1]
|
||||
fadd v16.4s, v16.4s, v20.4s
|
||||
fadd v17.4s, v17.4s, v21.4s
|
||||
mov v2.d[1], v2.d[0]
|
||||
mov v3.d[1], v3.d[0]
|
||||
fmul v4.4s, v2.4s, v16.4s
|
||||
fmla v4.4s, v3.4s, v17.4s
|
||||
fadd v18.4s, v18.4s, v22.4s
|
||||
fadd v19.4s, v19.4s, v23.4s
|
||||
ext v2.16b, v2.16b, v2.16b, #4
|
||||
ext v3.16b, v3.16b, v3.16b, #4
|
||||
fmla v4.4s, v2.4s, v18.4s
|
||||
fmla v4.4s, v3.4s, v19.4s
|
||||
st1 {v4.d}[0], [x0], #8
|
||||
st1 {v4.d}[1], [x1], #8
|
||||
subs w4, w4, #1
|
||||
b.gt 1b
|
||||
ld1 {v0.4S,v1.4S}, [x2]
|
||||
ld1 {v6.4S,v7.4S}, [x3]
|
||||
fneg v2.4S, v1.4S
|
||||
fneg v3.4S, v7.4S
|
||||
zip1 v16.4S, v0.4S, v0.4S
|
||||
zip2 v17.4S, v0.4S, v0.4S
|
||||
zip1 v18.4S, v2.4S, v1.4S
|
||||
zip2 v19.4S, v2.4S, v1.4S
|
||||
zip1 v20.4S, v6.4S, v6.4S
|
||||
zip2 v21.4S, v6.4S, v6.4S
|
||||
zip1 v22.4S, v3.4S, v7.4S
|
||||
zip2 v23.4S, v3.4S, v7.4S
|
||||
1: ld1 {v2.2S}, [x0]
|
||||
ld1 {v3.2S}, [x1]
|
||||
fadd v16.4S, v16.4S, v20.4S
|
||||
fadd v17.4S, v17.4S, v21.4S
|
||||
mov v2.D[1], v2.D[0]
|
||||
mov v3.D[1], v3.D[0]
|
||||
fmul v4.4S, v2.4S, v16.4S
|
||||
fmla v4.4S, v3.4S, v17.4S
|
||||
fadd v18.4S, v18.4S, v22.4S
|
||||
fadd v19.4S, v19.4S, v23.4S
|
||||
ext v2.16B, v2.16B, v2.16B, #4
|
||||
ext v3.16B, v3.16B, v3.16B, #4
|
||||
fmla v4.4S, v2.4S, v18.4S
|
||||
fmla v4.4S, v3.4S, v19.4S
|
||||
st1 {v4.D}[0], [x0], #8
|
||||
st1 {v4.D}[1], [x1], #8
|
||||
subs w4, w4, #1
|
||||
b.gt 1b
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_ps_hybrid_analysis_neon, export=1
|
||||
lsl x3, x3, #3
|
||||
ld2 {v0.4s,v1.4s}, [x1], #32
|
||||
ld2 {v2.2s,v3.2s}, [x1], #16
|
||||
ld1 {v24.2s}, [x1], #8
|
||||
ld2 {v4.2s,v5.2s}, [x1], #16
|
||||
ld2 {v6.4s,v7.4s}, [x1]
|
||||
rev64 v6.4s, v6.4s
|
||||
rev64 v7.4s, v7.4s
|
||||
ext v6.16b, v6.16b, v6.16b, #8
|
||||
ext v7.16b, v7.16b, v7.16b, #8
|
||||
rev64 v4.2s, v4.2s
|
||||
rev64 v5.2s, v5.2s
|
||||
mov v2.d[1], v3.d[0]
|
||||
mov v4.d[1], v5.d[0]
|
||||
mov v5.d[1], v2.d[0]
|
||||
mov v3.d[1], v4.d[0]
|
||||
fadd v16.4s, v0.4s, v6.4s
|
||||
fadd v17.4s, v1.4s, v7.4s
|
||||
fsub v18.4s, v1.4s, v7.4s
|
||||
fsub v19.4s, v0.4s, v6.4s
|
||||
fadd v22.4s, v2.4s, v4.4s
|
||||
fsub v23.4s, v5.4s, v3.4s
|
||||
trn1 v20.2d, v22.2d, v23.2d // {re4+re8, re5+re7, im8-im4, im7-im5}
|
||||
trn2 v21.2d, v22.2d, v23.2d // {im4+im8, im5+im7, re4-re8, re5-re7}
|
||||
1: ld2 {v2.4s,v3.4s}, [x2], #32
|
||||
ld2 {v4.2s,v5.2s}, [x2], #16
|
||||
ld1 {v6.2s}, [x2], #8
|
||||
add x2, x2, #8
|
||||
mov v4.d[1], v5.d[0]
|
||||
mov v6.s[1], v6.s[0]
|
||||
fmul v6.2s, v6.2s, v24.2s
|
||||
fmul v0.4s, v2.4s, v16.4s
|
||||
fmul v1.4s, v2.4s, v17.4s
|
||||
fmls v0.4s, v3.4s, v18.4s
|
||||
fmla v1.4s, v3.4s, v19.4s
|
||||
fmla v0.4s, v4.4s, v20.4s
|
||||
fmla v1.4s, v4.4s, v21.4s
|
||||
faddp v0.4s, v0.4s, v1.4s
|
||||
faddp v0.4s, v0.4s, v0.4s
|
||||
fadd v0.2s, v0.2s, v6.2s
|
||||
st1 {v0.2s}, [x0], x3
|
||||
subs w4, w4, #1
|
||||
b.gt 1b
|
||||
lsl x3, x3, #3
|
||||
ld2 {v0.4S,v1.4S}, [x1], #32
|
||||
ld2 {v2.2S,v3.2S}, [x1], #16
|
||||
ld1 {v24.2S}, [x1], #8
|
||||
ld2 {v4.2S,v5.2S}, [x1], #16
|
||||
ld2 {v6.4S,v7.4S}, [x1]
|
||||
rev64 v6.4S, v6.4S
|
||||
rev64 v7.4S, v7.4S
|
||||
ext v6.16B, v6.16B, v6.16B, #8
|
||||
ext v7.16B, v7.16B, v7.16B, #8
|
||||
rev64 v4.2S, v4.2S
|
||||
rev64 v5.2S, v5.2S
|
||||
mov v2.D[1], v3.D[0]
|
||||
mov v4.D[1], v5.D[0]
|
||||
mov v5.D[1], v2.D[0]
|
||||
mov v3.D[1], v4.D[0]
|
||||
fadd v16.4S, v0.4S, v6.4S
|
||||
fadd v17.4S, v1.4S, v7.4S
|
||||
fsub v18.4S, v1.4S, v7.4S
|
||||
fsub v19.4S, v0.4S, v6.4S
|
||||
fadd v22.4S, v2.4S, v4.4S
|
||||
fsub v23.4S, v5.4S, v3.4S
|
||||
trn1 v20.2D, v22.2D, v23.2D // {re4+re8, re5+re7, im8-im4, im7-im5}
|
||||
trn2 v21.2D, v22.2D, v23.2D // {im4+im8, im5+im7, re4-re8, re5-re7}
|
||||
1: ld2 {v2.4S,v3.4S}, [x2], #32
|
||||
ld2 {v4.2S,v5.2S}, [x2], #16
|
||||
ld1 {v6.2S}, [x2], #8
|
||||
add x2, x2, #8
|
||||
mov v4.D[1], v5.D[0]
|
||||
mov v6.S[1], v6.S[0]
|
||||
fmul v6.2S, v6.2S, v24.2S
|
||||
fmul v0.4S, v2.4S, v16.4S
|
||||
fmul v1.4S, v2.4S, v17.4S
|
||||
fmls v0.4S, v3.4S, v18.4S
|
||||
fmla v1.4S, v3.4S, v19.4S
|
||||
fmla v0.4S, v4.4S, v20.4S
|
||||
fmla v1.4S, v4.4S, v21.4S
|
||||
faddp v0.4S, v0.4S, v1.4S
|
||||
faddp v0.4S, v0.4S, v0.4S
|
||||
fadd v0.2S, v0.2S, v6.2S
|
||||
st1 {v0.2S}, [x0], x3
|
||||
subs w4, w4, #1
|
||||
b.gt 1b
|
||||
ret
|
||||
endfunc
|
||||
|
||||
@@ -353,18 +353,18 @@ function fft\n\()_neon, align=6
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
def_fft 32, 16, 8
|
||||
def_fft 64, 32, 16
|
||||
def_fft 128, 64, 32
|
||||
def_fft 256, 128, 64
|
||||
def_fft 512, 256, 128
|
||||
def_fft 1024, 512, 256
|
||||
def_fft 2048, 1024, 512
|
||||
def_fft 4096, 2048, 1024
|
||||
def_fft 8192, 4096, 2048
|
||||
def_fft 16384, 8192, 4096
|
||||
def_fft 32768, 16384, 8192
|
||||
def_fft 65536, 32768, 16384
|
||||
def_fft 32, 16, 8
|
||||
def_fft 64, 32, 16
|
||||
def_fft 128, 64, 32
|
||||
def_fft 256, 128, 64
|
||||
def_fft 512, 256, 128
|
||||
def_fft 1024, 512, 256
|
||||
def_fft 2048, 1024, 512
|
||||
def_fft 4096, 2048, 1024
|
||||
def_fft 8192, 4096, 2048
|
||||
def_fft 16384, 8192, 4096
|
||||
def_fft 32768, 16384, 8192
|
||||
def_fft 65536, 32768, 16384
|
||||
|
||||
function ff_fft_calc_neon, export=1
|
||||
prfm pldl1keep, [x1]
|
||||
|
||||
+205
-205
@@ -36,11 +36,11 @@ function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
|
||||
lsl w9, w9, #3
|
||||
lsl w10, w10, #1
|
||||
add w9, w9, w10
|
||||
add x6, x6, w9, uxtw
|
||||
ld1r {v22.8h}, [x6]
|
||||
add x6, x6, w9, UXTW
|
||||
ld1r {v22.8H}, [x6]
|
||||
.endif
|
||||
.ifc \codec,vc1
|
||||
movi v22.8h, #28
|
||||
movi v22.8H, #28
|
||||
.endif
|
||||
mul w7, w4, w5
|
||||
lsl w14, w5, #3
|
||||
@@ -53,139 +53,139 @@ function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
|
||||
add w4, w4, #64
|
||||
b.eq 2f
|
||||
|
||||
dup v0.8b, w4
|
||||
dup v1.8b, w12
|
||||
ld1 {v4.8b, v5.8b}, [x1], x2
|
||||
dup v2.8b, w6
|
||||
dup v3.8b, w7
|
||||
ext v5.8b, v4.8b, v5.8b, #1
|
||||
1: ld1 {v6.8b, v7.8b}, [x1], x2
|
||||
umull v16.8h, v4.8b, v0.8b
|
||||
umlal v16.8h, v5.8b, v1.8b
|
||||
ext v7.8b, v6.8b, v7.8b, #1
|
||||
ld1 {v4.8b, v5.8b}, [x1], x2
|
||||
umlal v16.8h, v6.8b, v2.8b
|
||||
dup v0.8B, w4
|
||||
dup v1.8B, w12
|
||||
ld1 {v4.8B, v5.8B}, [x1], x2
|
||||
dup v2.8B, w6
|
||||
dup v3.8B, w7
|
||||
ext v5.8B, v4.8B, v5.8B, #1
|
||||
1: ld1 {v6.8B, v7.8B}, [x1], x2
|
||||
umull v16.8H, v4.8B, v0.8B
|
||||
umlal v16.8H, v5.8B, v1.8B
|
||||
ext v7.8B, v6.8B, v7.8B, #1
|
||||
ld1 {v4.8B, v5.8B}, [x1], x2
|
||||
umlal v16.8H, v6.8B, v2.8B
|
||||
prfm pldl1strm, [x1]
|
||||
ext v5.8b, v4.8b, v5.8b, #1
|
||||
umlal v16.8h, v7.8b, v3.8b
|
||||
umull v17.8h, v6.8b, v0.8b
|
||||
ext v5.8B, v4.8B, v5.8B, #1
|
||||
umlal v16.8H, v7.8B, v3.8B
|
||||
umull v17.8H, v6.8B, v0.8B
|
||||
subs w3, w3, #2
|
||||
umlal v17.8h, v7.8b, v1.8b
|
||||
umlal v17.8h, v4.8b, v2.8b
|
||||
umlal v17.8h, v5.8b, v3.8b
|
||||
umlal v17.8H, v7.8B, v1.8B
|
||||
umlal v17.8H, v4.8B, v2.8B
|
||||
umlal v17.8H, v5.8B, v3.8B
|
||||
prfm pldl1strm, [x1, x2]
|
||||
.ifc \codec,h264
|
||||
rshrn v16.8b, v16.8h, #6
|
||||
rshrn v17.8b, v17.8h, #6
|
||||
rshrn v16.8B, v16.8H, #6
|
||||
rshrn v17.8B, v17.8H, #6
|
||||
.else
|
||||
add v16.8h, v16.8h, v22.8h
|
||||
add v17.8h, v17.8h, v22.8h
|
||||
shrn v16.8b, v16.8h, #6
|
||||
shrn v17.8b, v17.8h, #6
|
||||
add v16.8H, v16.8H, v22.8H
|
||||
add v17.8H, v17.8H, v22.8H
|
||||
shrn v16.8B, v16.8H, #6
|
||||
shrn v17.8B, v17.8H, #6
|
||||
.endif
|
||||
.ifc \type,avg
|
||||
ld1 {v20.8b}, [x8], x2
|
||||
ld1 {v21.8b}, [x8], x2
|
||||
urhadd v16.8b, v16.8b, v20.8b
|
||||
urhadd v17.8b, v17.8b, v21.8b
|
||||
ld1 {v20.8B}, [x8], x2
|
||||
ld1 {v21.8B}, [x8], x2
|
||||
urhadd v16.8B, v16.8B, v20.8B
|
||||
urhadd v17.8B, v17.8B, v21.8B
|
||||
.endif
|
||||
st1 {v16.8b}, [x0], x2
|
||||
st1 {v17.8b}, [x0], x2
|
||||
st1 {v16.8B}, [x0], x2
|
||||
st1 {v17.8B}, [x0], x2
|
||||
b.gt 1b
|
||||
ret
|
||||
|
||||
2: adds w12, w12, w6
|
||||
dup v0.8b, w4
|
||||
dup v0.8B, w4
|
||||
b.eq 5f
|
||||
tst w6, w6
|
||||
dup v1.8b, w12
|
||||
dup v1.8B, w12
|
||||
b.eq 4f
|
||||
|
||||
ld1 {v4.8b}, [x1], x2
|
||||
3: ld1 {v6.8b}, [x1], x2
|
||||
umull v16.8h, v4.8b, v0.8b
|
||||
umlal v16.8h, v6.8b, v1.8b
|
||||
ld1 {v4.8b}, [x1], x2
|
||||
umull v17.8h, v6.8b, v0.8b
|
||||
umlal v17.8h, v4.8b, v1.8b
|
||||
ld1 {v4.8B}, [x1], x2
|
||||
3: ld1 {v6.8B}, [x1], x2
|
||||
umull v16.8H, v4.8B, v0.8B
|
||||
umlal v16.8H, v6.8B, v1.8B
|
||||
ld1 {v4.8B}, [x1], x2
|
||||
umull v17.8H, v6.8B, v0.8B
|
||||
umlal v17.8H, v4.8B, v1.8B
|
||||
prfm pldl1strm, [x1]
|
||||
.ifc \codec,h264
|
||||
rshrn v16.8b, v16.8h, #6
|
||||
rshrn v17.8b, v17.8h, #6
|
||||
rshrn v16.8B, v16.8H, #6
|
||||
rshrn v17.8B, v17.8H, #6
|
||||
.else
|
||||
add v16.8h, v16.8h, v22.8h
|
||||
add v17.8h, v17.8h, v22.8h
|
||||
shrn v16.8b, v16.8h, #6
|
||||
shrn v17.8b, v17.8h, #6
|
||||
add v16.8H, v16.8H, v22.8H
|
||||
add v17.8H, v17.8H, v22.8H
|
||||
shrn v16.8B, v16.8H, #6
|
||||
shrn v17.8B, v17.8H, #6
|
||||
.endif
|
||||
prfm pldl1strm, [x1, x2]
|
||||
.ifc \type,avg
|
||||
ld1 {v20.8b}, [x8], x2
|
||||
ld1 {v21.8b}, [x8], x2
|
||||
urhadd v16.8b, v16.8b, v20.8b
|
||||
urhadd v17.8b, v17.8b, v21.8b
|
||||
ld1 {v20.8B}, [x8], x2
|
||||
ld1 {v21.8B}, [x8], x2
|
||||
urhadd v16.8B, v16.8B, v20.8B
|
||||
urhadd v17.8B, v17.8B, v21.8B
|
||||
.endif
|
||||
subs w3, w3, #2
|
||||
st1 {v16.8b}, [x0], x2
|
||||
st1 {v17.8b}, [x0], x2
|
||||
st1 {v16.8B}, [x0], x2
|
||||
st1 {v17.8B}, [x0], x2
|
||||
b.gt 3b
|
||||
ret
|
||||
|
||||
4: ld1 {v4.8b, v5.8b}, [x1], x2
|
||||
ld1 {v6.8b, v7.8b}, [x1], x2
|
||||
ext v5.8b, v4.8b, v5.8b, #1
|
||||
ext v7.8b, v6.8b, v7.8b, #1
|
||||
4: ld1 {v4.8B, v5.8B}, [x1], x2
|
||||
ld1 {v6.8B, v7.8B}, [x1], x2
|
||||
ext v5.8B, v4.8B, v5.8B, #1
|
||||
ext v7.8B, v6.8B, v7.8B, #1
|
||||
prfm pldl1strm, [x1]
|
||||
subs w3, w3, #2
|
||||
umull v16.8h, v4.8b, v0.8b
|
||||
umlal v16.8h, v5.8b, v1.8b
|
||||
umull v17.8h, v6.8b, v0.8b
|
||||
umlal v17.8h, v7.8b, v1.8b
|
||||
umull v16.8H, v4.8B, v0.8B
|
||||
umlal v16.8H, v5.8B, v1.8B
|
||||
umull v17.8H, v6.8B, v0.8B
|
||||
umlal v17.8H, v7.8B, v1.8B
|
||||
prfm pldl1strm, [x1, x2]
|
||||
.ifc \codec,h264
|
||||
rshrn v16.8b, v16.8h, #6
|
||||
rshrn v17.8b, v17.8h, #6
|
||||
rshrn v16.8B, v16.8H, #6
|
||||
rshrn v17.8B, v17.8H, #6
|
||||
.else
|
||||
add v16.8h, v16.8h, v22.8h
|
||||
add v17.8h, v17.8h, v22.8h
|
||||
shrn v16.8b, v16.8h, #6
|
||||
shrn v17.8b, v17.8h, #6
|
||||
add v16.8H, v16.8H, v22.8H
|
||||
add v17.8H, v17.8H, v22.8H
|
||||
shrn v16.8B, v16.8H, #6
|
||||
shrn v17.8B, v17.8H, #6
|
||||
.endif
|
||||
.ifc \type,avg
|
||||
ld1 {v20.8b}, [x8], x2
|
||||
ld1 {v21.8b}, [x8], x2
|
||||
urhadd v16.8b, v16.8b, v20.8b
|
||||
urhadd v17.8b, v17.8b, v21.8b
|
||||
ld1 {v20.8B}, [x8], x2
|
||||
ld1 {v21.8B}, [x8], x2
|
||||
urhadd v16.8B, v16.8B, v20.8B
|
||||
urhadd v17.8B, v17.8B, v21.8B
|
||||
.endif
|
||||
st1 {v16.8b}, [x0], x2
|
||||
st1 {v17.8b}, [x0], x2
|
||||
st1 {v16.8B}, [x0], x2
|
||||
st1 {v17.8B}, [x0], x2
|
||||
b.gt 4b
|
||||
ret
|
||||
|
||||
5: ld1 {v4.8b}, [x1], x2
|
||||
ld1 {v5.8b}, [x1], x2
|
||||
5: ld1 {v4.8B}, [x1], x2
|
||||
ld1 {v5.8B}, [x1], x2
|
||||
prfm pldl1strm, [x1]
|
||||
subs w3, w3, #2
|
||||
umull v16.8h, v4.8b, v0.8b
|
||||
umull v17.8h, v5.8b, v0.8b
|
||||
umull v16.8H, v4.8B, v0.8B
|
||||
umull v17.8H, v5.8B, v0.8B
|
||||
prfm pldl1strm, [x1, x2]
|
||||
.ifc \codec,h264
|
||||
rshrn v16.8b, v16.8h, #6
|
||||
rshrn v17.8b, v17.8h, #6
|
||||
rshrn v16.8B, v16.8H, #6
|
||||
rshrn v17.8B, v17.8H, #6
|
||||
.else
|
||||
add v16.8h, v16.8h, v22.8h
|
||||
add v17.8h, v17.8h, v22.8h
|
||||
shrn v16.8b, v16.8h, #6
|
||||
shrn v17.8b, v17.8h, #6
|
||||
add v16.8H, v16.8H, v22.8H
|
||||
add v17.8H, v17.8H, v22.8H
|
||||
shrn v16.8B, v16.8H, #6
|
||||
shrn v17.8B, v17.8H, #6
|
||||
.endif
|
||||
.ifc \type,avg
|
||||
ld1 {v20.8b}, [x8], x2
|
||||
ld1 {v21.8b}, [x8], x2
|
||||
urhadd v16.8b, v16.8b, v20.8b
|
||||
urhadd v17.8b, v17.8b, v21.8b
|
||||
ld1 {v20.8B}, [x8], x2
|
||||
ld1 {v21.8B}, [x8], x2
|
||||
urhadd v16.8B, v16.8B, v20.8B
|
||||
urhadd v17.8B, v17.8B, v21.8B
|
||||
.endif
|
||||
st1 {v16.8b}, [x0], x2
|
||||
st1 {v17.8b}, [x0], x2
|
||||
st1 {v16.8B}, [x0], x2
|
||||
st1 {v17.8B}, [x0], x2
|
||||
b.gt 5b
|
||||
ret
|
||||
endfunc
|
||||
@@ -206,11 +206,11 @@ function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
|
||||
lsl w9, w9, #3
|
||||
lsl w10, w10, #1
|
||||
add w9, w9, w10
|
||||
add x6, x6, w9, uxtw
|
||||
ld1r {v22.8h}, [x6]
|
||||
add x6, x6, w9, UXTW
|
||||
ld1r {v22.8H}, [x6]
|
||||
.endif
|
||||
.ifc \codec,vc1
|
||||
movi v22.8h, #28
|
||||
movi v22.8H, #28
|
||||
.endif
|
||||
mul w7, w4, w5
|
||||
lsl w14, w5, #3
|
||||
@@ -223,133 +223,133 @@ function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
|
||||
add w4, w4, #64
|
||||
b.eq 2f
|
||||
|
||||
dup v24.8b, w4
|
||||
dup v25.8b, w12
|
||||
ld1 {v4.8b}, [x1], x2
|
||||
dup v26.8b, w6
|
||||
dup v27.8b, w7
|
||||
ext v5.8b, v4.8b, v5.8b, #1
|
||||
trn1 v0.2s, v24.2s, v25.2s
|
||||
trn1 v2.2s, v26.2s, v27.2s
|
||||
trn1 v4.2s, v4.2s, v5.2s
|
||||
1: ld1 {v6.8b}, [x1], x2
|
||||
ext v7.8b, v6.8b, v7.8b, #1
|
||||
trn1 v6.2s, v6.2s, v7.2s
|
||||
umull v18.8h, v4.8b, v0.8b
|
||||
umlal v18.8h, v6.8b, v2.8b
|
||||
ld1 {v4.8b}, [x1], x2
|
||||
ext v5.8b, v4.8b, v5.8b, #1
|
||||
trn1 v4.2s, v4.2s, v5.2s
|
||||
dup v24.8B, w4
|
||||
dup v25.8B, w12
|
||||
ld1 {v4.8B}, [x1], x2
|
||||
dup v26.8B, w6
|
||||
dup v27.8B, w7
|
||||
ext v5.8B, v4.8B, v5.8B, #1
|
||||
trn1 v0.2S, v24.2S, v25.2S
|
||||
trn1 v2.2S, v26.2S, v27.2S
|
||||
trn1 v4.2S, v4.2S, v5.2S
|
||||
1: ld1 {v6.8B}, [x1], x2
|
||||
ext v7.8B, v6.8B, v7.8B, #1
|
||||
trn1 v6.2S, v6.2S, v7.2S
|
||||
umull v18.8H, v4.8B, v0.8B
|
||||
umlal v18.8H, v6.8B, v2.8B
|
||||
ld1 {v4.8B}, [x1], x2
|
||||
ext v5.8B, v4.8B, v5.8B, #1
|
||||
trn1 v4.2S, v4.2S, v5.2S
|
||||
prfm pldl1strm, [x1]
|
||||
umull v19.8h, v6.8b, v0.8b
|
||||
umlal v19.8h, v4.8b, v2.8b
|
||||
trn1 v30.2d, v18.2d, v19.2d
|
||||
trn2 v31.2d, v18.2d, v19.2d
|
||||
add v18.8h, v30.8h, v31.8h
|
||||
umull v19.8H, v6.8B, v0.8B
|
||||
umlal v19.8H, v4.8B, v2.8B
|
||||
trn1 v30.2D, v18.2D, v19.2D
|
||||
trn2 v31.2D, v18.2D, v19.2D
|
||||
add v18.8H, v30.8H, v31.8H
|
||||
.ifc \codec,h264
|
||||
rshrn v16.8b, v18.8h, #6
|
||||
rshrn v16.8B, v18.8H, #6
|
||||
.else
|
||||
add v18.8h, v18.8h, v22.8h
|
||||
shrn v16.8b, v18.8h, #6
|
||||
add v18.8H, v18.8H, v22.8H
|
||||
shrn v16.8B, v18.8H, #6
|
||||
.endif
|
||||
subs w3, w3, #2
|
||||
prfm pldl1strm, [x1, x2]
|
||||
.ifc \type,avg
|
||||
ld1 {v20.s}[0], [x8], x2
|
||||
ld1 {v20.s}[1], [x8], x2
|
||||
urhadd v16.8b, v16.8b, v20.8b
|
||||
ld1 {v20.S}[0], [x8], x2
|
||||
ld1 {v20.S}[1], [x8], x2
|
||||
urhadd v16.8B, v16.8B, v20.8B
|
||||
.endif
|
||||
st1 {v16.s}[0], [x0], x2
|
||||
st1 {v16.s}[1], [x0], x2
|
||||
st1 {v16.S}[0], [x0], x2
|
||||
st1 {v16.S}[1], [x0], x2
|
||||
b.gt 1b
|
||||
ret
|
||||
|
||||
2: adds w12, w12, w6
|
||||
dup v30.8b, w4
|
||||
dup v30.8B, w4
|
||||
b.eq 5f
|
||||
tst w6, w6
|
||||
dup v31.8b, w12
|
||||
trn1 v0.2s, v30.2s, v31.2s
|
||||
trn2 v1.2s, v30.2s, v31.2s
|
||||
dup v31.8B, w12
|
||||
trn1 v0.2S, v30.2S, v31.2S
|
||||
trn2 v1.2S, v30.2S, v31.2S
|
||||
b.eq 4f
|
||||
|
||||
ext v1.8b, v0.8b, v1.8b, #4
|
||||
ld1 {v4.s}[0], [x1], x2
|
||||
3: ld1 {v4.s}[1], [x1], x2
|
||||
umull v18.8h, v4.8b, v0.8b
|
||||
ld1 {v4.s}[0], [x1], x2
|
||||
umull v19.8h, v4.8b, v1.8b
|
||||
trn1 v30.2d, v18.2d, v19.2d
|
||||
trn2 v31.2d, v18.2d, v19.2d
|
||||
add v18.8h, v30.8h, v31.8h
|
||||
ext v1.8B, v0.8B, v1.8B, #4
|
||||
ld1 {v4.S}[0], [x1], x2
|
||||
3: ld1 {v4.S}[1], [x1], x2
|
||||
umull v18.8H, v4.8B, v0.8B
|
||||
ld1 {v4.S}[0], [x1], x2
|
||||
umull v19.8H, v4.8B, v1.8B
|
||||
trn1 v30.2D, v18.2D, v19.2D
|
||||
trn2 v31.2D, v18.2D, v19.2D
|
||||
add v18.8H, v30.8H, v31.8H
|
||||
prfm pldl1strm, [x1]
|
||||
.ifc \codec,h264
|
||||
rshrn v16.8b, v18.8h, #6
|
||||
rshrn v16.8B, v18.8H, #6
|
||||
.else
|
||||
add v18.8h, v18.8h, v22.8h
|
||||
shrn v16.8b, v18.8h, #6
|
||||
add v18.8H, v18.8H, v22.8H
|
||||
shrn v16.8B, v18.8H, #6
|
||||
.endif
|
||||
.ifc \type,avg
|
||||
ld1 {v20.s}[0], [x8], x2
|
||||
ld1 {v20.s}[1], [x8], x2
|
||||
urhadd v16.8b, v16.8b, v20.8b
|
||||
ld1 {v20.S}[0], [x8], x2
|
||||
ld1 {v20.S}[1], [x8], x2
|
||||
urhadd v16.8B, v16.8B, v20.8B
|
||||
.endif
|
||||
subs w3, w3, #2
|
||||
prfm pldl1strm, [x1, x2]
|
||||
st1 {v16.s}[0], [x0], x2
|
||||
st1 {v16.s}[1], [x0], x2
|
||||
st1 {v16.S}[0], [x0], x2
|
||||
st1 {v16.S}[1], [x0], x2
|
||||
b.gt 3b
|
||||
ret
|
||||
|
||||
4: ld1 {v4.8b}, [x1], x2
|
||||
ld1 {v6.8b}, [x1], x2
|
||||
ext v5.8b, v4.8b, v5.8b, #1
|
||||
ext v7.8b, v6.8b, v7.8b, #1
|
||||
trn1 v4.2s, v4.2s, v5.2s
|
||||
trn1 v6.2s, v6.2s, v7.2s
|
||||
umull v18.8h, v4.8b, v0.8b
|
||||
umull v19.8h, v6.8b, v0.8b
|
||||
4: ld1 {v4.8B}, [x1], x2
|
||||
ld1 {v6.8B}, [x1], x2
|
||||
ext v5.8B, v4.8B, v5.8B, #1
|
||||
ext v7.8B, v6.8B, v7.8B, #1
|
||||
trn1 v4.2S, v4.2S, v5.2S
|
||||
trn1 v6.2S, v6.2S, v7.2S
|
||||
umull v18.8H, v4.8B, v0.8B
|
||||
umull v19.8H, v6.8B, v0.8B
|
||||
subs w3, w3, #2
|
||||
trn1 v30.2d, v18.2d, v19.2d
|
||||
trn2 v31.2d, v18.2d, v19.2d
|
||||
add v18.8h, v30.8h, v31.8h
|
||||
trn1 v30.2D, v18.2D, v19.2D
|
||||
trn2 v31.2D, v18.2D, v19.2D
|
||||
add v18.8H, v30.8H, v31.8H
|
||||
prfm pldl1strm, [x1]
|
||||
.ifc \codec,h264
|
||||
rshrn v16.8b, v18.8h, #6
|
||||
rshrn v16.8B, v18.8H, #6
|
||||
.else
|
||||
add v18.8h, v18.8h, v22.8h
|
||||
shrn v16.8b, v18.8h, #6
|
||||
add v18.8H, v18.8H, v22.8H
|
||||
shrn v16.8B, v18.8H, #6
|
||||
.endif
|
||||
.ifc \type,avg
|
||||
ld1 {v20.s}[0], [x8], x2
|
||||
ld1 {v20.s}[1], [x8], x2
|
||||
urhadd v16.8b, v16.8b, v20.8b
|
||||
ld1 {v20.S}[0], [x8], x2
|
||||
ld1 {v20.S}[1], [x8], x2
|
||||
urhadd v16.8B, v16.8B, v20.8B
|
||||
.endif
|
||||
prfm pldl1strm, [x1]
|
||||
st1 {v16.s}[0], [x0], x2
|
||||
st1 {v16.s}[1], [x0], x2
|
||||
st1 {v16.S}[0], [x0], x2
|
||||
st1 {v16.S}[1], [x0], x2
|
||||
b.gt 4b
|
||||
ret
|
||||
|
||||
5: ld1 {v4.s}[0], [x1], x2
|
||||
ld1 {v4.s}[1], [x1], x2
|
||||
umull v18.8h, v4.8b, v30.8b
|
||||
5: ld1 {v4.S}[0], [x1], x2
|
||||
ld1 {v4.S}[1], [x1], x2
|
||||
umull v18.8H, v4.8B, v30.8B
|
||||
subs w3, w3, #2
|
||||
prfm pldl1strm, [x1]
|
||||
.ifc \codec,h264
|
||||
rshrn v16.8b, v18.8h, #6
|
||||
rshrn v16.8B, v18.8H, #6
|
||||
.else
|
||||
add v18.8h, v18.8h, v22.8h
|
||||
shrn v16.8b, v18.8h, #6
|
||||
add v18.8H, v18.8H, v22.8H
|
||||
shrn v16.8B, v18.8H, #6
|
||||
.endif
|
||||
.ifc \type,avg
|
||||
ld1 {v20.s}[0], [x8], x2
|
||||
ld1 {v20.s}[1], [x8], x2
|
||||
urhadd v16.8b, v16.8b, v20.8b
|
||||
ld1 {v20.S}[0], [x8], x2
|
||||
ld1 {v20.S}[1], [x8], x2
|
||||
urhadd v16.8B, v16.8B, v20.8B
|
||||
.endif
|
||||
prfm pldl1strm, [x1]
|
||||
st1 {v16.s}[0], [x0], x2
|
||||
st1 {v16.s}[1], [x0], x2
|
||||
st1 {v16.S}[0], [x0], x2
|
||||
st1 {v16.S}[1], [x0], x2
|
||||
b.gt 5b
|
||||
ret
|
||||
endfunc
|
||||
@@ -370,51 +370,51 @@ function ff_\type\()_h264_chroma_mc2_neon, export=1
|
||||
sub w4, w7, w13
|
||||
sub w4, w4, w14
|
||||
add w4, w4, #64
|
||||
dup v0.8b, w4
|
||||
dup v2.8b, w12
|
||||
dup v1.8b, w6
|
||||
dup v3.8b, w7
|
||||
trn1 v0.4h, v0.4h, v2.4h
|
||||
trn1 v1.4h, v1.4h, v3.4h
|
||||
dup v0.8B, w4
|
||||
dup v2.8B, w12
|
||||
dup v1.8B, w6
|
||||
dup v3.8B, w7
|
||||
trn1 v0.4H, v0.4H, v2.4H
|
||||
trn1 v1.4H, v1.4H, v3.4H
|
||||
1:
|
||||
ld1 {v4.s}[0], [x1], x2
|
||||
ld1 {v4.s}[1], [x1], x2
|
||||
rev64 v5.2s, v4.2s
|
||||
ld1 {v5.s}[1], [x1]
|
||||
ext v6.8b, v4.8b, v5.8b, #1
|
||||
ext v7.8b, v5.8b, v4.8b, #1
|
||||
trn1 v4.4h, v4.4h, v6.4h
|
||||
trn1 v5.4h, v5.4h, v7.4h
|
||||
umull v16.8h, v4.8b, v0.8b
|
||||
umlal v16.8h, v5.8b, v1.8b
|
||||
ld1 {v4.S}[0], [x1], x2
|
||||
ld1 {v4.S}[1], [x1], x2
|
||||
rev64 v5.2S, v4.2S
|
||||
ld1 {v5.S}[1], [x1]
|
||||
ext v6.8B, v4.8B, v5.8B, #1
|
||||
ext v7.8B, v5.8B, v4.8B, #1
|
||||
trn1 v4.4H, v4.4H, v6.4H
|
||||
trn1 v5.4H, v5.4H, v7.4H
|
||||
umull v16.8H, v4.8B, v0.8B
|
||||
umlal v16.8H, v5.8B, v1.8B
|
||||
.ifc \type,avg
|
||||
ld1 {v18.h}[0], [x0], x2
|
||||
ld1 {v18.h}[2], [x0]
|
||||
ld1 {v18.H}[0], [x0], x2
|
||||
ld1 {v18.H}[2], [x0]
|
||||
sub x0, x0, x2
|
||||
.endif
|
||||
rev64 v17.4s, v16.4s
|
||||
add v16.8h, v16.8h, v17.8h
|
||||
rshrn v16.8b, v16.8h, #6
|
||||
rev64 v17.4S, v16.4S
|
||||
add v16.8H, v16.8H, v17.8H
|
||||
rshrn v16.8B, v16.8H, #6
|
||||
.ifc \type,avg
|
||||
urhadd v16.8b, v16.8b, v18.8b
|
||||
urhadd v16.8B, v16.8B, v18.8B
|
||||
.endif
|
||||
st1 {v16.h}[0], [x0], x2
|
||||
st1 {v16.h}[2], [x0], x2
|
||||
st1 {v16.H}[0], [x0], x2
|
||||
st1 {v16.H}[2], [x0], x2
|
||||
subs w3, w3, #2
|
||||
b.gt 1b
|
||||
ret
|
||||
|
||||
2:
|
||||
ld1 {v16.h}[0], [x1], x2
|
||||
ld1 {v16.h}[1], [x1], x2
|
||||
ld1 {v16.H}[0], [x1], x2
|
||||
ld1 {v16.H}[1], [x1], x2
|
||||
.ifc \type,avg
|
||||
ld1 {v18.h}[0], [x0], x2
|
||||
ld1 {v18.h}[1], [x0]
|
||||
ld1 {v18.H}[0], [x0], x2
|
||||
ld1 {v18.H}[1], [x0]
|
||||
sub x0, x0, x2
|
||||
urhadd v16.8b, v16.8b, v18.8b
|
||||
urhadd v16.8B, v16.8B, v18.8B
|
||||
.endif
|
||||
st1 {v16.h}[0], [x0], x2
|
||||
st1 {v16.h}[1], [x0], x2
|
||||
st1 {v16.H}[0], [x0], x2
|
||||
st1 {v16.H}[1], [x0], x2
|
||||
subs w3, w3, #2
|
||||
b.gt 2b
|
||||
ret
|
||||
|
||||
+531
-531
File diff suppressed because it is too large
Load Diff
+272
-272
@@ -27,114 +27,114 @@
|
||||
.macro lowpass_const r
|
||||
movz \r, #20, lsl #16
|
||||
movk \r, #5
|
||||
mov v6.s[0], \r
|
||||
mov v6.S[0], \r
|
||||
.endm
|
||||
|
||||
//trashes v0-v5
|
||||
.macro lowpass_8 r0, r1, r2, r3, d0, d1, narrow=1
|
||||
ext v2.8b, \r0\().8b, \r1\().8b, #2
|
||||
ext v3.8b, \r0\().8b, \r1\().8b, #3
|
||||
uaddl v2.8h, v2.8b, v3.8b
|
||||
ext v4.8b, \r0\().8b, \r1\().8b, #1
|
||||
ext v5.8b, \r0\().8b, \r1\().8b, #4
|
||||
uaddl v4.8h, v4.8b, v5.8b
|
||||
ext v1.8b, \r0\().8b, \r1\().8b, #5
|
||||
uaddl \d0\().8h, \r0\().8b, v1.8b
|
||||
ext v0.8b, \r2\().8b, \r3\().8b, #2
|
||||
mla \d0\().8h, v2.8h, v6.h[1]
|
||||
ext v1.8b, \r2\().8b, \r3\().8b, #3
|
||||
uaddl v0.8h, v0.8b, v1.8b
|
||||
ext v1.8b, \r2\().8b, \r3\().8b, #1
|
||||
mls \d0\().8h, v4.8h, v6.h[0]
|
||||
ext v3.8b, \r2\().8b, \r3\().8b, #4
|
||||
uaddl v1.8h, v1.8b, v3.8b
|
||||
ext v2.8b, \r2\().8b, \r3\().8b, #5
|
||||
uaddl \d1\().8h, \r2\().8b, v2.8b
|
||||
mla \d1\().8h, v0.8h, v6.h[1]
|
||||
mls \d1\().8h, v1.8h, v6.h[0]
|
||||
ext v2.8B, \r0\().8B, \r1\().8B, #2
|
||||
ext v3.8B, \r0\().8B, \r1\().8B, #3
|
||||
uaddl v2.8H, v2.8B, v3.8B
|
||||
ext v4.8B, \r0\().8B, \r1\().8B, #1
|
||||
ext v5.8B, \r0\().8B, \r1\().8B, #4
|
||||
uaddl v4.8H, v4.8B, v5.8B
|
||||
ext v1.8B, \r0\().8B, \r1\().8B, #5
|
||||
uaddl \d0\().8H, \r0\().8B, v1.8B
|
||||
ext v0.8B, \r2\().8B, \r3\().8B, #2
|
||||
mla \d0\().8H, v2.8H, v6.H[1]
|
||||
ext v1.8B, \r2\().8B, \r3\().8B, #3
|
||||
uaddl v0.8H, v0.8B, v1.8B
|
||||
ext v1.8B, \r2\().8B, \r3\().8B, #1
|
||||
mls \d0\().8H, v4.8H, v6.H[0]
|
||||
ext v3.8B, \r2\().8B, \r3\().8B, #4
|
||||
uaddl v1.8H, v1.8B, v3.8B
|
||||
ext v2.8B, \r2\().8B, \r3\().8B, #5
|
||||
uaddl \d1\().8H, \r2\().8B, v2.8B
|
||||
mla \d1\().8H, v0.8H, v6.H[1]
|
||||
mls \d1\().8H, v1.8H, v6.H[0]
|
||||
.if \narrow
|
||||
sqrshrun \d0\().8b, \d0\().8h, #5
|
||||
sqrshrun \d1\().8b, \d1\().8h, #5
|
||||
sqrshrun \d0\().8B, \d0\().8H, #5
|
||||
sqrshrun \d1\().8B, \d1\().8H, #5
|
||||
.endif
|
||||
.endm
|
||||
|
||||
//trashes v0-v5, v7, v30-v31
|
||||
.macro lowpass_8H r0, r1
|
||||
ext v0.16b, \r0\().16b, \r0\().16b, #2
|
||||
ext v1.16b, \r0\().16b, \r0\().16b, #3
|
||||
uaddl v0.8h, v0.8b, v1.8b
|
||||
ext v2.16b, \r0\().16b, \r0\().16b, #1
|
||||
ext v3.16b, \r0\().16b, \r0\().16b, #4
|
||||
uaddl v2.8h, v2.8b, v3.8b
|
||||
ext v30.16b, \r0\().16b, \r0\().16b, #5
|
||||
uaddl \r0\().8h, \r0\().8b, v30.8b
|
||||
ext v4.16b, \r1\().16b, \r1\().16b, #2
|
||||
mla \r0\().8h, v0.8h, v6.h[1]
|
||||
ext v5.16b, \r1\().16b, \r1\().16b, #3
|
||||
uaddl v4.8h, v4.8b, v5.8b
|
||||
ext v7.16b, \r1\().16b, \r1\().16b, #1
|
||||
mls \r0\().8h, v2.8h, v6.h[0]
|
||||
ext v0.16b, \r1\().16b, \r1\().16b, #4
|
||||
uaddl v7.8h, v7.8b, v0.8b
|
||||
ext v31.16b, \r1\().16b, \r1\().16b, #5
|
||||
uaddl \r1\().8h, \r1\().8b, v31.8b
|
||||
mla \r1\().8h, v4.8h, v6.h[1]
|
||||
mls \r1\().8h, v7.8h, v6.h[0]
|
||||
ext v0.16B, \r0\().16B, \r0\().16B, #2
|
||||
ext v1.16B, \r0\().16B, \r0\().16B, #3
|
||||
uaddl v0.8H, v0.8B, v1.8B
|
||||
ext v2.16B, \r0\().16B, \r0\().16B, #1
|
||||
ext v3.16B, \r0\().16B, \r0\().16B, #4
|
||||
uaddl v2.8H, v2.8B, v3.8B
|
||||
ext v30.16B, \r0\().16B, \r0\().16B, #5
|
||||
uaddl \r0\().8H, \r0\().8B, v30.8B
|
||||
ext v4.16B, \r1\().16B, \r1\().16B, #2
|
||||
mla \r0\().8H, v0.8H, v6.H[1]
|
||||
ext v5.16B, \r1\().16B, \r1\().16B, #3
|
||||
uaddl v4.8H, v4.8B, v5.8B
|
||||
ext v7.16B, \r1\().16B, \r1\().16B, #1
|
||||
mls \r0\().8H, v2.8H, v6.H[0]
|
||||
ext v0.16B, \r1\().16B, \r1\().16B, #4
|
||||
uaddl v7.8H, v7.8B, v0.8B
|
||||
ext v31.16B, \r1\().16B, \r1\().16B, #5
|
||||
uaddl \r1\().8H, \r1\().8B, v31.8B
|
||||
mla \r1\().8H, v4.8H, v6.H[1]
|
||||
mls \r1\().8H, v7.8H, v6.H[0]
|
||||
.endm
|
||||
|
||||
// trashes v2-v5, v30
|
||||
.macro lowpass_8_1 r0, r1, d0, narrow=1
|
||||
ext v2.8b, \r0\().8b, \r1\().8b, #2
|
||||
ext v3.8b, \r0\().8b, \r1\().8b, #3
|
||||
uaddl v2.8h, v2.8b, v3.8b
|
||||
ext v4.8b, \r0\().8b, \r1\().8b, #1
|
||||
ext v5.8b, \r0\().8b, \r1\().8b, #4
|
||||
uaddl v4.8h, v4.8b, v5.8b
|
||||
ext v30.8b, \r0\().8b, \r1\().8b, #5
|
||||
uaddl \d0\().8h, \r0\().8b, v30.8b
|
||||
mla \d0\().8h, v2.8h, v6.h[1]
|
||||
mls \d0\().8h, v4.8h, v6.h[0]
|
||||
ext v2.8B, \r0\().8B, \r1\().8B, #2
|
||||
ext v3.8B, \r0\().8B, \r1\().8B, #3
|
||||
uaddl v2.8H, v2.8B, v3.8B
|
||||
ext v4.8B, \r0\().8B, \r1\().8B, #1
|
||||
ext v5.8B, \r0\().8B, \r1\().8B, #4
|
||||
uaddl v4.8H, v4.8B, v5.8B
|
||||
ext v30.8B, \r0\().8B, \r1\().8B, #5
|
||||
uaddl \d0\().8H, \r0\().8B, v30.8B
|
||||
mla \d0\().8H, v2.8H, v6.H[1]
|
||||
mls \d0\().8H, v4.8H, v6.H[0]
|
||||
.if \narrow
|
||||
sqrshrun \d0\().8b, \d0\().8h, #5
|
||||
sqrshrun \d0\().8B, \d0\().8H, #5
|
||||
.endif
|
||||
.endm
|
||||
|
||||
// trashed v0-v7
|
||||
.macro lowpass_8.16 r0, r1, r2
|
||||
ext v1.16b, \r0\().16b, \r1\().16b, #4
|
||||
ext v0.16b, \r0\().16b, \r1\().16b, #6
|
||||
saddl v5.4s, v1.4h, v0.4h
|
||||
ext v2.16b, \r0\().16b, \r1\().16b, #2
|
||||
saddl2 v1.4s, v1.8h, v0.8h
|
||||
ext v3.16b, \r0\().16b, \r1\().16b, #8
|
||||
saddl v6.4s, v2.4h, v3.4h
|
||||
ext \r1\().16b, \r0\().16b, \r1\().16b, #10
|
||||
saddl2 v2.4s, v2.8h, v3.8h
|
||||
saddl v0.4s, \r0\().4h, \r1\().4h
|
||||
saddl2 v4.4s, \r0\().8h, \r1\().8h
|
||||
ext v1.16B, \r0\().16B, \r1\().16B, #4
|
||||
ext v0.16B, \r0\().16B, \r1\().16B, #6
|
||||
saddl v5.4S, v1.4H, v0.4H
|
||||
ext v2.16B, \r0\().16B, \r1\().16B, #2
|
||||
saddl2 v1.4S, v1.8H, v0.8H
|
||||
ext v3.16B, \r0\().16B, \r1\().16B, #8
|
||||
saddl v6.4S, v2.4H, v3.4H
|
||||
ext \r1\().16B, \r0\().16B, \r1\().16B, #10
|
||||
saddl2 v2.4S, v2.8H, v3.8H
|
||||
saddl v0.4S, \r0\().4H, \r1\().4H
|
||||
saddl2 v4.4S, \r0\().8H, \r1\().8H
|
||||
|
||||
shl v3.4s, v5.4s, #4
|
||||
shl v5.4s, v5.4s, #2
|
||||
shl v7.4s, v6.4s, #2
|
||||
add v5.4s, v5.4s, v3.4s
|
||||
add v6.4s, v6.4s, v7.4s
|
||||
shl v3.4S, v5.4S, #4
|
||||
shl v5.4S, v5.4S, #2
|
||||
shl v7.4S, v6.4S, #2
|
||||
add v5.4S, v5.4S, v3.4S
|
||||
add v6.4S, v6.4S, v7.4S
|
||||
|
||||
shl v3.4s, v1.4s, #4
|
||||
shl v1.4s, v1.4s, #2
|
||||
shl v7.4s, v2.4s, #2
|
||||
add v1.4s, v1.4s, v3.4s
|
||||
add v2.4s, v2.4s, v7.4s
|
||||
shl v3.4S, v1.4S, #4
|
||||
shl v1.4S, v1.4S, #2
|
||||
shl v7.4S, v2.4S, #2
|
||||
add v1.4S, v1.4S, v3.4S
|
||||
add v2.4S, v2.4S, v7.4S
|
||||
|
||||
add v5.4s, v5.4s, v0.4s
|
||||
sub v5.4s, v5.4s, v6.4s
|
||||
add v5.4S, v5.4S, v0.4S
|
||||
sub v5.4S, v5.4S, v6.4S
|
||||
|
||||
add v1.4s, v1.4s, v4.4s
|
||||
sub v1.4s, v1.4s, v2.4s
|
||||
add v1.4S, v1.4S, v4.4S
|
||||
sub v1.4S, v1.4S, v2.4S
|
||||
|
||||
rshrn v5.4h, v5.4s, #10
|
||||
rshrn2 v5.8h, v1.4s, #10
|
||||
rshrn v5.4H, v5.4S, #10
|
||||
rshrn2 v5.8H, v1.4S, #10
|
||||
|
||||
sqxtun \r2\().8b, v5.8h
|
||||
sqxtun \r2\().8B, v5.8H
|
||||
.endm
|
||||
|
||||
function put_h264_qpel16_h_lowpass_neon_packed
|
||||
@@ -163,19 +163,19 @@ function \type\()_h264_qpel16_h_lowpass_neon
|
||||
endfunc
|
||||
|
||||
function \type\()_h264_qpel8_h_lowpass_neon
|
||||
1: ld1 {v28.8b, v29.8b}, [x1], x2
|
||||
ld1 {v16.8b, v17.8b}, [x1], x2
|
||||
1: ld1 {v28.8B, v29.8B}, [x1], x2
|
||||
ld1 {v16.8B, v17.8B}, [x1], x2
|
||||
subs x12, x12, #2
|
||||
lowpass_8 v28, v29, v16, v17, v28, v16
|
||||
.ifc \type,avg
|
||||
ld1 {v2.8b}, [x0], x3
|
||||
urhadd v28.8b, v28.8b, v2.8b
|
||||
ld1 {v3.8b}, [x0]
|
||||
urhadd v16.8b, v16.8b, v3.8b
|
||||
ld1 {v2.8B}, [x0], x3
|
||||
urhadd v28.8B, v28.8B, v2.8B
|
||||
ld1 {v3.8B}, [x0]
|
||||
urhadd v16.8B, v16.8B, v3.8B
|
||||
sub x0, x0, x3
|
||||
.endif
|
||||
st1 {v28.8b}, [x0], x3
|
||||
st1 {v16.8b}, [x0], x3
|
||||
st1 {v28.8B}, [x0], x3
|
||||
st1 {v16.8B}, [x0], x3
|
||||
b.ne 1b
|
||||
ret
|
||||
endfunc
|
||||
@@ -200,23 +200,23 @@ function \type\()_h264_qpel16_h_lowpass_l2_neon
|
||||
endfunc
|
||||
|
||||
function \type\()_h264_qpel8_h_lowpass_l2_neon
|
||||
1: ld1 {v26.8b, v27.8b}, [x1], x2
|
||||
ld1 {v16.8b, v17.8b}, [x1], x2
|
||||
ld1 {v28.8b}, [x3], x2
|
||||
ld1 {v29.8b}, [x3], x2
|
||||
1: ld1 {v26.8B, v27.8B}, [x1], x2
|
||||
ld1 {v16.8B, v17.8B}, [x1], x2
|
||||
ld1 {v28.8B}, [x3], x2
|
||||
ld1 {v29.8B}, [x3], x2
|
||||
subs x12, x12, #2
|
||||
lowpass_8 v26, v27, v16, v17, v26, v27
|
||||
urhadd v26.8b, v26.8b, v28.8b
|
||||
urhadd v27.8b, v27.8b, v29.8b
|
||||
urhadd v26.8B, v26.8B, v28.8B
|
||||
urhadd v27.8B, v27.8B, v29.8B
|
||||
.ifc \type,avg
|
||||
ld1 {v2.8b}, [x0], x2
|
||||
urhadd v26.8b, v26.8b, v2.8b
|
||||
ld1 {v3.8b}, [x0]
|
||||
urhadd v27.8b, v27.8b, v3.8b
|
||||
ld1 {v2.8B}, [x0], x2
|
||||
urhadd v26.8B, v26.8B, v2.8B
|
||||
ld1 {v3.8B}, [x0]
|
||||
urhadd v27.8B, v27.8B, v3.8B
|
||||
sub x0, x0, x2
|
||||
.endif
|
||||
st1 {v26.8b}, [x0], x2
|
||||
st1 {v27.8b}, [x0], x2
|
||||
st1 {v26.8B}, [x0], x2
|
||||
st1 {v27.8B}, [x0], x2
|
||||
b.ne 1b
|
||||
ret
|
||||
endfunc
|
||||
@@ -257,19 +257,19 @@ function \type\()_h264_qpel16_v_lowpass_neon
|
||||
endfunc
|
||||
|
||||
function \type\()_h264_qpel8_v_lowpass_neon
|
||||
ld1 {v16.8b}, [x1], x3
|
||||
ld1 {v18.8b}, [x1], x3
|
||||
ld1 {v20.8b}, [x1], x3
|
||||
ld1 {v22.8b}, [x1], x3
|
||||
ld1 {v24.8b}, [x1], x3
|
||||
ld1 {v26.8b}, [x1], x3
|
||||
ld1 {v28.8b}, [x1], x3
|
||||
ld1 {v30.8b}, [x1], x3
|
||||
ld1 {v17.8b}, [x1], x3
|
||||
ld1 {v19.8b}, [x1], x3
|
||||
ld1 {v21.8b}, [x1], x3
|
||||
ld1 {v23.8b}, [x1], x3
|
||||
ld1 {v25.8b}, [x1]
|
||||
ld1 {v16.8B}, [x1], x3
|
||||
ld1 {v18.8B}, [x1], x3
|
||||
ld1 {v20.8B}, [x1], x3
|
||||
ld1 {v22.8B}, [x1], x3
|
||||
ld1 {v24.8B}, [x1], x3
|
||||
ld1 {v26.8B}, [x1], x3
|
||||
ld1 {v28.8B}, [x1], x3
|
||||
ld1 {v30.8B}, [x1], x3
|
||||
ld1 {v17.8B}, [x1], x3
|
||||
ld1 {v19.8B}, [x1], x3
|
||||
ld1 {v21.8B}, [x1], x3
|
||||
ld1 {v23.8B}, [x1], x3
|
||||
ld1 {v25.8B}, [x1]
|
||||
|
||||
transpose_8x8B v16, v18, v20, v22, v24, v26, v28, v30, v0, v1
|
||||
transpose_8x8B v17, v19, v21, v23, v25, v27, v29, v31, v0, v1
|
||||
@@ -280,33 +280,33 @@ function \type\()_h264_qpel8_v_lowpass_neon
|
||||
transpose_8x8B v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
|
||||
|
||||
.ifc \type,avg
|
||||
ld1 {v24.8b}, [x0], x2
|
||||
urhadd v16.8b, v16.8b, v24.8b
|
||||
ld1 {v25.8b}, [x0], x2
|
||||
urhadd v17.8b, v17.8b, v25.8b
|
||||
ld1 {v26.8b}, [x0], x2
|
||||
urhadd v18.8b, v18.8b, v26.8b
|
||||
ld1 {v27.8b}, [x0], x2
|
||||
urhadd v19.8b, v19.8b, v27.8b
|
||||
ld1 {v28.8b}, [x0], x2
|
||||
urhadd v20.8b, v20.8b, v28.8b
|
||||
ld1 {v29.8b}, [x0], x2
|
||||
urhadd v21.8b, v21.8b, v29.8b
|
||||
ld1 {v30.8b}, [x0], x2
|
||||
urhadd v22.8b, v22.8b, v30.8b
|
||||
ld1 {v31.8b}, [x0], x2
|
||||
urhadd v23.8b, v23.8b, v31.8b
|
||||
ld1 {v24.8B}, [x0], x2
|
||||
urhadd v16.8B, v16.8B, v24.8B
|
||||
ld1 {v25.8B}, [x0], x2
|
||||
urhadd v17.8B, v17.8B, v25.8B
|
||||
ld1 {v26.8B}, [x0], x2
|
||||
urhadd v18.8B, v18.8B, v26.8B
|
||||
ld1 {v27.8B}, [x0], x2
|
||||
urhadd v19.8B, v19.8B, v27.8B
|
||||
ld1 {v28.8B}, [x0], x2
|
||||
urhadd v20.8B, v20.8B, v28.8B
|
||||
ld1 {v29.8B}, [x0], x2
|
||||
urhadd v21.8B, v21.8B, v29.8B
|
||||
ld1 {v30.8B}, [x0], x2
|
||||
urhadd v22.8B, v22.8B, v30.8B
|
||||
ld1 {v31.8B}, [x0], x2
|
||||
urhadd v23.8B, v23.8B, v31.8B
|
||||
sub x0, x0, x2, lsl #3
|
||||
.endif
|
||||
|
||||
st1 {v16.8b}, [x0], x2
|
||||
st1 {v17.8b}, [x0], x2
|
||||
st1 {v18.8b}, [x0], x2
|
||||
st1 {v19.8b}, [x0], x2
|
||||
st1 {v20.8b}, [x0], x2
|
||||
st1 {v21.8b}, [x0], x2
|
||||
st1 {v22.8b}, [x0], x2
|
||||
st1 {v23.8b}, [x0], x2
|
||||
st1 {v16.8B}, [x0], x2
|
||||
st1 {v17.8B}, [x0], x2
|
||||
st1 {v18.8B}, [x0], x2
|
||||
st1 {v19.8B}, [x0], x2
|
||||
st1 {v20.8B}, [x0], x2
|
||||
st1 {v21.8B}, [x0], x2
|
||||
st1 {v22.8B}, [x0], x2
|
||||
st1 {v23.8B}, [x0], x2
|
||||
|
||||
ret
|
||||
endfunc
|
||||
@@ -334,19 +334,19 @@ function \type\()_h264_qpel16_v_lowpass_l2_neon
|
||||
endfunc
|
||||
|
||||
function \type\()_h264_qpel8_v_lowpass_l2_neon
|
||||
ld1 {v16.8b}, [x1], x3
|
||||
ld1 {v18.8b}, [x1], x3
|
||||
ld1 {v20.8b}, [x1], x3
|
||||
ld1 {v22.8b}, [x1], x3
|
||||
ld1 {v24.8b}, [x1], x3
|
||||
ld1 {v26.8b}, [x1], x3
|
||||
ld1 {v28.8b}, [x1], x3
|
||||
ld1 {v30.8b}, [x1], x3
|
||||
ld1 {v17.8b}, [x1], x3
|
||||
ld1 {v19.8b}, [x1], x3
|
||||
ld1 {v21.8b}, [x1], x3
|
||||
ld1 {v23.8b}, [x1], x3
|
||||
ld1 {v25.8b}, [x1]
|
||||
ld1 {v16.8B}, [x1], x3
|
||||
ld1 {v18.8B}, [x1], x3
|
||||
ld1 {v20.8B}, [x1], x3
|
||||
ld1 {v22.8B}, [x1], x3
|
||||
ld1 {v24.8B}, [x1], x3
|
||||
ld1 {v26.8B}, [x1], x3
|
||||
ld1 {v28.8B}, [x1], x3
|
||||
ld1 {v30.8B}, [x1], x3
|
||||
ld1 {v17.8B}, [x1], x3
|
||||
ld1 {v19.8B}, [x1], x3
|
||||
ld1 {v21.8B}, [x1], x3
|
||||
ld1 {v23.8B}, [x1], x3
|
||||
ld1 {v25.8B}, [x1]
|
||||
|
||||
transpose_8x8B v16, v18, v20, v22, v24, v26, v28, v30, v0, v1
|
||||
transpose_8x8B v17, v19, v21, v23, v25, v27, v29, v31, v0, v1
|
||||
@@ -356,51 +356,51 @@ function \type\()_h264_qpel8_v_lowpass_l2_neon
|
||||
lowpass_8 v28, v29, v30, v31, v22, v23
|
||||
transpose_8x8B v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
|
||||
|
||||
ld1 {v24.8b}, [x12], x2
|
||||
ld1 {v25.8b}, [x12], x2
|
||||
ld1 {v26.8b}, [x12], x2
|
||||
ld1 {v27.8b}, [x12], x2
|
||||
ld1 {v28.8b}, [x12], x2
|
||||
urhadd v16.8b, v24.8b, v16.8b
|
||||
urhadd v17.8b, v25.8b, v17.8b
|
||||
ld1 {v29.8b}, [x12], x2
|
||||
urhadd v18.8b, v26.8b, v18.8b
|
||||
urhadd v19.8b, v27.8b, v19.8b
|
||||
ld1 {v30.8b}, [x12], x2
|
||||
urhadd v20.8b, v28.8b, v20.8b
|
||||
urhadd v21.8b, v29.8b, v21.8b
|
||||
ld1 {v31.8b}, [x12], x2
|
||||
urhadd v22.8b, v30.8b, v22.8b
|
||||
urhadd v23.8b, v31.8b, v23.8b
|
||||
ld1 {v24.8B}, [x12], x2
|
||||
ld1 {v25.8B}, [x12], x2
|
||||
ld1 {v26.8B}, [x12], x2
|
||||
ld1 {v27.8B}, [x12], x2
|
||||
ld1 {v28.8B}, [x12], x2
|
||||
urhadd v16.8B, v24.8B, v16.8B
|
||||
urhadd v17.8B, v25.8B, v17.8B
|
||||
ld1 {v29.8B}, [x12], x2
|
||||
urhadd v18.8B, v26.8B, v18.8B
|
||||
urhadd v19.8B, v27.8B, v19.8B
|
||||
ld1 {v30.8B}, [x12], x2
|
||||
urhadd v20.8B, v28.8B, v20.8B
|
||||
urhadd v21.8B, v29.8B, v21.8B
|
||||
ld1 {v31.8B}, [x12], x2
|
||||
urhadd v22.8B, v30.8B, v22.8B
|
||||
urhadd v23.8B, v31.8B, v23.8B
|
||||
|
||||
.ifc \type,avg
|
||||
ld1 {v24.8b}, [x0], x3
|
||||
urhadd v16.8b, v16.8b, v24.8b
|
||||
ld1 {v25.8b}, [x0], x3
|
||||
urhadd v17.8b, v17.8b, v25.8b
|
||||
ld1 {v26.8b}, [x0], x3
|
||||
urhadd v18.8b, v18.8b, v26.8b
|
||||
ld1 {v27.8b}, [x0], x3
|
||||
urhadd v19.8b, v19.8b, v27.8b
|
||||
ld1 {v28.8b}, [x0], x3
|
||||
urhadd v20.8b, v20.8b, v28.8b
|
||||
ld1 {v29.8b}, [x0], x3
|
||||
urhadd v21.8b, v21.8b, v29.8b
|
||||
ld1 {v30.8b}, [x0], x3
|
||||
urhadd v22.8b, v22.8b, v30.8b
|
||||
ld1 {v31.8b}, [x0], x3
|
||||
urhadd v23.8b, v23.8b, v31.8b
|
||||
ld1 {v24.8B}, [x0], x3
|
||||
urhadd v16.8B, v16.8B, v24.8B
|
||||
ld1 {v25.8B}, [x0], x3
|
||||
urhadd v17.8B, v17.8B, v25.8B
|
||||
ld1 {v26.8B}, [x0], x3
|
||||
urhadd v18.8B, v18.8B, v26.8B
|
||||
ld1 {v27.8B}, [x0], x3
|
||||
urhadd v19.8B, v19.8B, v27.8B
|
||||
ld1 {v28.8B}, [x0], x3
|
||||
urhadd v20.8B, v20.8B, v28.8B
|
||||
ld1 {v29.8B}, [x0], x3
|
||||
urhadd v21.8B, v21.8B, v29.8B
|
||||
ld1 {v30.8B}, [x0], x3
|
||||
urhadd v22.8B, v22.8B, v30.8B
|
||||
ld1 {v31.8B}, [x0], x3
|
||||
urhadd v23.8B, v23.8B, v31.8B
|
||||
sub x0, x0, x3, lsl #3
|
||||
.endif
|
||||
|
||||
st1 {v16.8b}, [x0], x3
|
||||
st1 {v17.8b}, [x0], x3
|
||||
st1 {v18.8b}, [x0], x3
|
||||
st1 {v19.8b}, [x0], x3
|
||||
st1 {v20.8b}, [x0], x3
|
||||
st1 {v21.8b}, [x0], x3
|
||||
st1 {v22.8b}, [x0], x3
|
||||
st1 {v23.8b}, [x0], x3
|
||||
st1 {v16.8B}, [x0], x3
|
||||
st1 {v17.8B}, [x0], x3
|
||||
st1 {v18.8B}, [x0], x3
|
||||
st1 {v19.8B}, [x0], x3
|
||||
st1 {v20.8B}, [x0], x3
|
||||
st1 {v21.8B}, [x0], x3
|
||||
st1 {v22.8B}, [x0], x3
|
||||
st1 {v23.8B}, [x0], x3
|
||||
|
||||
ret
|
||||
endfunc
|
||||
@@ -411,19 +411,19 @@ endfunc
|
||||
|
||||
function put_h264_qpel8_hv_lowpass_neon_top
|
||||
lowpass_const w12
|
||||
ld1 {v16.8h}, [x1], x3
|
||||
ld1 {v17.8h}, [x1], x3
|
||||
ld1 {v18.8h}, [x1], x3
|
||||
ld1 {v19.8h}, [x1], x3
|
||||
ld1 {v20.8h}, [x1], x3
|
||||
ld1 {v21.8h}, [x1], x3
|
||||
ld1 {v22.8h}, [x1], x3
|
||||
ld1 {v23.8h}, [x1], x3
|
||||
ld1 {v24.8h}, [x1], x3
|
||||
ld1 {v25.8h}, [x1], x3
|
||||
ld1 {v26.8h}, [x1], x3
|
||||
ld1 {v27.8h}, [x1], x3
|
||||
ld1 {v28.8h}, [x1]
|
||||
ld1 {v16.8H}, [x1], x3
|
||||
ld1 {v17.8H}, [x1], x3
|
||||
ld1 {v18.8H}, [x1], x3
|
||||
ld1 {v19.8H}, [x1], x3
|
||||
ld1 {v20.8H}, [x1], x3
|
||||
ld1 {v21.8H}, [x1], x3
|
||||
ld1 {v22.8H}, [x1], x3
|
||||
ld1 {v23.8H}, [x1], x3
|
||||
ld1 {v24.8H}, [x1], x3
|
||||
ld1 {v25.8H}, [x1], x3
|
||||
ld1 {v26.8H}, [x1], x3
|
||||
ld1 {v27.8H}, [x1], x3
|
||||
ld1 {v28.8H}, [x1]
|
||||
lowpass_8H v16, v17
|
||||
lowpass_8H v18, v19
|
||||
lowpass_8H v20, v21
|
||||
@@ -447,7 +447,7 @@ function put_h264_qpel8_hv_lowpass_neon_top
|
||||
lowpass_8.16 v22, v30, v22
|
||||
lowpass_8.16 v23, v31, v23
|
||||
|
||||
transpose_8x8B v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
|
||||
transpose_8x8B v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
|
||||
|
||||
ret
|
||||
endfunc
|
||||
@@ -457,33 +457,33 @@ function \type\()_h264_qpel8_hv_lowpass_neon
|
||||
mov x10, x30
|
||||
bl put_h264_qpel8_hv_lowpass_neon_top
|
||||
.ifc \type,avg
|
||||
ld1 {v0.8b}, [x0], x2
|
||||
urhadd v16.8b, v16.8b, v0.8b
|
||||
ld1 {v1.8b}, [x0], x2
|
||||
urhadd v17.8b, v17.8b, v1.8b
|
||||
ld1 {v2.8b}, [x0], x2
|
||||
urhadd v18.8b, v18.8b, v2.8b
|
||||
ld1 {v3.8b}, [x0], x2
|
||||
urhadd v19.8b, v19.8b, v3.8b
|
||||
ld1 {v4.8b}, [x0], x2
|
||||
urhadd v20.8b, v20.8b, v4.8b
|
||||
ld1 {v5.8b}, [x0], x2
|
||||
urhadd v21.8b, v21.8b, v5.8b
|
||||
ld1 {v6.8b}, [x0], x2
|
||||
urhadd v22.8b, v22.8b, v6.8b
|
||||
ld1 {v7.8b}, [x0], x2
|
||||
urhadd v23.8b, v23.8b, v7.8b
|
||||
ld1 {v0.8B}, [x0], x2
|
||||
urhadd v16.8B, v16.8B, v0.8B
|
||||
ld1 {v1.8B}, [x0], x2
|
||||
urhadd v17.8B, v17.8B, v1.8B
|
||||
ld1 {v2.8B}, [x0], x2
|
||||
urhadd v18.8B, v18.8B, v2.8B
|
||||
ld1 {v3.8B}, [x0], x2
|
||||
urhadd v19.8B, v19.8B, v3.8B
|
||||
ld1 {v4.8B}, [x0], x2
|
||||
urhadd v20.8B, v20.8B, v4.8B
|
||||
ld1 {v5.8B}, [x0], x2
|
||||
urhadd v21.8B, v21.8B, v5.8B
|
||||
ld1 {v6.8B}, [x0], x2
|
||||
urhadd v22.8B, v22.8B, v6.8B
|
||||
ld1 {v7.8B}, [x0], x2
|
||||
urhadd v23.8B, v23.8B, v7.8B
|
||||
sub x0, x0, x2, lsl #3
|
||||
.endif
|
||||
|
||||
st1 {v16.8b}, [x0], x2
|
||||
st1 {v17.8b}, [x0], x2
|
||||
st1 {v18.8b}, [x0], x2
|
||||
st1 {v19.8b}, [x0], x2
|
||||
st1 {v20.8b}, [x0], x2
|
||||
st1 {v21.8b}, [x0], x2
|
||||
st1 {v22.8b}, [x0], x2
|
||||
st1 {v23.8b}, [x0], x2
|
||||
st1 {v16.8B}, [x0], x2
|
||||
st1 {v17.8B}, [x0], x2
|
||||
st1 {v18.8B}, [x0], x2
|
||||
st1 {v19.8B}, [x0], x2
|
||||
st1 {v20.8B}, [x0], x2
|
||||
st1 {v21.8B}, [x0], x2
|
||||
st1 {v22.8B}, [x0], x2
|
||||
st1 {v23.8B}, [x0], x2
|
||||
|
||||
ret x10
|
||||
endfunc
|
||||
@@ -497,45 +497,45 @@ function \type\()_h264_qpel8_hv_lowpass_l2_neon
|
||||
mov x10, x30
|
||||
bl put_h264_qpel8_hv_lowpass_neon_top
|
||||
|
||||
ld1 {v0.8b, v1.8b}, [x2], #16
|
||||
ld1 {v2.8b, v3.8b}, [x2], #16
|
||||
urhadd v0.8b, v0.8b, v16.8b
|
||||
urhadd v1.8b, v1.8b, v17.8b
|
||||
ld1 {v4.8b, v5.8b}, [x2], #16
|
||||
urhadd v2.8b, v2.8b, v18.8b
|
||||
urhadd v3.8b, v3.8b, v19.8b
|
||||
ld1 {v6.8b, v7.8b}, [x2], #16
|
||||
urhadd v4.8b, v4.8b, v20.8b
|
||||
urhadd v5.8b, v5.8b, v21.8b
|
||||
urhadd v6.8b, v6.8b, v22.8b
|
||||
urhadd v7.8b, v7.8b, v23.8b
|
||||
ld1 {v0.8B, v1.8B}, [x2], #16
|
||||
ld1 {v2.8B, v3.8B}, [x2], #16
|
||||
urhadd v0.8B, v0.8B, v16.8B
|
||||
urhadd v1.8B, v1.8B, v17.8B
|
||||
ld1 {v4.8B, v5.8B}, [x2], #16
|
||||
urhadd v2.8B, v2.8B, v18.8B
|
||||
urhadd v3.8B, v3.8B, v19.8B
|
||||
ld1 {v6.8B, v7.8B}, [x2], #16
|
||||
urhadd v4.8B, v4.8B, v20.8B
|
||||
urhadd v5.8B, v5.8B, v21.8B
|
||||
urhadd v6.8B, v6.8B, v22.8B
|
||||
urhadd v7.8B, v7.8B, v23.8B
|
||||
.ifc \type,avg
|
||||
ld1 {v16.8b}, [x0], x3
|
||||
urhadd v0.8b, v0.8b, v16.8b
|
||||
ld1 {v17.8b}, [x0], x3
|
||||
urhadd v1.8b, v1.8b, v17.8b
|
||||
ld1 {v18.8b}, [x0], x3
|
||||
urhadd v2.8b, v2.8b, v18.8b
|
||||
ld1 {v19.8b}, [x0], x3
|
||||
urhadd v3.8b, v3.8b, v19.8b
|
||||
ld1 {v20.8b}, [x0], x3
|
||||
urhadd v4.8b, v4.8b, v20.8b
|
||||
ld1 {v21.8b}, [x0], x3
|
||||
urhadd v5.8b, v5.8b, v21.8b
|
||||
ld1 {v22.8b}, [x0], x3
|
||||
urhadd v6.8b, v6.8b, v22.8b
|
||||
ld1 {v23.8b}, [x0], x3
|
||||
urhadd v7.8b, v7.8b, v23.8b
|
||||
ld1 {v16.8B}, [x0], x3
|
||||
urhadd v0.8B, v0.8B, v16.8B
|
||||
ld1 {v17.8B}, [x0], x3
|
||||
urhadd v1.8B, v1.8B, v17.8B
|
||||
ld1 {v18.8B}, [x0], x3
|
||||
urhadd v2.8B, v2.8B, v18.8B
|
||||
ld1 {v19.8B}, [x0], x3
|
||||
urhadd v3.8B, v3.8B, v19.8B
|
||||
ld1 {v20.8B}, [x0], x3
|
||||
urhadd v4.8B, v4.8B, v20.8B
|
||||
ld1 {v21.8B}, [x0], x3
|
||||
urhadd v5.8B, v5.8B, v21.8B
|
||||
ld1 {v22.8B}, [x0], x3
|
||||
urhadd v6.8B, v6.8B, v22.8B
|
||||
ld1 {v23.8B}, [x0], x3
|
||||
urhadd v7.8B, v7.8B, v23.8B
|
||||
sub x0, x0, x3, lsl #3
|
||||
.endif
|
||||
st1 {v0.8b}, [x0], x3
|
||||
st1 {v1.8b}, [x0], x3
|
||||
st1 {v2.8b}, [x0], x3
|
||||
st1 {v3.8b}, [x0], x3
|
||||
st1 {v4.8b}, [x0], x3
|
||||
st1 {v5.8b}, [x0], x3
|
||||
st1 {v6.8b}, [x0], x3
|
||||
st1 {v7.8b}, [x0], x3
|
||||
st1 {v0.8B}, [x0], x3
|
||||
st1 {v1.8B}, [x0], x3
|
||||
st1 {v2.8B}, [x0], x3
|
||||
st1 {v3.8B}, [x0], x3
|
||||
st1 {v4.8B}, [x0], x3
|
||||
st1 {v5.8B}, [x0], x3
|
||||
st1 {v6.8B}, [x0], x3
|
||||
st1 {v7.8B}, [x0], x3
|
||||
|
||||
ret x10
|
||||
endfunc
|
||||
@@ -579,8 +579,8 @@ function \type\()_h264_qpel16_hv_lowpass_l2_neon
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
h264_qpel16_hv put
|
||||
h264_qpel16_hv avg
|
||||
h264_qpel16_hv put
|
||||
h264_qpel16_hv avg
|
||||
|
||||
.macro h264_qpel8 type
|
||||
function ff_\type\()_h264_qpel8_mc10_neon, export=1
|
||||
@@ -758,8 +758,8 @@ function ff_\type\()_h264_qpel8_mc33_neon, export=1
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
h264_qpel8 put
|
||||
h264_qpel8 avg
|
||||
h264_qpel8 put
|
||||
h264_qpel8 avg
|
||||
|
||||
.macro h264_qpel16 type
|
||||
function ff_\type\()_h264_qpel16_mc10_neon, export=1
|
||||
@@ -930,5 +930,5 @@ function ff_\type\()_h264_qpel16_mc33_neon, export=1
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
h264_qpel16 put
|
||||
h264_qpel16 avg
|
||||
h264_qpel16 put
|
||||
h264_qpel16 avg
|
||||
|
||||
@@ -38,10 +38,10 @@ const trans, align=4
|
||||
endconst
|
||||
|
||||
.macro clip10 in1, in2, c1, c2
|
||||
smax \in1, \in1, \c1
|
||||
smax \in2, \in2, \c1
|
||||
smin \in1, \in1, \c2
|
||||
smin \in2, \in2, \c2
|
||||
smax \in1, \in1, \c1
|
||||
smax \in2, \in2, \c1
|
||||
smin \in1, \in1, \c2
|
||||
smin \in2, \in2, \c2
|
||||
.endm
|
||||
|
||||
function ff_hevc_add_residual_4x4_8_neon, export=1
|
||||
@@ -50,13 +50,13 @@ function ff_hevc_add_residual_4x4_8_neon, export=1
|
||||
ld1 {v2.s}[1], [x0], x2
|
||||
ld1 {v2.s}[2], [x0], x2
|
||||
ld1 {v2.s}[3], [x0], x2
|
||||
sub x0, x0, x2, lsl #2
|
||||
uxtl v6.8h, v2.8b
|
||||
uxtl2 v7.8h, v2.16b
|
||||
sqadd v0.8h, v0.8h, v6.8h
|
||||
sqadd v1.8h, v1.8h, v7.8h
|
||||
sqxtun v0.8b, v0.8h
|
||||
sqxtun2 v0.16b, v1.8h
|
||||
sub x0, x0, x2, lsl #2
|
||||
uxtl v6.8h, v2.8b
|
||||
uxtl2 v7.8h, v2.16b
|
||||
sqadd v0.8h, v0.8h, v6.8h
|
||||
sqadd v1.8h, v1.8h, v7.8h
|
||||
sqxtun v0.8b, v0.8h
|
||||
sqxtun2 v0.16b, v1.8h
|
||||
st1 {v0.s}[0], [x0], x2
|
||||
st1 {v0.s}[1], [x0], x2
|
||||
st1 {v0.s}[2], [x0], x2
|
||||
@@ -70,12 +70,12 @@ function ff_hevc_add_residual_4x4_10_neon, export=1
|
||||
ld1 {v2.d}[0], [x12], x2
|
||||
ld1 {v2.d}[1], [x12], x2
|
||||
ld1 {v3.d}[0], [x12], x2
|
||||
sqadd v0.8h, v0.8h, v2.8h
|
||||
sqadd v0.8h, v0.8h, v2.8h
|
||||
ld1 {v3.d}[1], [x12], x2
|
||||
movi v4.8h, #0
|
||||
sqadd v1.8h, v1.8h, v3.8h
|
||||
mvni v5.8h, #0xFC, lsl #8 // movi #0x3FF
|
||||
clip10 v0.8h, v1.8h, v4.8h, v5.8h
|
||||
movi v4.8h, #0
|
||||
sqadd v1.8h, v1.8h, v3.8h
|
||||
mvni v5.8h, #0xFC, lsl #8 // movi #0x3FF
|
||||
clip10 v0.8h, v1.8h, v4.8h, v5.8h
|
||||
st1 {v0.d}[0], [x0], x2
|
||||
st1 {v0.d}[1], [x0], x2
|
||||
st1 {v1.d}[0], [x0], x2
|
||||
@@ -85,48 +85,48 @@ endfunc
|
||||
|
||||
function ff_hevc_add_residual_8x8_8_neon, export=1
|
||||
add x12, x0, x2
|
||||
add x2, x2, x2
|
||||
mov x3, #8
|
||||
1: subs x3, x3, #2
|
||||
add x2, x2, x2
|
||||
mov x3, #8
|
||||
1: subs x3, x3, #2
|
||||
ld1 {v2.d}[0], [x0]
|
||||
ld1 {v2.d}[1], [x12]
|
||||
uxtl v3.8h, v2.8b
|
||||
uxtl v3.8h, v2.8b
|
||||
ld1 {v0.8h-v1.8h}, [x1], #32
|
||||
uxtl2 v2.8h, v2.16b
|
||||
sqadd v0.8h, v0.8h, v3.8h
|
||||
sqadd v1.8h, v1.8h, v2.8h
|
||||
sqxtun v0.8b, v0.8h
|
||||
sqxtun2 v0.16b, v1.8h
|
||||
uxtl2 v2.8h, v2.16b
|
||||
sqadd v0.8h, v0.8h, v3.8h
|
||||
sqadd v1.8h, v1.8h, v2.8h
|
||||
sqxtun v0.8b, v0.8h
|
||||
sqxtun2 v0.16b, v1.8h
|
||||
st1 {v0.d}[0], [x0], x2
|
||||
st1 {v0.d}[1], [x12], x2
|
||||
bne 1b
|
||||
bne 1b
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_hevc_add_residual_8x8_10_neon, export=1
|
||||
add x12, x0, x2
|
||||
add x2, x2, x2
|
||||
mov x3, #8
|
||||
movi v4.8h, #0
|
||||
mvni v5.8h, #0xFC, lsl #8 // movi #0x3FF
|
||||
1: subs x3, x3, #2
|
||||
add x2, x2, x2
|
||||
mov x3, #8
|
||||
movi v4.8h, #0
|
||||
mvni v5.8h, #0xFC, lsl #8 // movi #0x3FF
|
||||
1: subs x3, x3, #2
|
||||
ld1 {v0.8h-v1.8h}, [x1], #32
|
||||
ld1 {v2.8h}, [x0]
|
||||
sqadd v0.8h, v0.8h, v2.8h
|
||||
sqadd v0.8h, v0.8h, v2.8h
|
||||
ld1 {v3.8h}, [x12]
|
||||
sqadd v1.8h, v1.8h, v3.8h
|
||||
clip10 v0.8h, v1.8h, v4.8h, v5.8h
|
||||
sqadd v1.8h, v1.8h, v3.8h
|
||||
clip10 v0.8h, v1.8h, v4.8h, v5.8h
|
||||
st1 {v0.8h}, [x0], x2
|
||||
st1 {v1.8h}, [x12], x2
|
||||
bne 1b
|
||||
bne 1b
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_hevc_add_residual_16x16_8_neon, export=1
|
||||
mov x3, #16
|
||||
mov x3, #16
|
||||
add x12, x0, x2
|
||||
add x2, x2, x2
|
||||
1: subs x3, x3, #2
|
||||
add x2, x2, x2
|
||||
1: subs x3, x3, #2
|
||||
ld1 {v16.16b}, [x0]
|
||||
ld1 {v0.8h-v3.8h}, [x1], #64
|
||||
ld1 {v19.16b}, [x12]
|
||||
@@ -134,47 +134,47 @@ function ff_hevc_add_residual_16x16_8_neon, export=1
|
||||
uxtl2 v18.8h, v16.16b
|
||||
uxtl v20.8h, v19.8b
|
||||
uxtl2 v21.8h, v19.16b
|
||||
sqadd v0.8h, v0.8h, v17.8h
|
||||
sqadd v1.8h, v1.8h, v18.8h
|
||||
sqadd v2.8h, v2.8h, v20.8h
|
||||
sqadd v3.8h, v3.8h, v21.8h
|
||||
sqxtun v0.8b, v0.8h
|
||||
sqadd v0.8h, v0.8h, v17.8h
|
||||
sqadd v1.8h, v1.8h, v18.8h
|
||||
sqadd v2.8h, v2.8h, v20.8h
|
||||
sqadd v3.8h, v3.8h, v21.8h
|
||||
sqxtun v0.8b, v0.8h
|
||||
sqxtun2 v0.16b, v1.8h
|
||||
sqxtun v1.8b, v2.8h
|
||||
sqxtun v1.8b, v2.8h
|
||||
sqxtun2 v1.16b, v3.8h
|
||||
st1 {v0.16b}, [x0], x2
|
||||
st1 {v1.16b}, [x12], x2
|
||||
bne 1b
|
||||
bne 1b
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_hevc_add_residual_16x16_10_neon, export=1
|
||||
mov x3, #16
|
||||
mov x3, #16
|
||||
movi v20.8h, #0
|
||||
mvni v21.8h, #0xFC, lsl #8 // movi #0x3FF
|
||||
add x12, x0, x2
|
||||
add x2, x2, x2
|
||||
1: subs x3, x3, #2
|
||||
add x2, x2, x2
|
||||
1: subs x3, x3, #2
|
||||
ld1 {v16.8h-v17.8h}, [x0]
|
||||
ld1 {v0.8h-v3.8h}, [x1], #64
|
||||
sqadd v0.8h, v0.8h, v16.8h
|
||||
sqadd v0.8h, v0.8h, v16.8h
|
||||
ld1 {v18.8h-v19.8h}, [x12]
|
||||
sqadd v1.8h, v1.8h, v17.8h
|
||||
sqadd v2.8h, v2.8h, v18.8h
|
||||
sqadd v3.8h, v3.8h, v19.8h
|
||||
clip10 v0.8h, v1.8h, v20.8h, v21.8h
|
||||
clip10 v2.8h, v3.8h, v20.8h, v21.8h
|
||||
sqadd v1.8h, v1.8h, v17.8h
|
||||
sqadd v2.8h, v2.8h, v18.8h
|
||||
sqadd v3.8h, v3.8h, v19.8h
|
||||
clip10 v0.8h, v1.8h, v20.8h, v21.8h
|
||||
clip10 v2.8h, v3.8h, v20.8h, v21.8h
|
||||
st1 {v0.8h-v1.8h}, [x0], x2
|
||||
st1 {v2.8h-v3.8h}, [x12], x2
|
||||
bne 1b
|
||||
bne 1b
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_hevc_add_residual_32x32_8_neon, export=1
|
||||
add x12, x0, x2
|
||||
add x2, x2, x2
|
||||
mov x3, #32
|
||||
1: subs x3, x3, #2
|
||||
add x2, x2, x2
|
||||
mov x3, #32
|
||||
1: subs x3, x3, #2
|
||||
ld1 {v20.16b, v21.16b}, [x0]
|
||||
uxtl v16.8h, v20.8b
|
||||
uxtl2 v17.8h, v20.16b
|
||||
@@ -187,43 +187,43 @@ function ff_hevc_add_residual_32x32_8_neon, export=1
|
||||
uxtl2 v21.8h, v22.16b
|
||||
uxtl v22.8h, v23.8b
|
||||
uxtl2 v23.8h, v23.16b
|
||||
sqadd v0.8h, v0.8h, v16.8h
|
||||
sqadd v1.8h, v1.8h, v17.8h
|
||||
sqadd v2.8h, v2.8h, v18.8h
|
||||
sqadd v3.8h, v3.8h, v19.8h
|
||||
sqadd v4.8h, v4.8h, v20.8h
|
||||
sqadd v5.8h, v5.8h, v21.8h
|
||||
sqadd v6.8h, v6.8h, v22.8h
|
||||
sqadd v7.8h, v7.8h, v23.8h
|
||||
sqxtun v0.8b, v0.8h
|
||||
sqadd v0.8h, v0.8h, v16.8h
|
||||
sqadd v1.8h, v1.8h, v17.8h
|
||||
sqadd v2.8h, v2.8h, v18.8h
|
||||
sqadd v3.8h, v3.8h, v19.8h
|
||||
sqadd v4.8h, v4.8h, v20.8h
|
||||
sqadd v5.8h, v5.8h, v21.8h
|
||||
sqadd v6.8h, v6.8h, v22.8h
|
||||
sqadd v7.8h, v7.8h, v23.8h
|
||||
sqxtun v0.8b, v0.8h
|
||||
sqxtun2 v0.16b, v1.8h
|
||||
sqxtun v1.8b, v2.8h
|
||||
sqxtun v1.8b, v2.8h
|
||||
sqxtun2 v1.16b, v3.8h
|
||||
sqxtun v2.8b, v4.8h
|
||||
sqxtun v2.8b, v4.8h
|
||||
sqxtun2 v2.16b, v5.8h
|
||||
st1 {v0.16b, v1.16b}, [x0], x2
|
||||
sqxtun v3.8b, v6.8h
|
||||
sqxtun v3.8b, v6.8h
|
||||
sqxtun2 v3.16b, v7.8h
|
||||
st1 {v2.16b, v3.16b}, [x12], x2
|
||||
bne 1b
|
||||
bne 1b
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_hevc_add_residual_32x32_10_neon, export=1
|
||||
mov x3, #32
|
||||
mov x3, #32
|
||||
movi v20.8h, #0
|
||||
mvni v21.8h, #0xFC, lsl #8 // movi #0x3FF
|
||||
1: subs x3, x3, #1
|
||||
1: subs x3, x3, #1
|
||||
ld1 {v0.8h-v3.8h}, [x1], #64
|
||||
ld1 {v16.8h-v19.8h}, [x0]
|
||||
sqadd v0.8h, v0.8h, v16.8h
|
||||
sqadd v1.8h, v1.8h, v17.8h
|
||||
sqadd v2.8h, v2.8h, v18.8h
|
||||
sqadd v3.8h, v3.8h, v19.8h
|
||||
clip10 v0.8h, v1.8h, v20.8h, v21.8h
|
||||
clip10 v2.8h, v3.8h, v20.8h, v21.8h
|
||||
sqadd v0.8h, v0.8h, v16.8h
|
||||
sqadd v1.8h, v1.8h, v17.8h
|
||||
sqadd v2.8h, v2.8h, v18.8h
|
||||
sqadd v3.8h, v3.8h, v19.8h
|
||||
clip10 v0.8h, v1.8h, v20.8h, v21.8h
|
||||
clip10 v2.8h, v3.8h, v20.8h, v21.8h
|
||||
st1 {v0.8h-v3.8h}, [x0], x2
|
||||
bne 1b
|
||||
bne 1b
|
||||
ret
|
||||
endfunc
|
||||
|
||||
@@ -246,19 +246,19 @@ endfunc
|
||||
|
||||
// uses and clobbers v28-v31 as temp registers
|
||||
.macro tr_4x4_8 in0, in1, in2, in3, out0, out1, out2, out3, p1, p2
|
||||
sshll\p1 v28.4s, \in0, #6
|
||||
mov v29.16b, v28.16b
|
||||
smull\p1 v30.4s, \in1, v0.h[1]
|
||||
smull\p1 v31.4s, \in1, v0.h[3]
|
||||
smlal\p2 v28.4s, \in2, v0.h[0] //e0
|
||||
smlsl\p2 v29.4s, \in2, v0.h[0] //e1
|
||||
smlal\p2 v30.4s, \in3, v0.h[3] //o0
|
||||
smlsl\p2 v31.4s, \in3, v0.h[1] //o1
|
||||
sshll\p1 v28.4s, \in0, #6
|
||||
mov v29.16b, v28.16b
|
||||
smull\p1 v30.4s, \in1, v0.h[1]
|
||||
smull\p1 v31.4s, \in1, v0.h[3]
|
||||
smlal\p2 v28.4s, \in2, v0.h[0] //e0
|
||||
smlsl\p2 v29.4s, \in2, v0.h[0] //e1
|
||||
smlal\p2 v30.4s, \in3, v0.h[3] //o0
|
||||
smlsl\p2 v31.4s, \in3, v0.h[1] //o1
|
||||
|
||||
add \out0, v28.4s, v30.4s
|
||||
add \out1, v29.4s, v31.4s
|
||||
sub \out2, v29.4s, v31.4s
|
||||
sub \out3, v28.4s, v30.4s
|
||||
add \out0, v28.4s, v30.4s
|
||||
add \out1, v29.4s, v31.4s
|
||||
sub \out2, v29.4s, v31.4s
|
||||
sub \out3, v28.4s, v30.4s
|
||||
.endm
|
||||
|
||||
.macro transpose8_4x4 r0, r1, r2, r3
|
||||
@@ -325,11 +325,11 @@ endfunc
|
||||
.macro idct_8x8 bitdepth
|
||||
function ff_hevc_idct_8x8_\bitdepth\()_neon, export=1
|
||||
//x0 - coeffs
|
||||
mov x1, x0
|
||||
mov x1, x0
|
||||
ld1 {v16.8h-v19.8h}, [x1], #64
|
||||
ld1 {v20.8h-v23.8h}, [x1]
|
||||
|
||||
movrel x1, trans
|
||||
movrel x1, trans
|
||||
ld1 {v0.8h}, [x1]
|
||||
|
||||
tr_8x4 7, v16,.4h, v17,.4h, v18,.4h, v19,.4h, v20,.4h, v21,.4h, v22,.4h, v23,.4h
|
||||
@@ -342,7 +342,7 @@ function ff_hevc_idct_8x8_\bitdepth\()_neon, export=1
|
||||
|
||||
transpose_8x8 v16, v17, v18, v19, v20, v21, v22, v23
|
||||
|
||||
mov x1, x0
|
||||
mov x1, x0
|
||||
st1 {v16.8h-v19.8h}, [x1], #64
|
||||
st1 {v20.8h-v23.8h}, [x1]
|
||||
|
||||
@@ -351,8 +351,8 @@ endfunc
|
||||
.endm
|
||||
|
||||
.macro butterfly e, o, tmp_p, tmp_m
|
||||
add \tmp_p, \e, \o
|
||||
sub \tmp_m, \e, \o
|
||||
add \tmp_p, \e, \o
|
||||
sub \tmp_m, \e, \o
|
||||
.endm
|
||||
|
||||
.macro tr16_8x4 in0, in1, in2, in3, offset
|
||||
@@ -381,7 +381,7 @@ endfunc
|
||||
butterfly v25.4s, v29.4s, v17.4s, v22.4s
|
||||
butterfly v26.4s, v30.4s, v18.4s, v21.4s
|
||||
butterfly v27.4s, v31.4s, v19.4s, v20.4s
|
||||
add x4, sp, #\offset
|
||||
add x4, sp, #\offset
|
||||
st1 {v16.4s-v19.4s}, [x4], #64
|
||||
st1 {v20.4s-v23.4s}, [x4]
|
||||
.endm
|
||||
@@ -398,14 +398,14 @@ endfunc
|
||||
.endm
|
||||
|
||||
.macro add_member in, t0, t1, t2, t3, t4, t5, t6, t7, op0, op1, op2, op3, op4, op5, op6, op7, p
|
||||
sum_sub v21.4s, \in, \t0, \op0, \p
|
||||
sum_sub v22.4s, \in, \t1, \op1, \p
|
||||
sum_sub v23.4s, \in, \t2, \op2, \p
|
||||
sum_sub v24.4s, \in, \t3, \op3, \p
|
||||
sum_sub v25.4s, \in, \t4, \op4, \p
|
||||
sum_sub v26.4s, \in, \t5, \op5, \p
|
||||
sum_sub v27.4s, \in, \t6, \op6, \p
|
||||
sum_sub v28.4s, \in, \t7, \op7, \p
|
||||
sum_sub v21.4s, \in, \t0, \op0, \p
|
||||
sum_sub v22.4s, \in, \t1, \op1, \p
|
||||
sum_sub v23.4s, \in, \t2, \op2, \p
|
||||
sum_sub v24.4s, \in, \t3, \op3, \p
|
||||
sum_sub v25.4s, \in, \t4, \op4, \p
|
||||
sum_sub v26.4s, \in, \t5, \op5, \p
|
||||
sum_sub v27.4s, \in, \t6, \op6, \p
|
||||
sum_sub v28.4s, \in, \t7, \op7, \p
|
||||
.endm
|
||||
|
||||
.macro butterfly16 in0, in1, in2, in3, in4, in5, in6, in7
|
||||
@@ -473,20 +473,20 @@ endfunc
|
||||
|
||||
.macro tr_16x4 name, shift, offset, step
|
||||
function func_tr_16x4_\name
|
||||
mov x1, x5
|
||||
add x3, x5, #(\step * 64)
|
||||
mov x2, #(\step * 128)
|
||||
mov x1, x5
|
||||
add x3, x5, #(\step * 64)
|
||||
mov x2, #(\step * 128)
|
||||
load16 v16.d, v17.d, v18.d, v19.d
|
||||
movrel x1, trans
|
||||
movrel x1, trans
|
||||
ld1 {v0.8h}, [x1]
|
||||
|
||||
tr16_8x4 v16, v17, v18, v19, \offset
|
||||
|
||||
add x1, x5, #(\step * 32)
|
||||
add x3, x5, #(\step * 3 *32)
|
||||
mov x2, #(\step * 128)
|
||||
add x1, x5, #(\step * 32)
|
||||
add x3, x5, #(\step * 3 *32)
|
||||
mov x2, #(\step * 128)
|
||||
load16 v20.d, v17.d, v18.d, v19.d
|
||||
movrel x1, trans, 16
|
||||
movrel x1, trans, 16
|
||||
ld1 {v1.8h}, [x1]
|
||||
smull v21.4s, v20.4h, v1.h[0]
|
||||
smull v22.4s, v20.4h, v1.h[1]
|
||||
@@ -505,16 +505,16 @@ function func_tr_16x4_\name
|
||||
add_member v19.4h, v1.h[6], v1.h[3], v1.h[0], v1.h[2], v1.h[5], v1.h[7], v1.h[4], v1.h[1], +, -, +, -, +, +, -, +
|
||||
add_member v19.8h, v1.h[7], v1.h[6], v1.h[5], v1.h[4], v1.h[3], v1.h[2], v1.h[1], v1.h[0], +, -, +, -, +, -, +, -, 2
|
||||
|
||||
add x4, sp, #\offset
|
||||
add x4, sp, #\offset
|
||||
ld1 {v16.4s-v19.4s}, [x4], #64
|
||||
|
||||
butterfly16 v16.4s, v21.4s, v17.4s, v22.4s, v18.4s, v23.4s, v19.4s, v24.4s
|
||||
scale v29, v30, v31, v24, v20.4s, v16.4s, v21.4s, v17.4s, v22.4s, v18.4s, v23.4s, v19.4s, \shift
|
||||
transpose16_4x4_2 v29, v30, v31, v24
|
||||
mov x1, x6
|
||||
add x3, x6, #(24 +3*32)
|
||||
mov x2, #32
|
||||
mov x4, #-32
|
||||
mov x1, x6
|
||||
add x3, x6, #(24 +3*32)
|
||||
mov x2, #32
|
||||
mov x4, #-32
|
||||
store16 v29.d, v30.d, v31.d, v24.d, x4
|
||||
|
||||
add x4, sp, #(\offset + 64)
|
||||
@@ -523,10 +523,10 @@ function func_tr_16x4_\name
|
||||
scale v29, v30, v31, v20, v20.4s, v16.4s, v25.4s, v17.4s, v26.4s, v18.4s, v27.4s, v19.4s, \shift
|
||||
transpose16_4x4_2 v29, v30, v31, v20
|
||||
|
||||
add x1, x6, #8
|
||||
add x3, x6, #(16 + 3 * 32)
|
||||
mov x2, #32
|
||||
mov x4, #-32
|
||||
add x1, x6, #8
|
||||
add x3, x6, #(16 + 3 * 32)
|
||||
mov x2, #32
|
||||
mov x4, #-32
|
||||
store16 v29.d, v30.d, v31.d, v20.d, x4
|
||||
|
||||
ret
|
||||
@@ -539,21 +539,21 @@ function ff_hevc_idct_16x16_\bitdepth\()_neon, export=1
|
||||
mov x15, x30
|
||||
|
||||
// allocate a temp buffer
|
||||
sub sp, sp, #640
|
||||
sub sp, sp, #640
|
||||
|
||||
.irp i, 0, 1, 2, 3
|
||||
add x5, x0, #(8 * \i)
|
||||
add x6, sp, #(8 * \i * 16)
|
||||
add x5, x0, #(8 * \i)
|
||||
add x6, sp, #(8 * \i * 16)
|
||||
bl func_tr_16x4_firstpass
|
||||
.endr
|
||||
|
||||
.irp i, 0, 1, 2, 3
|
||||
add x5, sp, #(8 * \i)
|
||||
add x6, x0, #(8 * \i * 16)
|
||||
add x5, sp, #(8 * \i)
|
||||
add x6, x0, #(8 * \i * 16)
|
||||
bl func_tr_16x4_secondpass_\bitdepth
|
||||
.endr
|
||||
|
||||
add sp, sp, #640
|
||||
add sp, sp, #640
|
||||
|
||||
mov x30, x15
|
||||
ret
|
||||
@@ -573,35 +573,36 @@ idct_16x16 10
|
||||
// void ff_hevc_idct_NxN_dc_DEPTH_neon(int16_t *coeffs)
|
||||
.macro idct_dc size, bitdepth
|
||||
function ff_hevc_idct_\size\()x\size\()_dc_\bitdepth\()_neon, export=1
|
||||
ld1r {v4.8h}, [x0]
|
||||
srshr v4.8h, v4.8h, #1
|
||||
srshr v0.8h, v4.8h, #(14 - \bitdepth)
|
||||
srshr v1.8h, v4.8h, #(14 - \bitdepth)
|
||||
movi v1.8h, #((1 << (14 - \bitdepth))+1)
|
||||
ld1r {v4.8h}, [x0]
|
||||
add v4.8h, v4.8h, v1.8h
|
||||
sshr v0.8h, v4.8h, #(15 - \bitdepth)
|
||||
sshr v1.8h, v4.8h, #(15 - \bitdepth)
|
||||
.if \size > 4
|
||||
srshr v2.8h, v4.8h, #(14 - \bitdepth)
|
||||
srshr v3.8h, v4.8h, #(14 - \bitdepth)
|
||||
sshr v2.8h, v4.8h, #(15 - \bitdepth)
|
||||
sshr v3.8h, v4.8h, #(15 - \bitdepth)
|
||||
.if \size > 16 /* dc 32x32 */
|
||||
mov x2, #4
|
||||
mov x2, #4
|
||||
1:
|
||||
subs x2, x2, #1
|
||||
subs x2, x2, #1
|
||||
.endif
|
||||
add x12, x0, #64
|
||||
mov x13, #128
|
||||
.if \size > 8 /* dc 16x16 */
|
||||
st1 {v0.8h-v3.8h}, [x0], x13
|
||||
st1 {v0.8h-v3.8h}, [x12], x13
|
||||
st1 {v0.8h-v3.8h}, [x0], x13
|
||||
st1 {v0.8h-v3.8h}, [x12], x13
|
||||
st1 {v0.8h-v3.8h}, [x0], x13
|
||||
st1 {v0.8h-v3.8h}, [x12], x13
|
||||
st1 {v0.8h-v3.8h}, [x0], x13
|
||||
st1 {v0.8h-v3.8h}, [x12], x13
|
||||
st1 {v0.8h-v3.8h}, [x0], x13
|
||||
st1 {v0.8h-v3.8h}, [x12], x13
|
||||
st1 {v0.8h-v3.8h}, [x0], x13
|
||||
st1 {v0.8h-v3.8h}, [x12], x13
|
||||
.endif /* dc 8x8 */
|
||||
st1 {v0.8h-v3.8h}, [x0], x13
|
||||
st1 {v0.8h-v3.8h}, [x12], x13
|
||||
st1 {v0.8h-v3.8h}, [x0], x13
|
||||
st1 {v0.8h-v3.8h}, [x12], x13
|
||||
.if \size > 16 /* dc 32x32 */
|
||||
bne 1b
|
||||
.endif
|
||||
.else /* dc 4x4 */
|
||||
st1 {v0.8h-v1.8h}, [x0]
|
||||
st1 {v0.8h-v1.8h}, [x0]
|
||||
.endif
|
||||
ret
|
||||
endfunc
|
||||
|
||||
@@ -30,20 +30,20 @@
|
||||
// int width, int height)
|
||||
function ff_hevc_sao_band_filter_8x8_8_neon, export=1
|
||||
sub sp, sp, #64
|
||||
stp xzr, xzr, [sp]
|
||||
stp xzr, xzr, [sp, #16]
|
||||
stp xzr, xzr, [sp, #32]
|
||||
stp xzr, xzr, [sp, #48]
|
||||
stp xzr, xzr, [sp]
|
||||
stp xzr, xzr, [sp, #16]
|
||||
stp xzr, xzr, [sp, #32]
|
||||
stp xzr, xzr, [sp, #48]
|
||||
mov w8, #4
|
||||
0:
|
||||
ldrsh x9, [x4, x8, lsl #1] // x9 = sao_offset_val[k+1]
|
||||
subs w8, w8, #1
|
||||
add w10, w8, w5 // x10 = k + sao_left_class
|
||||
and w10, w10, #0x1F
|
||||
add w10, w8, w5 // x10 = k + sao_left_class
|
||||
and w10, w10, #0x1F
|
||||
strh w9, [sp, x10, lsl #1]
|
||||
bne 0b
|
||||
ld1 {v16.16b-v19.16b}, [sp], #64
|
||||
movi v20.8h, #1
|
||||
ld1 {v16.16b-v19.16b}, [sp], #64
|
||||
movi v20.8h, #1
|
||||
1: // beginning of line
|
||||
mov w8, w6
|
||||
2:
|
||||
@@ -56,7 +56,7 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1
|
||||
// +----------------------------------->
|
||||
// i-0 i-1 i-2 i-3
|
||||
// dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]);
|
||||
ld1 {v2.8b}, [x1]
|
||||
ld1 {v2.8b}, [x1]
|
||||
// load src[x]
|
||||
uxtl v0.8h, v2.8b
|
||||
// >> shift
|
||||
@@ -68,13 +68,13 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1
|
||||
// shift insert index to upper byte
|
||||
sli v1.8h, v3.8h, #8
|
||||
// table
|
||||
tbx v2.16b, {v16.16b-v19.16b}, v1.16b
|
||||
tbx v2.16b, {v16.16b-v19.16b}, v1.16b
|
||||
// src[x] + table
|
||||
add v1.8h, v0.8h, v2.8h
|
||||
// clip + narrow
|
||||
sqxtun v4.8b, v1.8h
|
||||
// store
|
||||
st1 {v4.8b}, [x0]
|
||||
st1 {v4.8b}, [x0]
|
||||
// done 8 pixels
|
||||
subs w8, w8, #8
|
||||
bne 2b
|
||||
|
||||
+181
-183
@@ -26,297 +26,295 @@
|
||||
.if \avg
|
||||
mov x12, x0
|
||||
.endif
|
||||
1: ld1 {v0.16b}, [x1], x2
|
||||
ld1 {v1.16b}, [x1], x2
|
||||
ld1 {v2.16b}, [x1], x2
|
||||
ld1 {v3.16b}, [x1], x2
|
||||
1: ld1 {v0.16B}, [x1], x2
|
||||
ld1 {v1.16B}, [x1], x2
|
||||
ld1 {v2.16B}, [x1], x2
|
||||
ld1 {v3.16B}, [x1], x2
|
||||
.if \avg
|
||||
ld1 {v4.16b}, [x12], x2
|
||||
urhadd v0.16b, v0.16b, v4.16b
|
||||
ld1 {v5.16b}, [x12], x2
|
||||
urhadd v1.16b, v1.16b, v5.16b
|
||||
ld1 {v6.16b}, [x12], x2
|
||||
urhadd v2.16b, v2.16b, v6.16b
|
||||
ld1 {v7.16b}, [x12], x2
|
||||
urhadd v3.16b, v3.16b, v7.16b
|
||||
ld1 {v4.16B}, [x12], x2
|
||||
urhadd v0.16B, v0.16B, v4.16B
|
||||
ld1 {v5.16B}, [x12], x2
|
||||
urhadd v1.16B, v1.16B, v5.16B
|
||||
ld1 {v6.16B}, [x12], x2
|
||||
urhadd v2.16B, v2.16B, v6.16B
|
||||
ld1 {v7.16B}, [x12], x2
|
||||
urhadd v3.16B, v3.16B, v7.16B
|
||||
.endif
|
||||
subs w3, w3, #4
|
||||
st1 {v0.16b}, [x0], x2
|
||||
st1 {v1.16b}, [x0], x2
|
||||
st1 {v2.16b}, [x0], x2
|
||||
st1 {v3.16b}, [x0], x2
|
||||
st1 {v0.16B}, [x0], x2
|
||||
st1 {v1.16B}, [x0], x2
|
||||
st1 {v2.16B}, [x0], x2
|
||||
st1 {v3.16B}, [x0], x2
|
||||
b.ne 1b
|
||||
ret
|
||||
.endm
|
||||
|
||||
.macro pixels16_x2 rnd=1, avg=0
|
||||
1:
|
||||
ldur q1, [x1, #1]
|
||||
ld1 {v0.16b}, [x1], x2
|
||||
1: ld1 {v0.16B, v1.16B}, [x1], x2
|
||||
ld1 {v2.16B, v3.16B}, [x1], x2
|
||||
subs w3, w3, #2
|
||||
ldur q3, [x1, #1]
|
||||
ld1 {v2.16b}, [x1], x2
|
||||
avg v0.16b, v0.16b, v1.16b
|
||||
avg v2.16b, v2.16b, v3.16b
|
||||
ext v1.16B, v0.16B, v1.16B, #1
|
||||
avg v0.16B, v0.16B, v1.16B
|
||||
ext v3.16B, v2.16B, v3.16B, #1
|
||||
avg v2.16B, v2.16B, v3.16B
|
||||
.if \avg
|
||||
ld1 {v1.16b}, [x0], x2
|
||||
ld1 {v3.16b}, [x0]
|
||||
urhadd v0.16b, v0.16b, v1.16b
|
||||
urhadd v2.16b, v2.16b, v3.16b
|
||||
ld1 {v1.16B}, [x0], x2
|
||||
ld1 {v3.16B}, [x0]
|
||||
urhadd v0.16B, v0.16B, v1.16B
|
||||
urhadd v2.16B, v2.16B, v3.16B
|
||||
sub x0, x0, x2
|
||||
.endif
|
||||
st1 {v0.16b}, [x0], x2
|
||||
st1 {v2.16b}, [x0], x2
|
||||
st1 {v0.16B}, [x0], x2
|
||||
st1 {v2.16B}, [x0], x2
|
||||
b.ne 1b
|
||||
ret
|
||||
.endm
|
||||
|
||||
.macro pixels16_y2 rnd=1, avg=0
|
||||
sub w3, w3, #2
|
||||
ld1 {v0.16b}, [x1], x2
|
||||
ld1 {v1.16b}, [x1], x2
|
||||
ld1 {v0.16B}, [x1], x2
|
||||
ld1 {v1.16B}, [x1], x2
|
||||
1: subs w3, w3, #2
|
||||
avg v2.16b, v0.16b, v1.16b
|
||||
ld1 {v0.16b}, [x1], x2
|
||||
avg v3.16b, v0.16b, v1.16b
|
||||
ld1 {v1.16b}, [x1], x2
|
||||
avg v2.16B, v0.16B, v1.16B
|
||||
ld1 {v0.16B}, [x1], x2
|
||||
avg v3.16B, v0.16B, v1.16B
|
||||
ld1 {v1.16B}, [x1], x2
|
||||
.if \avg
|
||||
ld1 {v4.16b}, [x0], x2
|
||||
ld1 {v5.16b}, [x0]
|
||||
urhadd v2.16b, v2.16b, v4.16b
|
||||
urhadd v3.16b, v3.16b, v5.16b
|
||||
ld1 {v4.16B}, [x0], x2
|
||||
ld1 {v5.16B}, [x0]
|
||||
urhadd v2.16B, v2.16B, v4.16B
|
||||
urhadd v3.16B, v3.16B, v5.16B
|
||||
sub x0, x0, x2
|
||||
.endif
|
||||
st1 {v2.16b}, [x0], x2
|
||||
st1 {v3.16b}, [x0], x2
|
||||
st1 {v2.16B}, [x0], x2
|
||||
st1 {v3.16B}, [x0], x2
|
||||
b.ne 1b
|
||||
|
||||
avg v2.16b, v0.16b, v1.16b
|
||||
ld1 {v0.16b}, [x1], x2
|
||||
avg v3.16b, v0.16b, v1.16b
|
||||
avg v2.16B, v0.16B, v1.16B
|
||||
ld1 {v0.16B}, [x1], x2
|
||||
avg v3.16B, v0.16B, v1.16B
|
||||
.if \avg
|
||||
ld1 {v4.16b}, [x0], x2
|
||||
ld1 {v5.16b}, [x0]
|
||||
urhadd v2.16b, v2.16b, v4.16b
|
||||
urhadd v3.16b, v3.16b, v5.16b
|
||||
ld1 {v4.16B}, [x0], x2
|
||||
ld1 {v5.16B}, [x0]
|
||||
urhadd v2.16B, v2.16B, v4.16B
|
||||
urhadd v3.16B, v3.16B, v5.16B
|
||||
sub x0, x0, x2
|
||||
.endif
|
||||
st1 {v2.16b}, [x0], x2
|
||||
st1 {v3.16b}, [x0], x2
|
||||
st1 {v2.16B}, [x0], x2
|
||||
st1 {v3.16B}, [x0], x2
|
||||
|
||||
ret
|
||||
.endm
|
||||
|
||||
.macro pixels16_xy2 rnd=1, avg=0
|
||||
sub w3, w3, #2
|
||||
ldur q1, [x1, #1]
|
||||
ld1 {v0.16b}, [x1], x2
|
||||
ld1 {v0.16B, v1.16B}, [x1], x2
|
||||
ld1 {v4.16B, v5.16B}, [x1], x2
|
||||
NRND movi v26.8H, #1
|
||||
ldur q5, [x1, #1]
|
||||
ld1 {v4.16b}, [x1], x2
|
||||
uaddl v16.8h, v0.8b, v1.8b
|
||||
uaddl2 v20.8h, v0.16b, v1.16b
|
||||
uaddl v18.8h, v4.8b, v5.8b
|
||||
uaddl2 v22.8h, v4.16b, v5.16b
|
||||
ext v1.16B, v0.16B, v1.16B, #1
|
||||
ext v5.16B, v4.16B, v5.16B, #1
|
||||
uaddl v16.8H, v0.8B, v1.8B
|
||||
uaddl2 v20.8H, v0.16B, v1.16B
|
||||
uaddl v18.8H, v4.8B, v5.8B
|
||||
uaddl2 v22.8H, v4.16B, v5.16B
|
||||
1: subs w3, w3, #2
|
||||
ldur q30, [x1, #1]
|
||||
ld1 {v0.16b}, [x1], x2
|
||||
add v24.8h, v16.8h, v18.8h
|
||||
ld1 {v0.16B, v1.16B}, [x1], x2
|
||||
add v24.8H, v16.8H, v18.8H
|
||||
NRND add v24.8H, v24.8H, v26.8H
|
||||
add v1.8h, v20.8h, v22.8h
|
||||
mshrn v28.8b, v24.8h, #2
|
||||
ext v30.16B, v0.16B, v1.16B, #1
|
||||
add v1.8H, v20.8H, v22.8H
|
||||
mshrn v28.8B, v24.8H, #2
|
||||
NRND add v1.8H, v1.8H, v26.8H
|
||||
mshrn2 v28.16b, v1.8h, #2
|
||||
mshrn2 v28.16B, v1.8H, #2
|
||||
.if \avg
|
||||
ld1 {v16.16b}, [x0]
|
||||
urhadd v28.16b, v28.16b, v16.16b
|
||||
ld1 {v16.16B}, [x0]
|
||||
urhadd v28.16B, v28.16B, v16.16B
|
||||
.endif
|
||||
uaddl v16.8h, v0.8b, v30.8b
|
||||
ldur q3, [x1, #1]
|
||||
ld1 {v2.16b}, [x1], x2
|
||||
uaddl2 v20.8h, v0.16b, v30.16b
|
||||
st1 {v28.16b}, [x0], x2
|
||||
add v24.8h, v16.8h, v18.8h
|
||||
uaddl v16.8H, v0.8B, v30.8B
|
||||
ld1 {v2.16B, v3.16B}, [x1], x2
|
||||
uaddl2 v20.8H, v0.16B, v30.16B
|
||||
st1 {v28.16B}, [x0], x2
|
||||
add v24.8H, v16.8H, v18.8H
|
||||
NRND add v24.8H, v24.8H, v26.8H
|
||||
add v0.8h, v20.8h, v22.8h
|
||||
mshrn v30.8b, v24.8h, #2
|
||||
ext v3.16B, v2.16B, v3.16B, #1
|
||||
add v0.8H, v20.8H, v22.8H
|
||||
mshrn v30.8B, v24.8H, #2
|
||||
NRND add v0.8H, v0.8H, v26.8H
|
||||
mshrn2 v30.16b, v0.8h, #2
|
||||
mshrn2 v30.16B, v0.8H, #2
|
||||
.if \avg
|
||||
ld1 {v18.16b}, [x0]
|
||||
urhadd v30.16b, v30.16b, v18.16b
|
||||
ld1 {v18.16B}, [x0]
|
||||
urhadd v30.16B, v30.16B, v18.16B
|
||||
.endif
|
||||
uaddl v18.8h, v2.8b, v3.8b
|
||||
uaddl2 v22.8h, v2.16b, v3.16b
|
||||
st1 {v30.16b}, [x0], x2
|
||||
uaddl v18.8H, v2.8B, v3.8B
|
||||
uaddl2 v22.8H, v2.16B, v3.16B
|
||||
st1 {v30.16B}, [x0], x2
|
||||
b.gt 1b
|
||||
|
||||
ldur q30, [x1, #1]
|
||||
ld1 {v0.16b}, [x1], x2
|
||||
add v24.8h, v16.8h, v18.8h
|
||||
ld1 {v0.16B, v1.16B}, [x1], x2
|
||||
add v24.8H, v16.8H, v18.8H
|
||||
NRND add v24.8H, v24.8H, v26.8H
|
||||
add v1.8h, v20.8h, v22.8h
|
||||
mshrn v28.8b, v24.8h, #2
|
||||
ext v30.16B, v0.16B, v1.16B, #1
|
||||
add v1.8H, v20.8H, v22.8H
|
||||
mshrn v28.8B, v24.8H, #2
|
||||
NRND add v1.8H, v1.8H, v26.8H
|
||||
mshrn2 v28.16b, v1.8h, #2
|
||||
mshrn2 v28.16B, v1.8H, #2
|
||||
.if \avg
|
||||
ld1 {v16.16b}, [x0]
|
||||
urhadd v28.16b, v28.16b, v16.16b
|
||||
ld1 {v16.16B}, [x0]
|
||||
urhadd v28.16B, v28.16B, v16.16B
|
||||
.endif
|
||||
uaddl v16.8h, v0.8b, v30.8b
|
||||
uaddl2 v20.8h, v0.16b, v30.16b
|
||||
st1 {v28.16b}, [x0], x2
|
||||
add v24.8h, v16.8h, v18.8h
|
||||
uaddl v16.8H, v0.8B, v30.8B
|
||||
uaddl2 v20.8H, v0.16B, v30.16B
|
||||
st1 {v28.16B}, [x0], x2
|
||||
add v24.8H, v16.8H, v18.8H
|
||||
NRND add v24.8H, v24.8H, v26.8H
|
||||
add v0.8h, v20.8h, v22.8h
|
||||
mshrn v30.8b, v24.8h, #2
|
||||
add v0.8H, v20.8H, v22.8H
|
||||
mshrn v30.8B, v24.8H, #2
|
||||
NRND add v0.8H, v0.8H, v26.8H
|
||||
mshrn2 v30.16b, v0.8h, #2
|
||||
mshrn2 v30.16B, v0.8H, #2
|
||||
.if \avg
|
||||
ld1 {v18.16b}, [x0]
|
||||
urhadd v30.16b, v30.16b, v18.16b
|
||||
ld1 {v18.16B}, [x0]
|
||||
urhadd v30.16B, v30.16B, v18.16B
|
||||
.endif
|
||||
st1 {v30.16b}, [x0], x2
|
||||
st1 {v30.16B}, [x0], x2
|
||||
|
||||
ret
|
||||
.endm
|
||||
|
||||
.macro pixels8 rnd=1, avg=0
|
||||
1: ld1 {v0.8b}, [x1], x2
|
||||
ld1 {v1.8b}, [x1], x2
|
||||
ld1 {v2.8b}, [x1], x2
|
||||
ld1 {v3.8b}, [x1], x2
|
||||
1: ld1 {v0.8B}, [x1], x2
|
||||
ld1 {v1.8B}, [x1], x2
|
||||
ld1 {v2.8B}, [x1], x2
|
||||
ld1 {v3.8B}, [x1], x2
|
||||
.if \avg
|
||||
ld1 {v4.8b}, [x0], x2
|
||||
urhadd v0.8b, v0.8b, v4.8b
|
||||
ld1 {v5.8b}, [x0], x2
|
||||
urhadd v1.8b, v1.8b, v5.8b
|
||||
ld1 {v6.8b}, [x0], x2
|
||||
urhadd v2.8b, v2.8b, v6.8b
|
||||
ld1 {v7.8b}, [x0], x2
|
||||
urhadd v3.8b, v3.8b, v7.8b
|
||||
ld1 {v4.8B}, [x0], x2
|
||||
urhadd v0.8B, v0.8B, v4.8B
|
||||
ld1 {v5.8B}, [x0], x2
|
||||
urhadd v1.8B, v1.8B, v5.8B
|
||||
ld1 {v6.8B}, [x0], x2
|
||||
urhadd v2.8B, v2.8B, v6.8B
|
||||
ld1 {v7.8B}, [x0], x2
|
||||
urhadd v3.8B, v3.8B, v7.8B
|
||||
sub x0, x0, x2, lsl #2
|
||||
.endif
|
||||
subs w3, w3, #4
|
||||
st1 {v0.8b}, [x0], x2
|
||||
st1 {v1.8b}, [x0], x2
|
||||
st1 {v2.8b}, [x0], x2
|
||||
st1 {v3.8b}, [x0], x2
|
||||
st1 {v0.8B}, [x0], x2
|
||||
st1 {v1.8B}, [x0], x2
|
||||
st1 {v2.8B}, [x0], x2
|
||||
st1 {v3.8B}, [x0], x2
|
||||
b.ne 1b
|
||||
ret
|
||||
.endm
|
||||
|
||||
.macro pixels8_x2 rnd=1, avg=0
|
||||
1:
|
||||
ldur d1, [x1, #1]
|
||||
ld1 {v0.8b}, [x1], x2
|
||||
ldur d3, [x1, #1]
|
||||
ld1 {v2.8b}, [x1], x2
|
||||
1: ld1 {v0.8B, v1.8B}, [x1], x2
|
||||
ext v1.8B, v0.8B, v1.8B, #1
|
||||
ld1 {v2.8B, v3.8B}, [x1], x2
|
||||
ext v3.8B, v2.8B, v3.8B, #1
|
||||
subs w3, w3, #2
|
||||
avg v0.8b, v0.8b, v1.8b
|
||||
avg v2.8b, v2.8b, v3.8b
|
||||
avg v0.8B, v0.8B, v1.8B
|
||||
avg v2.8B, v2.8B, v3.8B
|
||||
.if \avg
|
||||
ld1 {v4.8b}, [x0], x2
|
||||
ld1 {v5.8b}, [x0]
|
||||
urhadd v0.8b, v0.8b, v4.8b
|
||||
urhadd v2.8b, v2.8b, v5.8b
|
||||
ld1 {v4.8B}, [x0], x2
|
||||
ld1 {v5.8B}, [x0]
|
||||
urhadd v0.8B, v0.8B, v4.8B
|
||||
urhadd v2.8B, v2.8B, v5.8B
|
||||
sub x0, x0, x2
|
||||
.endif
|
||||
st1 {v0.8b}, [x0], x2
|
||||
st1 {v2.8b}, [x0], x2
|
||||
st1 {v0.8B}, [x0], x2
|
||||
st1 {v2.8B}, [x0], x2
|
||||
b.ne 1b
|
||||
ret
|
||||
.endm
|
||||
|
||||
.macro pixels8_y2 rnd=1, avg=0
|
||||
sub w3, w3, #2
|
||||
ld1 {v0.8b}, [x1], x2
|
||||
ld1 {v1.8b}, [x1], x2
|
||||
ld1 {v0.8B}, [x1], x2
|
||||
ld1 {v1.8B}, [x1], x2
|
||||
1: subs w3, w3, #2
|
||||
avg v4.8b, v0.8b, v1.8b
|
||||
ld1 {v0.8b}, [x1], x2
|
||||
avg v5.8b, v0.8b, v1.8b
|
||||
ld1 {v1.8b}, [x1], x2
|
||||
avg v4.8B, v0.8B, v1.8B
|
||||
ld1 {v0.8B}, [x1], x2
|
||||
avg v5.8B, v0.8B, v1.8B
|
||||
ld1 {v1.8B}, [x1], x2
|
||||
.if \avg
|
||||
ld1 {v2.8b}, [x0], x2
|
||||
ld1 {v3.8b}, [x0]
|
||||
urhadd v4.8b, v4.8b, v2.8b
|
||||
urhadd v5.8b, v5.8b, v3.8b
|
||||
ld1 {v2.8B}, [x0], x2
|
||||
ld1 {v3.8B}, [x0]
|
||||
urhadd v4.8B, v4.8B, v2.8B
|
||||
urhadd v5.8B, v5.8B, v3.8B
|
||||
sub x0, x0, x2
|
||||
.endif
|
||||
st1 {v4.8b}, [x0], x2
|
||||
st1 {v5.8b}, [x0], x2
|
||||
st1 {v4.8B}, [x0], x2
|
||||
st1 {v5.8B}, [x0], x2
|
||||
b.ne 1b
|
||||
|
||||
avg v4.8b, v0.8b, v1.8b
|
||||
ld1 {v0.8b}, [x1], x2
|
||||
avg v5.8b, v0.8b, v1.8b
|
||||
avg v4.8B, v0.8B, v1.8B
|
||||
ld1 {v0.8B}, [x1], x2
|
||||
avg v5.8B, v0.8B, v1.8B
|
||||
.if \avg
|
||||
ld1 {v2.8b}, [x0], x2
|
||||
ld1 {v3.8b}, [x0]
|
||||
urhadd v4.8b, v4.8b, v2.8b
|
||||
urhadd v5.8b, v5.8b, v3.8b
|
||||
ld1 {v2.8B}, [x0], x2
|
||||
ld1 {v3.8B}, [x0]
|
||||
urhadd v4.8B, v4.8B, v2.8B
|
||||
urhadd v5.8B, v5.8B, v3.8B
|
||||
sub x0, x0, x2
|
||||
.endif
|
||||
st1 {v4.8b}, [x0], x2
|
||||
st1 {v5.8b}, [x0], x2
|
||||
st1 {v4.8B}, [x0], x2
|
||||
st1 {v5.8B}, [x0], x2
|
||||
|
||||
ret
|
||||
.endm
|
||||
|
||||
.macro pixels8_xy2 rnd=1, avg=0
|
||||
ldur d4, [x1, #1]
|
||||
sub w3, w3, #2
|
||||
ld1 {v0.8b}, [x1], x2
|
||||
ld1 {v0.16B}, [x1], x2
|
||||
ld1 {v1.16B}, [x1], x2
|
||||
NRND movi v19.8H, #1
|
||||
ldur d6, [x1, #1]
|
||||
ld1 {v1.8b}, [x1], x2
|
||||
uaddl v16.8h, v0.8b, v4.8b
|
||||
uaddl v17.8h, v1.8b, v6.8b
|
||||
ext v4.16B, v0.16B, v4.16B, #1
|
||||
ext v6.16B, v1.16B, v6.16B, #1
|
||||
uaddl v16.8H, v0.8B, v4.8B
|
||||
uaddl v17.8H, v1.8B, v6.8B
|
||||
1: subs w3, w3, #2
|
||||
ldur d4, [x1, #1]
|
||||
ld1 {v0.8b}, [x1], x2
|
||||
add v18.8h, v16.8h, v17.8h
|
||||
ld1 {v0.16B}, [x1], x2
|
||||
add v18.8H, v16.8H, v17.8H
|
||||
ext v4.16B, v0.16B, v4.16B, #1
|
||||
NRND add v18.8H, v18.8H, v19.8H
|
||||
uaddl v16.8h, v0.8b, v4.8b
|
||||
mshrn v5.8b, v18.8h, #2
|
||||
ldur d6, [x1, #1]
|
||||
ld1 {v1.8b}, [x1], x2
|
||||
add v18.8h, v16.8h, v17.8h
|
||||
uaddl v16.8H, v0.8B, v4.8B
|
||||
mshrn v5.8B, v18.8H, #2
|
||||
ld1 {v1.16B}, [x1], x2
|
||||
add v18.8H, v16.8H, v17.8H
|
||||
.if \avg
|
||||
ld1 {v7.8b}, [x0]
|
||||
urhadd v5.8b, v5.8b, v7.8b
|
||||
ld1 {v7.8B}, [x0]
|
||||
urhadd v5.8B, v5.8B, v7.8B
|
||||
.endif
|
||||
NRND add v18.8H, v18.8H, v19.8H
|
||||
st1 {v5.8b}, [x0], x2
|
||||
mshrn v7.8b, v18.8h, #2
|
||||
st1 {v5.8B}, [x0], x2
|
||||
mshrn v7.8B, v18.8H, #2
|
||||
.if \avg
|
||||
ld1 {v5.8b}, [x0]
|
||||
urhadd v7.8b, v7.8b, v5.8b
|
||||
ld1 {v5.8B}, [x0]
|
||||
urhadd v7.8B, v7.8B, v5.8B
|
||||
.endif
|
||||
uaddl v17.8h, v1.8b, v6.8b
|
||||
st1 {v7.8b}, [x0], x2
|
||||
ext v6.16B, v1.16B, v6.16B, #1
|
||||
uaddl v17.8H, v1.8B, v6.8B
|
||||
st1 {v7.8B}, [x0], x2
|
||||
b.gt 1b
|
||||
|
||||
ldur d4, [x1, #1]
|
||||
ld1 {v0.8b}, [x1], x2
|
||||
add v18.8h, v16.8h, v17.8h
|
||||
ld1 {v0.16B}, [x1], x2
|
||||
add v18.8H, v16.8H, v17.8H
|
||||
ext v4.16B, v0.16B, v4.16B, #1
|
||||
NRND add v18.8H, v18.8H, v19.8H
|
||||
uaddl v16.8h, v0.8b, v4.8b
|
||||
mshrn v5.8b, v18.8h, #2
|
||||
add v18.8h, v16.8h, v17.8h
|
||||
uaddl v16.8H, v0.8B, v4.8B
|
||||
mshrn v5.8B, v18.8H, #2
|
||||
add v18.8H, v16.8H, v17.8H
|
||||
.if \avg
|
||||
ld1 {v7.8b}, [x0]
|
||||
urhadd v5.8b, v5.8b, v7.8b
|
||||
ld1 {v7.8B}, [x0]
|
||||
urhadd v5.8B, v5.8B, v7.8B
|
||||
.endif
|
||||
NRND add v18.8H, v18.8H, v19.8H
|
||||
st1 {v5.8b}, [x0], x2
|
||||
mshrn v7.8b, v18.8h, #2
|
||||
st1 {v5.8B}, [x0], x2
|
||||
mshrn v7.8B, v18.8H, #2
|
||||
.if \avg
|
||||
ld1 {v5.8b}, [x0]
|
||||
urhadd v7.8b, v7.8b, v5.8b
|
||||
ld1 {v5.8B}, [x0]
|
||||
urhadd v7.8B, v7.8B, v5.8B
|
||||
.endif
|
||||
st1 {v7.8b}, [x0], x2
|
||||
st1 {v7.8B}, [x0], x2
|
||||
|
||||
ret
|
||||
.endm
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
#ifndef AVCODEC_AARCH64_IDCT_H
|
||||
#define AVCODEC_AARCH64_IDCT_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
void ff_simple_idct_neon(int16_t *data);
|
||||
|
||||
+96
-96
@@ -17,133 +17,133 @@
|
||||
*/
|
||||
|
||||
.macro transpose_8x8B r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
|
||||
trn1 \r8\().8b, \r0\().8b, \r1\().8b
|
||||
trn2 \r9\().8b, \r0\().8b, \r1\().8b
|
||||
trn1 \r1\().8b, \r2\().8b, \r3\().8b
|
||||
trn2 \r3\().8b, \r2\().8b, \r3\().8b
|
||||
trn1 \r0\().8b, \r4\().8b, \r5\().8b
|
||||
trn2 \r5\().8b, \r4\().8b, \r5\().8b
|
||||
trn1 \r2\().8b, \r6\().8b, \r7\().8b
|
||||
trn2 \r7\().8b, \r6\().8b, \r7\().8b
|
||||
trn1 \r8\().8B, \r0\().8B, \r1\().8B
|
||||
trn2 \r9\().8B, \r0\().8B, \r1\().8B
|
||||
trn1 \r1\().8B, \r2\().8B, \r3\().8B
|
||||
trn2 \r3\().8B, \r2\().8B, \r3\().8B
|
||||
trn1 \r0\().8B, \r4\().8B, \r5\().8B
|
||||
trn2 \r5\().8B, \r4\().8B, \r5\().8B
|
||||
trn1 \r2\().8B, \r6\().8B, \r7\().8B
|
||||
trn2 \r7\().8B, \r6\().8B, \r7\().8B
|
||||
|
||||
trn1 \r4\().4h, \r0\().4h, \r2\().4h
|
||||
trn2 \r2\().4h, \r0\().4h, \r2\().4h
|
||||
trn1 \r6\().4h, \r5\().4h, \r7\().4h
|
||||
trn2 \r7\().4h, \r5\().4h, \r7\().4h
|
||||
trn1 \r5\().4h, \r9\().4h, \r3\().4h
|
||||
trn2 \r9\().4h, \r9\().4h, \r3\().4h
|
||||
trn1 \r3\().4h, \r8\().4h, \r1\().4h
|
||||
trn2 \r8\().4h, \r8\().4h, \r1\().4h
|
||||
trn1 \r4\().4H, \r0\().4H, \r2\().4H
|
||||
trn2 \r2\().4H, \r0\().4H, \r2\().4H
|
||||
trn1 \r6\().4H, \r5\().4H, \r7\().4H
|
||||
trn2 \r7\().4H, \r5\().4H, \r7\().4H
|
||||
trn1 \r5\().4H, \r9\().4H, \r3\().4H
|
||||
trn2 \r9\().4H, \r9\().4H, \r3\().4H
|
||||
trn1 \r3\().4H, \r8\().4H, \r1\().4H
|
||||
trn2 \r8\().4H, \r8\().4H, \r1\().4H
|
||||
|
||||
trn1 \r0\().2s, \r3\().2s, \r4\().2s
|
||||
trn2 \r4\().2s, \r3\().2s, \r4\().2s
|
||||
trn1 \r0\().2S, \r3\().2S, \r4\().2S
|
||||
trn2 \r4\().2S, \r3\().2S, \r4\().2S
|
||||
|
||||
trn1 \r1\().2s, \r5\().2s, \r6\().2s
|
||||
trn2 \r5\().2s, \r5\().2s, \r6\().2s
|
||||
trn1 \r1\().2S, \r5\().2S, \r6\().2S
|
||||
trn2 \r5\().2S, \r5\().2S, \r6\().2S
|
||||
|
||||
trn2 \r6\().2s, \r8\().2s, \r2\().2s
|
||||
trn1 \r2\().2s, \r8\().2s, \r2\().2s
|
||||
trn2 \r6\().2S, \r8\().2S, \r2\().2S
|
||||
trn1 \r2\().2S, \r8\().2S, \r2\().2S
|
||||
|
||||
trn1 \r3\().2s, \r9\().2s, \r7\().2s
|
||||
trn2 \r7\().2s, \r9\().2s, \r7\().2s
|
||||
trn1 \r3\().2S, \r9\().2S, \r7\().2S
|
||||
trn2 \r7\().2S, \r9\().2S, \r7\().2S
|
||||
.endm
|
||||
|
||||
.macro transpose_8x16B r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
|
||||
trn1 \t0\().16b, \r0\().16b, \r1\().16b
|
||||
trn2 \t1\().16b, \r0\().16b, \r1\().16b
|
||||
trn1 \r1\().16b, \r2\().16b, \r3\().16b
|
||||
trn2 \r3\().16b, \r2\().16b, \r3\().16b
|
||||
trn1 \r0\().16b, \r4\().16b, \r5\().16b
|
||||
trn2 \r5\().16b, \r4\().16b, \r5\().16b
|
||||
trn1 \r2\().16b, \r6\().16b, \r7\().16b
|
||||
trn2 \r7\().16b, \r6\().16b, \r7\().16b
|
||||
trn1 \t0\().16B, \r0\().16B, \r1\().16B
|
||||
trn2 \t1\().16B, \r0\().16B, \r1\().16B
|
||||
trn1 \r1\().16B, \r2\().16B, \r3\().16B
|
||||
trn2 \r3\().16B, \r2\().16B, \r3\().16B
|
||||
trn1 \r0\().16B, \r4\().16B, \r5\().16B
|
||||
trn2 \r5\().16B, \r4\().16B, \r5\().16B
|
||||
trn1 \r2\().16B, \r6\().16B, \r7\().16B
|
||||
trn2 \r7\().16B, \r6\().16B, \r7\().16B
|
||||
|
||||
trn1 \r4\().8h, \r0\().8h, \r2\().8h
|
||||
trn2 \r2\().8h, \r0\().8h, \r2\().8h
|
||||
trn1 \r6\().8h, \r5\().8h, \r7\().8h
|
||||
trn2 \r7\().8h, \r5\().8h, \r7\().8h
|
||||
trn1 \r5\().8h, \t1\().8h, \r3\().8h
|
||||
trn2 \t1\().8h, \t1\().8h, \r3\().8h
|
||||
trn1 \r3\().8h, \t0\().8h, \r1\().8h
|
||||
trn2 \t0\().8h, \t0\().8h, \r1\().8h
|
||||
trn1 \r4\().8H, \r0\().8H, \r2\().8H
|
||||
trn2 \r2\().8H, \r0\().8H, \r2\().8H
|
||||
trn1 \r6\().8H, \r5\().8H, \r7\().8H
|
||||
trn2 \r7\().8H, \r5\().8H, \r7\().8H
|
||||
trn1 \r5\().8H, \t1\().8H, \r3\().8H
|
||||
trn2 \t1\().8H, \t1\().8H, \r3\().8H
|
||||
trn1 \r3\().8H, \t0\().8H, \r1\().8H
|
||||
trn2 \t0\().8H, \t0\().8H, \r1\().8H
|
||||
|
||||
trn1 \r0\().4s, \r3\().4s, \r4\().4s
|
||||
trn2 \r4\().4s, \r3\().4s, \r4\().4s
|
||||
trn1 \r0\().4S, \r3\().4S, \r4\().4S
|
||||
trn2 \r4\().4S, \r3\().4S, \r4\().4S
|
||||
|
||||
trn1 \r1\().4s, \r5\().4s, \r6\().4s
|
||||
trn2 \r5\().4s, \r5\().4s, \r6\().4s
|
||||
trn1 \r1\().4S, \r5\().4S, \r6\().4S
|
||||
trn2 \r5\().4S, \r5\().4S, \r6\().4S
|
||||
|
||||
trn2 \r6\().4s, \t0\().4s, \r2\().4s
|
||||
trn1 \r2\().4s, \t0\().4s, \r2\().4s
|
||||
trn2 \r6\().4S, \t0\().4S, \r2\().4S
|
||||
trn1 \r2\().4S, \t0\().4S, \r2\().4S
|
||||
|
||||
trn1 \r3\().4s, \t1\().4s, \r7\().4s
|
||||
trn2 \r7\().4s, \t1\().4s, \r7\().4s
|
||||
trn1 \r3\().4S, \t1\().4S, \r7\().4S
|
||||
trn2 \r7\().4S, \t1\().4S, \r7\().4S
|
||||
.endm
|
||||
|
||||
.macro transpose_4x16B r0, r1, r2, r3, t4, t5, t6, t7
|
||||
trn1 \t4\().16b, \r0\().16b, \r1\().16b
|
||||
trn2 \t5\().16b, \r0\().16b, \r1\().16b
|
||||
trn1 \t6\().16b, \r2\().16b, \r3\().16b
|
||||
trn2 \t7\().16b, \r2\().16b, \r3\().16b
|
||||
trn1 \t4\().16B, \r0\().16B, \r1\().16B
|
||||
trn2 \t5\().16B, \r0\().16B, \r1\().16B
|
||||
trn1 \t6\().16B, \r2\().16B, \r3\().16B
|
||||
trn2 \t7\().16B, \r2\().16B, \r3\().16B
|
||||
|
||||
trn1 \r0\().8h, \t4\().8h, \t6\().8h
|
||||
trn2 \r2\().8h, \t4\().8h, \t6\().8h
|
||||
trn1 \r1\().8h, \t5\().8h, \t7\().8h
|
||||
trn2 \r3\().8h, \t5\().8h, \t7\().8h
|
||||
trn1 \r0\().8H, \t4\().8H, \t6\().8H
|
||||
trn2 \r2\().8H, \t4\().8H, \t6\().8H
|
||||
trn1 \r1\().8H, \t5\().8H, \t7\().8H
|
||||
trn2 \r3\().8H, \t5\().8H, \t7\().8H
|
||||
.endm
|
||||
|
||||
.macro transpose_4x8B r0, r1, r2, r3, t4, t5, t6, t7
|
||||
trn1 \t4\().8b, \r0\().8b, \r1\().8b
|
||||
trn2 \t5\().8b, \r0\().8b, \r1\().8b
|
||||
trn1 \t6\().8b, \r2\().8b, \r3\().8b
|
||||
trn2 \t7\().8b, \r2\().8b, \r3\().8b
|
||||
trn1 \t4\().8B, \r0\().8B, \r1\().8B
|
||||
trn2 \t5\().8B, \r0\().8B, \r1\().8B
|
||||
trn1 \t6\().8B, \r2\().8B, \r3\().8B
|
||||
trn2 \t7\().8B, \r2\().8B, \r3\().8B
|
||||
|
||||
trn1 \r0\().4h, \t4\().4h, \t6\().4h
|
||||
trn2 \r2\().4h, \t4\().4h, \t6\().4h
|
||||
trn1 \r1\().4h, \t5\().4h, \t7\().4h
|
||||
trn2 \r3\().4h, \t5\().4h, \t7\().4h
|
||||
trn1 \r0\().4H, \t4\().4H, \t6\().4H
|
||||
trn2 \r2\().4H, \t4\().4H, \t6\().4H
|
||||
trn1 \r1\().4H, \t5\().4H, \t7\().4H
|
||||
trn2 \r3\().4H, \t5\().4H, \t7\().4H
|
||||
.endm
|
||||
|
||||
.macro transpose_4x4H r0, r1, r2, r3, r4, r5, r6, r7
|
||||
trn1 \r4\().4h, \r0\().4h, \r1\().4h
|
||||
trn2 \r5\().4h, \r0\().4h, \r1\().4h
|
||||
trn1 \r6\().4h, \r2\().4h, \r3\().4h
|
||||
trn2 \r7\().4h, \r2\().4h, \r3\().4h
|
||||
trn1 \r0\().2s, \r4\().2s, \r6\().2s
|
||||
trn2 \r2\().2s, \r4\().2s, \r6\().2s
|
||||
trn1 \r1\().2s, \r5\().2s, \r7\().2s
|
||||
trn2 \r3\().2s, \r5\().2s, \r7\().2s
|
||||
trn1 \r4\().4H, \r0\().4H, \r1\().4H
|
||||
trn2 \r5\().4H, \r0\().4H, \r1\().4H
|
||||
trn1 \r6\().4H, \r2\().4H, \r3\().4H
|
||||
trn2 \r7\().4H, \r2\().4H, \r3\().4H
|
||||
trn1 \r0\().2S, \r4\().2S, \r6\().2S
|
||||
trn2 \r2\().2S, \r4\().2S, \r6\().2S
|
||||
trn1 \r1\().2S, \r5\().2S, \r7\().2S
|
||||
trn2 \r3\().2S, \r5\().2S, \r7\().2S
|
||||
.endm
|
||||
|
||||
.macro transpose_8x8H r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
|
||||
trn1 \r8\().8h, \r0\().8h, \r1\().8h
|
||||
trn2 \r9\().8h, \r0\().8h, \r1\().8h
|
||||
trn1 \r1\().8h, \r2\().8h, \r3\().8h
|
||||
trn2 \r3\().8h, \r2\().8h, \r3\().8h
|
||||
trn1 \r0\().8h, \r4\().8h, \r5\().8h
|
||||
trn2 \r5\().8h, \r4\().8h, \r5\().8h
|
||||
trn1 \r2\().8h, \r6\().8h, \r7\().8h
|
||||
trn2 \r7\().8h, \r6\().8h, \r7\().8h
|
||||
trn1 \r8\().8H, \r0\().8H, \r1\().8H
|
||||
trn2 \r9\().8H, \r0\().8H, \r1\().8H
|
||||
trn1 \r1\().8H, \r2\().8H, \r3\().8H
|
||||
trn2 \r3\().8H, \r2\().8H, \r3\().8H
|
||||
trn1 \r0\().8H, \r4\().8H, \r5\().8H
|
||||
trn2 \r5\().8H, \r4\().8H, \r5\().8H
|
||||
trn1 \r2\().8H, \r6\().8H, \r7\().8H
|
||||
trn2 \r7\().8H, \r6\().8H, \r7\().8H
|
||||
|
||||
trn1 \r4\().4s, \r0\().4s, \r2\().4s
|
||||
trn2 \r2\().4s, \r0\().4s, \r2\().4s
|
||||
trn1 \r6\().4s, \r5\().4s, \r7\().4s
|
||||
trn2 \r7\().4s, \r5\().4s, \r7\().4s
|
||||
trn1 \r5\().4s, \r9\().4s, \r3\().4s
|
||||
trn2 \r9\().4s, \r9\().4s, \r3\().4s
|
||||
trn1 \r3\().4s, \r8\().4s, \r1\().4s
|
||||
trn2 \r8\().4s, \r8\().4s, \r1\().4s
|
||||
trn1 \r4\().4S, \r0\().4S, \r2\().4S
|
||||
trn2 \r2\().4S, \r0\().4S, \r2\().4S
|
||||
trn1 \r6\().4S, \r5\().4S, \r7\().4S
|
||||
trn2 \r7\().4S, \r5\().4S, \r7\().4S
|
||||
trn1 \r5\().4S, \r9\().4S, \r3\().4S
|
||||
trn2 \r9\().4S, \r9\().4S, \r3\().4S
|
||||
trn1 \r3\().4S, \r8\().4S, \r1\().4S
|
||||
trn2 \r8\().4S, \r8\().4S, \r1\().4S
|
||||
|
||||
trn1 \r0\().2d, \r3\().2d, \r4\().2d
|
||||
trn2 \r4\().2d, \r3\().2d, \r4\().2d
|
||||
trn1 \r0\().2D, \r3\().2D, \r4\().2D
|
||||
trn2 \r4\().2D, \r3\().2D, \r4\().2D
|
||||
|
||||
trn1 \r1\().2d, \r5\().2d, \r6\().2d
|
||||
trn2 \r5\().2d, \r5\().2d, \r6\().2d
|
||||
trn1 \r1\().2D, \r5\().2D, \r6\().2D
|
||||
trn2 \r5\().2D, \r5\().2D, \r6\().2D
|
||||
|
||||
trn2 \r6\().2d, \r8\().2d, \r2\().2d
|
||||
trn1 \r2\().2d, \r8\().2d, \r2\().2d
|
||||
trn2 \r6\().2D, \r8\().2D, \r2\().2D
|
||||
trn1 \r2\().2D, \r8\().2D, \r2\().2D
|
||||
|
||||
trn1 \r3\().2d, \r9\().2d, \r7\().2d
|
||||
trn2 \r7\().2d, \r9\().2d, \r7\().2d
|
||||
trn1 \r3\().2D, \r9\().2D, \r7\().2D
|
||||
trn2 \r7\().2D, \r9\().2D, \r7\().2D
|
||||
|
||||
.endm
|
||||
|
||||
@@ -33,81 +33,81 @@ const tab_x2, align=4
|
||||
endconst
|
||||
|
||||
function ff_opus_deemphasis_neon, export=1
|
||||
movrel x4, tab_st
|
||||
ld1 {v4.4s}, [x4]
|
||||
movrel x4, tab_x0
|
||||
ld1 {v5.4s}, [x4]
|
||||
movrel x4, tab_x1
|
||||
ld1 {v6.4s}, [x4]
|
||||
movrel x4, tab_x2
|
||||
ld1 {v7.4s}, [x4]
|
||||
movrel x4, tab_st
|
||||
ld1 {v4.4s}, [x4]
|
||||
movrel x4, tab_x0
|
||||
ld1 {v5.4s}, [x4]
|
||||
movrel x4, tab_x1
|
||||
ld1 {v6.4s}, [x4]
|
||||
movrel x4, tab_x2
|
||||
ld1 {v7.4s}, [x4]
|
||||
|
||||
fmul v0.4s, v4.4s, v0.s[0]
|
||||
fmul v0.4s, v4.4s, v0.s[0]
|
||||
|
||||
1: ld1 {v1.4s, v2.4s}, [x1], #32
|
||||
1: ld1 {v1.4s, v2.4s}, [x1], #32
|
||||
|
||||
fmla v0.4s, v5.4s, v1.s[0]
|
||||
fmul v3.4s, v7.4s, v2.s[2]
|
||||
fmla v0.4s, v5.4s, v1.s[0]
|
||||
fmul v3.4s, v7.4s, v2.s[2]
|
||||
|
||||
fmla v0.4s, v6.4s, v1.s[1]
|
||||
fmla v3.4s, v6.4s, v2.s[1]
|
||||
fmla v0.4s, v6.4s, v1.s[1]
|
||||
fmla v3.4s, v6.4s, v2.s[1]
|
||||
|
||||
fmla v0.4s, v7.4s, v1.s[2]
|
||||
fmla v3.4s, v5.4s, v2.s[0]
|
||||
fmla v0.4s, v7.4s, v1.s[2]
|
||||
fmla v3.4s, v5.4s, v2.s[0]
|
||||
|
||||
fadd v1.4s, v1.4s, v0.4s
|
||||
fadd v2.4s, v2.4s, v3.4s
|
||||
fadd v1.4s, v1.4s, v0.4s
|
||||
fadd v2.4s, v2.4s, v3.4s
|
||||
|
||||
fmla v2.4s, v4.4s, v1.s[3]
|
||||
fmla v2.4s, v4.4s, v1.s[3]
|
||||
|
||||
st1 {v1.4s, v2.4s}, [x0], #32
|
||||
fmul v0.4s, v4.4s, v2.s[3]
|
||||
st1 {v1.4s, v2.4s}, [x0], #32
|
||||
fmul v0.4s, v4.4s, v2.s[3]
|
||||
|
||||
subs w2, w2, #8
|
||||
b.gt 1b
|
||||
subs w2, w2, #8
|
||||
b.gt 1b
|
||||
|
||||
mov s0, v2.s[3]
|
||||
mov s0, v2.s[3]
|
||||
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_opus_postfilter_neon, export=1
|
||||
ld1 {v0.4s}, [x2]
|
||||
dup v1.4s, v0.s[1]
|
||||
dup v2.4s, v0.s[2]
|
||||
dup v0.4s, v0.s[0]
|
||||
ld1 {v0.4s}, [x2]
|
||||
dup v1.4s, v0.s[1]
|
||||
dup v2.4s, v0.s[2]
|
||||
dup v0.4s, v0.s[0]
|
||||
|
||||
add w1, w1, #2
|
||||
sub x1, x0, x1, lsl #2
|
||||
add w1, w1, #2
|
||||
sub x1, x0, x1, lsl #2
|
||||
|
||||
ld1 {v3.4s}, [x1]
|
||||
fmul v3.4s, v3.4s, v2.4s
|
||||
ld1 {v3.4s}, [x1]
|
||||
fmul v3.4s, v3.4s, v2.4s
|
||||
|
||||
1: add x1, x1, #4
|
||||
ld1 {v4.4s}, [x1]
|
||||
add x1, x1, #4
|
||||
ld1 {v5.4s}, [x1]
|
||||
add x1, x1, #4
|
||||
ld1 {v6.4s}, [x1]
|
||||
add x1, x1, #4
|
||||
ld1 {v7.4s}, [x1]
|
||||
1: add x1, x1, #4
|
||||
ld1 {v4.4s}, [x1]
|
||||
add x1, x1, #4
|
||||
ld1 {v5.4s}, [x1]
|
||||
add x1, x1, #4
|
||||
ld1 {v6.4s}, [x1]
|
||||
add x1, x1, #4
|
||||
ld1 {v7.4s}, [x1]
|
||||
|
||||
fmla v3.4s, v7.4s, v2.4s
|
||||
fadd v6.4s, v6.4s, v4.4s
|
||||
fmla v3.4s, v7.4s, v2.4s
|
||||
fadd v6.4s, v6.4s, v4.4s
|
||||
|
||||
ld1 {v4.4s}, [x0]
|
||||
fmla v4.4s, v5.4s, v0.4s
|
||||
ld1 {v4.4s}, [x0]
|
||||
fmla v4.4s, v5.4s, v0.4s
|
||||
|
||||
fmul v6.4s, v6.4s, v1.4s
|
||||
fadd v6.4s, v6.4s, v3.4s
|
||||
fmul v6.4s, v6.4s, v1.4s
|
||||
fadd v6.4s, v6.4s, v3.4s
|
||||
|
||||
fadd v4.4s, v4.4s, v6.4s
|
||||
fmul v3.4s, v7.4s, v2.4s
|
||||
fadd v4.4s, v4.4s, v6.4s
|
||||
fmul v3.4s, v7.4s, v2.4s
|
||||
|
||||
st1 {v4.4s}, [x0], #16
|
||||
st1 {v4.4s}, [x0], #16
|
||||
|
||||
subs w3, w3, #4
|
||||
b.gt 1b
|
||||
subs w3, w3, #4
|
||||
b.gt 1b
|
||||
|
||||
ret
|
||||
endfunc
|
||||
|
||||
+147
-147
@@ -46,49 +46,49 @@ function ff_sbr_sum64x5_neon, export=1
|
||||
add x3, x0, #192*4
|
||||
add x4, x0, #256*4
|
||||
mov x5, #64
|
||||
1: ld1 {v0.4s}, [x0]
|
||||
ld1 {v1.4s}, [x1], #16
|
||||
fadd v0.4s, v0.4s, v1.4s
|
||||
ld1 {v2.4s}, [x2], #16
|
||||
fadd v0.4s, v0.4s, v2.4s
|
||||
ld1 {v3.4s}, [x3], #16
|
||||
fadd v0.4s, v0.4s, v3.4s
|
||||
ld1 {v4.4s}, [x4], #16
|
||||
fadd v0.4s, v0.4s, v4.4s
|
||||
st1 {v0.4s}, [x0], #16
|
||||
1: ld1 {v0.4S}, [x0]
|
||||
ld1 {v1.4S}, [x1], #16
|
||||
fadd v0.4S, v0.4S, v1.4S
|
||||
ld1 {v2.4S}, [x2], #16
|
||||
fadd v0.4S, v0.4S, v2.4S
|
||||
ld1 {v3.4S}, [x3], #16
|
||||
fadd v0.4S, v0.4S, v3.4S
|
||||
ld1 {v4.4S}, [x4], #16
|
||||
fadd v0.4S, v0.4S, v4.4S
|
||||
st1 {v0.4S}, [x0], #16
|
||||
subs x5, x5, #4
|
||||
b.gt 1b
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_sbr_sum_square_neon, export=1
|
||||
movi v0.4s, #0
|
||||
1: ld1 {v1.4s}, [x0], #16
|
||||
fmla v0.4s, v1.4s, v1.4s
|
||||
movi v0.4S, #0
|
||||
1: ld1 {v1.4S}, [x0], #16
|
||||
fmla v0.4S, v1.4S, v1.4S
|
||||
subs w1, w1, #2
|
||||
b.gt 1b
|
||||
faddp v0.4s, v0.4s, v0.4s
|
||||
faddp v0.4s, v0.4s, v0.4s
|
||||
faddp v0.4S, v0.4S, v0.4S
|
||||
faddp v0.4S, v0.4S, v0.4S
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_sbr_neg_odd_64_neon, export=1
|
||||
mov x1, x0
|
||||
movi v5.4s, #1<<7, lsl #24
|
||||
ld2 {v0.4s, v1.4s}, [x0], #32
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
ld2 {v2.4s, v3.4s}, [x0], #32
|
||||
movi v5.4S, #1<<7, lsl #24
|
||||
ld2 {v0.4S, v1.4S}, [x0], #32
|
||||
eor v1.16B, v1.16B, v5.16B
|
||||
ld2 {v2.4S, v3.4S}, [x0], #32
|
||||
.rept 3
|
||||
st2 {v0.4s, v1.4s}, [x1], #32
|
||||
eor v3.16b, v3.16b, v5.16b
|
||||
ld2 {v0.4s, v1.4s}, [x0], #32
|
||||
st2 {v2.4s, v3.4s}, [x1], #32
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
ld2 {v2.4s, v3.4s}, [x0], #32
|
||||
st2 {v0.4S, v1.4S}, [x1], #32
|
||||
eor v3.16B, v3.16B, v5.16B
|
||||
ld2 {v0.4S, v1.4S}, [x0], #32
|
||||
st2 {v2.4S, v3.4S}, [x1], #32
|
||||
eor v1.16B, v1.16B, v5.16B
|
||||
ld2 {v2.4S, v3.4S}, [x0], #32
|
||||
.endr
|
||||
eor v3.16b, v3.16b, v5.16b
|
||||
st2 {v0.4s, v1.4s}, [x1], #32
|
||||
st2 {v2.4s, v3.4s}, [x1], #32
|
||||
eor v3.16B, v3.16B, v5.16B
|
||||
st2 {v0.4S, v1.4S}, [x1], #32
|
||||
st2 {v2.4S, v3.4S}, [x1], #32
|
||||
ret
|
||||
endfunc
|
||||
|
||||
@@ -97,26 +97,26 @@ function ff_sbr_qmf_pre_shuffle_neon, export=1
|
||||
add x2, x0, #64*4
|
||||
mov x3, #-16
|
||||
mov x4, #-4
|
||||
movi v6.4s, #1<<7, lsl #24
|
||||
ld1 {v0.2s}, [x0], #8
|
||||
st1 {v0.2s}, [x2], #8
|
||||
movi v6.4S, #1<<7, lsl #24
|
||||
ld1 {v0.2S}, [x0], #8
|
||||
st1 {v0.2S}, [x2], #8
|
||||
.rept 7
|
||||
ld1 {v1.4s}, [x1], x3
|
||||
ld1 {v2.4s}, [x0], #16
|
||||
eor v1.16b, v1.16b, v6.16b
|
||||
rev64 v1.4s, v1.4s
|
||||
ext v1.16b, v1.16b, v1.16b, #8
|
||||
st2 {v1.4s, v2.4s}, [x2], #32
|
||||
ld1 {v1.4S}, [x1], x3
|
||||
ld1 {v2.4S}, [x0], #16
|
||||
eor v1.16B, v1.16B, v6.16B
|
||||
rev64 v1.4S, v1.4S
|
||||
ext v1.16B, v1.16B, v1.16B, #8
|
||||
st2 {v1.4S, v2.4S}, [x2], #32
|
||||
.endr
|
||||
add x1, x1, #8
|
||||
ld1 {v1.2s}, [x1], x4
|
||||
ld1 {v2.2s}, [x0], #8
|
||||
ld1 {v1.s}[3], [x1]
|
||||
ld1 {v2.s}[2], [x0]
|
||||
eor v1.16b, v1.16b, v6.16b
|
||||
rev64 v1.4s, v1.4s
|
||||
st2 {v1.2s, v2.2s}, [x2], #16
|
||||
st2 {v1.s, v2.s}[2], [x2]
|
||||
ld1 {v1.2S}, [x1], x4
|
||||
ld1 {v2.2S}, [x0], #8
|
||||
ld1 {v1.S}[3], [x1]
|
||||
ld1 {v2.S}[2], [x0]
|
||||
eor v1.16B, v1.16B, v6.16B
|
||||
rev64 v1.4S, v1.4S
|
||||
st2 {v1.2S, v2.2S}, [x2], #16
|
||||
st2 {v1.S, v2.S}[2], [x2]
|
||||
ret
|
||||
endfunc
|
||||
|
||||
@@ -124,13 +124,13 @@ function ff_sbr_qmf_post_shuffle_neon, export=1
|
||||
add x2, x1, #60*4
|
||||
mov x3, #-16
|
||||
mov x4, #32
|
||||
movi v6.4s, #1<<7, lsl #24
|
||||
1: ld1 {v0.4s}, [x2], x3
|
||||
ld1 {v1.4s}, [x1], #16
|
||||
eor v0.16b, v0.16b, v6.16b
|
||||
rev64 v0.4s, v0.4s
|
||||
ext v0.16b, v0.16b, v0.16b, #8
|
||||
st2 {v0.4s, v1.4s}, [x0], #32
|
||||
movi v6.4S, #1<<7, lsl #24
|
||||
1: ld1 {v0.4S}, [x2], x3
|
||||
ld1 {v1.4S}, [x1], #16
|
||||
eor v0.16B, v0.16B, v6.16B
|
||||
rev64 v0.4S, v0.4S
|
||||
ext v0.16B, v0.16B, v0.16B, #8
|
||||
st2 {v0.4S, v1.4S}, [x0], #32
|
||||
subs x4, x4, #4
|
||||
b.gt 1b
|
||||
ret
|
||||
@@ -141,13 +141,13 @@ function ff_sbr_qmf_deint_neg_neon, export=1
|
||||
add x2, x0, #60*4
|
||||
mov x3, #-32
|
||||
mov x4, #32
|
||||
movi v2.4s, #1<<7, lsl #24
|
||||
1: ld2 {v0.4s, v1.4s}, [x1], x3
|
||||
eor v0.16b, v0.16b, v2.16b
|
||||
rev64 v1.4s, v1.4s
|
||||
ext v1.16b, v1.16b, v1.16b, #8
|
||||
st1 {v0.4s}, [x2]
|
||||
st1 {v1.4s}, [x0], #16
|
||||
movi v2.4S, #1<<7, lsl #24
|
||||
1: ld2 {v0.4S, v1.4S}, [x1], x3
|
||||
eor v0.16B, v0.16B, v2.16B
|
||||
rev64 v1.4S, v1.4S
|
||||
ext v1.16B, v1.16B, v1.16B, #8
|
||||
st1 {v0.4S}, [x2]
|
||||
st1 {v1.4S}, [x0], #16
|
||||
sub x2, x2, #16
|
||||
subs x4, x4, #4
|
||||
b.gt 1b
|
||||
@@ -159,16 +159,16 @@ function ff_sbr_qmf_deint_bfly_neon, export=1
|
||||
add x3, x0, #124*4
|
||||
mov x4, #64
|
||||
mov x5, #-16
|
||||
1: ld1 {v0.4s}, [x1], #16
|
||||
ld1 {v1.4s}, [x2], x5
|
||||
rev64 v2.4s, v0.4s
|
||||
ext v2.16b, v2.16b, v2.16b, #8
|
||||
rev64 v3.4s, v1.4s
|
||||
ext v3.16b, v3.16b, v3.16b, #8
|
||||
fadd v1.4s, v1.4s, v2.4s
|
||||
fsub v0.4s, v0.4s, v3.4s
|
||||
st1 {v0.4s}, [x0], #16
|
||||
st1 {v1.4s}, [x3], x5
|
||||
1: ld1 {v0.4S}, [x1], #16
|
||||
ld1 {v1.4S}, [x2], x5
|
||||
rev64 v2.4S, v0.4S
|
||||
ext v2.16B, v2.16B, v2.16B, #8
|
||||
rev64 v3.4S, v1.4S
|
||||
ext v3.16B, v3.16B, v3.16B, #8
|
||||
fadd v1.4S, v1.4S, v2.4S
|
||||
fsub v0.4S, v0.4S, v3.4S
|
||||
st1 {v0.4S}, [x0], #16
|
||||
st1 {v1.4S}, [x3], x5
|
||||
subs x4, x4, #4
|
||||
b.gt 1b
|
||||
ret
|
||||
@@ -178,32 +178,32 @@ function ff_sbr_hf_gen_neon, export=1
|
||||
sxtw x4, w4
|
||||
sxtw x5, w5
|
||||
movrel x6, factors
|
||||
ld1 {v7.4s}, [x6]
|
||||
dup v1.4s, v0.s[0]
|
||||
mov v2.8b, v1.8b
|
||||
mov v2.s[2], v7.s[0]
|
||||
mov v2.s[3], v7.s[0]
|
||||
fmul v1.4s, v1.4s, v2.4s
|
||||
ld1 {v0.d}[0], [x3]
|
||||
ld1 {v0.d}[1], [x2]
|
||||
fmul v0.4s, v0.4s, v1.4s
|
||||
fmul v1.4s, v0.4s, v7.4s
|
||||
rev64 v0.4s, v0.4s
|
||||
ld1 {v7.4S}, [x6]
|
||||
dup v1.4S, v0.S[0]
|
||||
mov v2.8B, v1.8B
|
||||
mov v2.S[2], v7.S[0]
|
||||
mov v2.S[3], v7.S[0]
|
||||
fmul v1.4S, v1.4S, v2.4S
|
||||
ld1 {v0.D}[0], [x3]
|
||||
ld1 {v0.D}[1], [x2]
|
||||
fmul v0.4S, v0.4S, v1.4S
|
||||
fmul v1.4S, v0.4S, v7.4S
|
||||
rev64 v0.4S, v0.4S
|
||||
sub x7, x5, x4
|
||||
add x0, x0, x4, lsl #3
|
||||
add x1, x1, x4, lsl #3
|
||||
sub x1, x1, #16
|
||||
1: ld1 {v2.4s}, [x1], #16
|
||||
ld1 {v3.2s}, [x1]
|
||||
fmul v4.4s, v2.4s, v1.4s
|
||||
fmul v5.4s, v2.4s, v0.4s
|
||||
faddp v4.4s, v4.4s, v4.4s
|
||||
faddp v5.4s, v5.4s, v5.4s
|
||||
faddp v4.4s, v4.4s, v4.4s
|
||||
faddp v5.4s, v5.4s, v5.4s
|
||||
mov v4.s[1], v5.s[0]
|
||||
fadd v4.2s, v4.2s, v3.2s
|
||||
st1 {v4.2s}, [x0], #8
|
||||
1: ld1 {v2.4S}, [x1], #16
|
||||
ld1 {v3.2S}, [x1]
|
||||
fmul v4.4S, v2.4S, v1.4S
|
||||
fmul v5.4S, v2.4S, v0.4S
|
||||
faddp v4.4S, v4.4S, v4.4S
|
||||
faddp v5.4S, v5.4S, v5.4S
|
||||
faddp v4.4S, v4.4S, v4.4S
|
||||
faddp v5.4S, v5.4S, v5.4S
|
||||
mov v4.S[1], v5.S[0]
|
||||
fadd v4.2S, v4.2S, v3.2S
|
||||
st1 {v4.2S}, [x0], #8
|
||||
sub x1, x1, #8
|
||||
subs x7, x7, #1
|
||||
b.gt 1b
|
||||
@@ -215,10 +215,10 @@ function ff_sbr_hf_g_filt_neon, export=1
|
||||
sxtw x4, w4
|
||||
mov x5, #40*2*4
|
||||
add x1, x1, x4, lsl #3
|
||||
1: ld1 {v0.2s}, [x1], x5
|
||||
ld1 {v1.s}[0], [x2], #4
|
||||
fmul v2.4s, v0.4s, v1.s[0]
|
||||
st1 {v2.2s}, [x0], #8
|
||||
1: ld1 {v0.2S}, [x1], x5
|
||||
ld1 {v1.S}[0], [x2], #4
|
||||
fmul v2.4S, v0.4S, v1.S[0]
|
||||
st1 {v2.2S}, [x0], #8
|
||||
subs x3, x3, #1
|
||||
b.gt 1b
|
||||
ret
|
||||
@@ -227,46 +227,46 @@ endfunc
|
||||
function ff_sbr_autocorrelate_neon, export=1
|
||||
mov x2, #38
|
||||
movrel x3, factors
|
||||
ld1 {v0.4s}, [x3]
|
||||
movi v1.4s, #0
|
||||
movi v2.4s, #0
|
||||
movi v3.4s, #0
|
||||
ld1 {v4.2s}, [x0], #8
|
||||
ld1 {v5.2s}, [x0], #8
|
||||
fmul v16.2s, v4.2s, v4.2s
|
||||
fmul v17.2s, v5.2s, v4.s[0]
|
||||
fmul v18.2s, v5.2s, v4.s[1]
|
||||
1: ld1 {v5.d}[1], [x0], #8
|
||||
fmla v1.2s, v4.2s, v4.2s
|
||||
fmla v2.4s, v5.4s, v4.s[0]
|
||||
fmla v3.4s, v5.4s, v4.s[1]
|
||||
mov v4.d[0], v5.d[0]
|
||||
mov v5.d[0], v5.d[1]
|
||||
ld1 {v0.4S}, [x3]
|
||||
movi v1.4S, #0
|
||||
movi v2.4S, #0
|
||||
movi v3.4S, #0
|
||||
ld1 {v4.2S}, [x0], #8
|
||||
ld1 {v5.2S}, [x0], #8
|
||||
fmul v16.2S, v4.2S, v4.2S
|
||||
fmul v17.2S, v5.2S, v4.S[0]
|
||||
fmul v18.2S, v5.2S, v4.S[1]
|
||||
1: ld1 {v5.D}[1], [x0], #8
|
||||
fmla v1.2S, v4.2S, v4.2S
|
||||
fmla v2.4S, v5.4S, v4.S[0]
|
||||
fmla v3.4S, v5.4S, v4.S[1]
|
||||
mov v4.D[0], v5.D[0]
|
||||
mov v5.D[0], v5.D[1]
|
||||
subs x2, x2, #1
|
||||
b.gt 1b
|
||||
fmul v19.2s, v4.2s, v4.2s
|
||||
fmul v20.2s, v5.2s, v4.s[0]
|
||||
fmul v21.2s, v5.2s, v4.s[1]
|
||||
fadd v22.4s, v2.4s, v20.4s
|
||||
fsub v22.4s, v22.4s, v17.4s
|
||||
fadd v23.4s, v3.4s, v21.4s
|
||||
fsub v23.4s, v23.4s, v18.4s
|
||||
rev64 v23.4s, v23.4s
|
||||
fmul v23.4s, v23.4s, v0.4s
|
||||
fadd v22.4s, v22.4s, v23.4s
|
||||
st1 {v22.4s}, [x1], #16
|
||||
fadd v23.2s, v1.2s, v19.2s
|
||||
fsub v23.2s, v23.2s, v16.2s
|
||||
faddp v23.2s, v23.2s, v23.2s
|
||||
st1 {v23.s}[0], [x1]
|
||||
fmul v19.2S, v4.2S, v4.2S
|
||||
fmul v20.2S, v5.2S, v4.S[0]
|
||||
fmul v21.2S, v5.2S, v4.S[1]
|
||||
fadd v22.4S, v2.4S, v20.4S
|
||||
fsub v22.4S, v22.4S, v17.4S
|
||||
fadd v23.4S, v3.4S, v21.4S
|
||||
fsub v23.4S, v23.4S, v18.4S
|
||||
rev64 v23.4S, v23.4S
|
||||
fmul v23.4S, v23.4S, v0.4S
|
||||
fadd v22.4S, v22.4S, v23.4S
|
||||
st1 {v22.4S}, [x1], #16
|
||||
fadd v23.2S, v1.2S, v19.2S
|
||||
fsub v23.2S, v23.2S, v16.2S
|
||||
faddp v23.2S, v23.2S, v23.2S
|
||||
st1 {v23.S}[0], [x1]
|
||||
add x1, x1, #8
|
||||
rev64 v3.2s, v3.2s
|
||||
fmul v3.2s, v3.2s, v0.2s
|
||||
fadd v2.2s, v2.2s, v3.2s
|
||||
st1 {v2.2s}, [x1]
|
||||
rev64 v3.2S, v3.2S
|
||||
fmul v3.2S, v3.2S, v0.2S
|
||||
fadd v2.2S, v2.2S, v3.2S
|
||||
st1 {v2.2S}, [x1]
|
||||
add x1, x1, #16
|
||||
faddp v1.2s, v1.2s, v1.2s
|
||||
st1 {v1.s}[0], [x1]
|
||||
faddp v1.2S, v1.2S, v1.2S
|
||||
st1 {v1.S}[0], [x1]
|
||||
ret
|
||||
endfunc
|
||||
|
||||
@@ -278,25 +278,25 @@ endfunc
|
||||
1: and x3, x3, #0x1ff
|
||||
add x8, x7, x3, lsl #3
|
||||
add x3, x3, #2
|
||||
ld1 {v2.4s}, [x0]
|
||||
ld1 {v3.2s}, [x1], #8
|
||||
ld1 {v4.2s}, [x2], #8
|
||||
ld1 {v5.4s}, [x8]
|
||||
mov v6.16b, v2.16b
|
||||
zip1 v3.4s, v3.4s, v3.4s
|
||||
zip1 v4.4s, v4.4s, v4.4s
|
||||
fmla v6.4s, v1.4s, v3.4s
|
||||
fmla v2.4s, v5.4s, v4.4s
|
||||
fcmeq v7.4s, v3.4s, #0
|
||||
bif v2.16b, v6.16b, v7.16b
|
||||
st1 {v2.4s}, [x0], #16
|
||||
ld1 {v2.4S}, [x0]
|
||||
ld1 {v3.2S}, [x1], #8
|
||||
ld1 {v4.2S}, [x2], #8
|
||||
ld1 {v5.4S}, [x8]
|
||||
mov v6.16B, v2.16B
|
||||
zip1 v3.4S, v3.4S, v3.4S
|
||||
zip1 v4.4S, v4.4S, v4.4S
|
||||
fmla v6.4S, v1.4S, v3.4S
|
||||
fmla v2.4S, v5.4S, v4.4S
|
||||
fcmeq v7.4S, v3.4S, #0
|
||||
bif v2.16B, v6.16B, v7.16B
|
||||
st1 {v2.4S}, [x0], #16
|
||||
subs x5, x5, #2
|
||||
b.gt 1b
|
||||
.endm
|
||||
|
||||
function ff_sbr_hf_apply_noise_0_neon, export=1
|
||||
movrel x9, phi_noise_0
|
||||
ld1 {v1.4s}, [x9]
|
||||
ld1 {v1.4S}, [x9]
|
||||
apply_noise_common
|
||||
ret
|
||||
endfunc
|
||||
@@ -305,14 +305,14 @@ function ff_sbr_hf_apply_noise_1_neon, export=1
|
||||
movrel x9, phi_noise_1
|
||||
and x4, x4, #1
|
||||
add x9, x9, x4, lsl #4
|
||||
ld1 {v1.4s}, [x9]
|
||||
ld1 {v1.4S}, [x9]
|
||||
apply_noise_common
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_sbr_hf_apply_noise_2_neon, export=1
|
||||
movrel x9, phi_noise_2
|
||||
ld1 {v1.4s}, [x9]
|
||||
ld1 {v1.4S}, [x9]
|
||||
apply_noise_common
|
||||
ret
|
||||
endfunc
|
||||
@@ -321,7 +321,7 @@ function ff_sbr_hf_apply_noise_3_neon, export=1
|
||||
movrel x9, phi_noise_3
|
||||
and x4, x4, #1
|
||||
add x9, x9, x4, lsl #4
|
||||
ld1 {v1.4s}, [x9]
|
||||
ld1 {v1.4S}, [x9]
|
||||
apply_noise_common
|
||||
ret
|
||||
endfunc
|
||||
|
||||
@@ -54,7 +54,7 @@ endconst
|
||||
prfm pldl1keep, [\data]
|
||||
mov x10, x30
|
||||
movrel x3, idct_coeff_neon
|
||||
ld1 {v0.2d}, [x3]
|
||||
ld1 {v0.2D}, [x3]
|
||||
.endm
|
||||
|
||||
.macro idct_end
|
||||
@@ -74,146 +74,146 @@ endconst
|
||||
.endm
|
||||
|
||||
.macro idct_col4_top y1, y2, y3, y4, i, l
|
||||
smull\i v7.4s, \y3\l, z2
|
||||
smull\i v16.4s, \y3\l, z6
|
||||
smull\i v17.4s, \y2\l, z1
|
||||
add v19.4s, v23.4s, v7.4s
|
||||
smull\i v18.4s, \y2\l, z3
|
||||
add v20.4s, v23.4s, v16.4s
|
||||
smull\i v5.4s, \y2\l, z5
|
||||
sub v21.4s, v23.4s, v16.4s
|
||||
smull\i v6.4s, \y2\l, z7
|
||||
sub v22.4s, v23.4s, v7.4s
|
||||
smull\i v7.4S, \y3\l, z2
|
||||
smull\i v16.4S, \y3\l, z6
|
||||
smull\i v17.4S, \y2\l, z1
|
||||
add v19.4S, v23.4S, v7.4S
|
||||
smull\i v18.4S, \y2\l, z3
|
||||
add v20.4S, v23.4S, v16.4S
|
||||
smull\i v5.4S, \y2\l, z5
|
||||
sub v21.4S, v23.4S, v16.4S
|
||||
smull\i v6.4S, \y2\l, z7
|
||||
sub v22.4S, v23.4S, v7.4S
|
||||
|
||||
smlal\i v17.4s, \y4\l, z3
|
||||
smlsl\i v18.4s, \y4\l, z7
|
||||
smlsl\i v5.4s, \y4\l, z1
|
||||
smlsl\i v6.4s, \y4\l, z5
|
||||
smlal\i v17.4S, \y4\l, z3
|
||||
smlsl\i v18.4S, \y4\l, z7
|
||||
smlsl\i v5.4S, \y4\l, z1
|
||||
smlsl\i v6.4S, \y4\l, z5
|
||||
.endm
|
||||
|
||||
.macro idct_row4_neon y1, y2, y3, y4, pass
|
||||
ld1 {\y1\().2d,\y2\().2d}, [x2], #32
|
||||
movi v23.4s, #1<<2, lsl #8
|
||||
orr v5.16b, \y1\().16b, \y2\().16b
|
||||
ld1 {\y3\().2d,\y4\().2d}, [x2], #32
|
||||
orr v6.16b, \y3\().16b, \y4\().16b
|
||||
orr v5.16b, v5.16b, v6.16b
|
||||
mov x3, v5.d[1]
|
||||
smlal v23.4s, \y1\().4h, z4
|
||||
ld1 {\y1\().2D,\y2\().2D}, [x2], #32
|
||||
movi v23.4S, #1<<2, lsl #8
|
||||
orr v5.16B, \y1\().16B, \y2\().16B
|
||||
ld1 {\y3\().2D,\y4\().2D}, [x2], #32
|
||||
orr v6.16B, \y3\().16B, \y4\().16B
|
||||
orr v5.16B, v5.16B, v6.16B
|
||||
mov x3, v5.D[1]
|
||||
smlal v23.4S, \y1\().4H, z4
|
||||
|
||||
idct_col4_top \y1, \y2, \y3, \y4, 1, .4h
|
||||
idct_col4_top \y1, \y2, \y3, \y4, 1, .4H
|
||||
|
||||
cmp x3, #0
|
||||
b.eq \pass\()f
|
||||
|
||||
smull2 v7.4s, \y1\().8h, z4
|
||||
smlal2 v17.4s, \y2\().8h, z5
|
||||
smlsl2 v18.4s, \y2\().8h, z1
|
||||
smull2 v16.4s, \y3\().8h, z2
|
||||
smlal2 v5.4s, \y2\().8h, z7
|
||||
add v19.4s, v19.4s, v7.4s
|
||||
sub v20.4s, v20.4s, v7.4s
|
||||
sub v21.4s, v21.4s, v7.4s
|
||||
add v22.4s, v22.4s, v7.4s
|
||||
smlal2 v6.4s, \y2\().8h, z3
|
||||
smull2 v7.4s, \y3\().8h, z6
|
||||
smlal2 v17.4s, \y4\().8h, z7
|
||||
smlsl2 v18.4s, \y4\().8h, z5
|
||||
smlal2 v5.4s, \y4\().8h, z3
|
||||
smlsl2 v6.4s, \y4\().8h, z1
|
||||
add v19.4s, v19.4s, v7.4s
|
||||
sub v20.4s, v20.4s, v16.4s
|
||||
add v21.4s, v21.4s, v16.4s
|
||||
sub v22.4s, v22.4s, v7.4s
|
||||
smull2 v7.4S, \y1\().8H, z4
|
||||
smlal2 v17.4S, \y2\().8H, z5
|
||||
smlsl2 v18.4S, \y2\().8H, z1
|
||||
smull2 v16.4S, \y3\().8H, z2
|
||||
smlal2 v5.4S, \y2\().8H, z7
|
||||
add v19.4S, v19.4S, v7.4S
|
||||
sub v20.4S, v20.4S, v7.4S
|
||||
sub v21.4S, v21.4S, v7.4S
|
||||
add v22.4S, v22.4S, v7.4S
|
||||
smlal2 v6.4S, \y2\().8H, z3
|
||||
smull2 v7.4S, \y3\().8H, z6
|
||||
smlal2 v17.4S, \y4\().8H, z7
|
||||
smlsl2 v18.4S, \y4\().8H, z5
|
||||
smlal2 v5.4S, \y4\().8H, z3
|
||||
smlsl2 v6.4S, \y4\().8H, z1
|
||||
add v19.4S, v19.4S, v7.4S
|
||||
sub v20.4S, v20.4S, v16.4S
|
||||
add v21.4S, v21.4S, v16.4S
|
||||
sub v22.4S, v22.4S, v7.4S
|
||||
|
||||
\pass: add \y3\().4S, v19.4S, v17.4S
|
||||
add \y4\().4s, v20.4s, v18.4s
|
||||
shrn \y1\().4h, \y3\().4s, #ROW_SHIFT
|
||||
shrn \y2\().4h, \y4\().4s, #ROW_SHIFT
|
||||
add v7.4s, v21.4s, v5.4s
|
||||
add v16.4s, v22.4s, v6.4s
|
||||
shrn \y3\().4h, v7.4s, #ROW_SHIFT
|
||||
shrn \y4\().4h, v16.4s, #ROW_SHIFT
|
||||
sub v22.4s, v22.4s, v6.4s
|
||||
sub v19.4s, v19.4s, v17.4s
|
||||
sub v21.4s, v21.4s, v5.4s
|
||||
shrn2 \y1\().8h, v22.4s, #ROW_SHIFT
|
||||
sub v20.4s, v20.4s, v18.4s
|
||||
shrn2 \y2\().8h, v21.4s, #ROW_SHIFT
|
||||
shrn2 \y3\().8h, v20.4s, #ROW_SHIFT
|
||||
shrn2 \y4\().8h, v19.4s, #ROW_SHIFT
|
||||
add \y4\().4S, v20.4S, v18.4S
|
||||
shrn \y1\().4H, \y3\().4S, #ROW_SHIFT
|
||||
shrn \y2\().4H, \y4\().4S, #ROW_SHIFT
|
||||
add v7.4S, v21.4S, v5.4S
|
||||
add v16.4S, v22.4S, v6.4S
|
||||
shrn \y3\().4H, v7.4S, #ROW_SHIFT
|
||||
shrn \y4\().4H, v16.4S, #ROW_SHIFT
|
||||
sub v22.4S, v22.4S, v6.4S
|
||||
sub v19.4S, v19.4S, v17.4S
|
||||
sub v21.4S, v21.4S, v5.4S
|
||||
shrn2 \y1\().8H, v22.4S, #ROW_SHIFT
|
||||
sub v20.4S, v20.4S, v18.4S
|
||||
shrn2 \y2\().8H, v21.4S, #ROW_SHIFT
|
||||
shrn2 \y3\().8H, v20.4S, #ROW_SHIFT
|
||||
shrn2 \y4\().8H, v19.4S, #ROW_SHIFT
|
||||
|
||||
trn1 v16.8h, \y1\().8h, \y2\().8h
|
||||
trn2 v17.8h, \y1\().8h, \y2\().8h
|
||||
trn1 v18.8h, \y3\().8h, \y4\().8h
|
||||
trn2 v19.8h, \y3\().8h, \y4\().8h
|
||||
trn1 \y1\().4s, v16.4s, v18.4s
|
||||
trn1 \y2\().4s, v17.4s, v19.4s
|
||||
trn2 \y3\().4s, v16.4s, v18.4s
|
||||
trn2 \y4\().4s, v17.4s, v19.4s
|
||||
trn1 v16.8H, \y1\().8H, \y2\().8H
|
||||
trn2 v17.8H, \y1\().8H, \y2\().8H
|
||||
trn1 v18.8H, \y3\().8H, \y4\().8H
|
||||
trn2 v19.8H, \y3\().8H, \y4\().8H
|
||||
trn1 \y1\().4S, v16.4S, v18.4S
|
||||
trn1 \y2\().4S, v17.4S, v19.4S
|
||||
trn2 \y3\().4S, v16.4S, v18.4S
|
||||
trn2 \y4\().4S, v17.4S, v19.4S
|
||||
.endm
|
||||
|
||||
.macro declare_idct_col4_neon i, l
|
||||
function idct_col4_neon\i
|
||||
dup v23.4h, z4c
|
||||
dup v23.4H, z4c
|
||||
.if \i == 1
|
||||
add v23.4h, v23.4h, v24.4h
|
||||
add v23.4H, v23.4H, v24.4H
|
||||
.else
|
||||
mov v5.d[0], v24.d[1]
|
||||
add v23.4h, v23.4h, v5.4h
|
||||
mov v5.D[0], v24.D[1]
|
||||
add v23.4H, v23.4H, v5.4H
|
||||
.endif
|
||||
smull v23.4s, v23.4h, z4
|
||||
smull v23.4S, v23.4H, z4
|
||||
|
||||
idct_col4_top v24, v25, v26, v27, \i, \l
|
||||
|
||||
mov x4, v28.d[\i - 1]
|
||||
mov x5, v29.d[\i - 1]
|
||||
mov x4, v28.D[\i - 1]
|
||||
mov x5, v29.D[\i - 1]
|
||||
cmp x4, #0
|
||||
b.eq 1f
|
||||
|
||||
smull\i v7.4s, v28\l, z4
|
||||
add v19.4s, v19.4s, v7.4s
|
||||
sub v20.4s, v20.4s, v7.4s
|
||||
sub v21.4s, v21.4s, v7.4s
|
||||
add v22.4s, v22.4s, v7.4s
|
||||
smull\i v7.4S, v28\l, z4
|
||||
add v19.4S, v19.4S, v7.4S
|
||||
sub v20.4S, v20.4S, v7.4S
|
||||
sub v21.4S, v21.4S, v7.4S
|
||||
add v22.4S, v22.4S, v7.4S
|
||||
|
||||
1: mov x4, v30.d[\i - 1]
|
||||
1: mov x4, v30.D[\i - 1]
|
||||
cmp x5, #0
|
||||
b.eq 2f
|
||||
|
||||
smlal\i v17.4s, v29\l, z5
|
||||
smlsl\i v18.4s, v29\l, z1
|
||||
smlal\i v5.4s, v29\l, z7
|
||||
smlal\i v6.4s, v29\l, z3
|
||||
smlal\i v17.4S, v29\l, z5
|
||||
smlsl\i v18.4S, v29\l, z1
|
||||
smlal\i v5.4S, v29\l, z7
|
||||
smlal\i v6.4S, v29\l, z3
|
||||
|
||||
2: mov x5, v31.d[\i - 1]
|
||||
2: mov x5, v31.D[\i - 1]
|
||||
cmp x4, #0
|
||||
b.eq 3f
|
||||
|
||||
smull\i v7.4s, v30\l, z6
|
||||
smull\i v16.4s, v30\l, z2
|
||||
add v19.4s, v19.4s, v7.4s
|
||||
sub v22.4s, v22.4s, v7.4s
|
||||
sub v20.4s, v20.4s, v16.4s
|
||||
add v21.4s, v21.4s, v16.4s
|
||||
smull\i v7.4S, v30\l, z6
|
||||
smull\i v16.4S, v30\l, z2
|
||||
add v19.4S, v19.4S, v7.4S
|
||||
sub v22.4S, v22.4S, v7.4S
|
||||
sub v20.4S, v20.4S, v16.4S
|
||||
add v21.4S, v21.4S, v16.4S
|
||||
|
||||
3: cmp x5, #0
|
||||
b.eq 4f
|
||||
|
||||
smlal\i v17.4s, v31\l, z7
|
||||
smlsl\i v18.4s, v31\l, z5
|
||||
smlal\i v5.4s, v31\l, z3
|
||||
smlsl\i v6.4s, v31\l, z1
|
||||
smlal\i v17.4S, v31\l, z7
|
||||
smlsl\i v18.4S, v31\l, z5
|
||||
smlal\i v5.4S, v31\l, z3
|
||||
smlsl\i v6.4S, v31\l, z1
|
||||
|
||||
4: addhn v7.4h, v19.4s, v17.4s
|
||||
addhn2 v7.8h, v20.4s, v18.4s
|
||||
subhn v18.4h, v20.4s, v18.4s
|
||||
subhn2 v18.8h, v19.4s, v17.4s
|
||||
4: addhn v7.4H, v19.4S, v17.4S
|
||||
addhn2 v7.8H, v20.4S, v18.4S
|
||||
subhn v18.4H, v20.4S, v18.4S
|
||||
subhn2 v18.8H, v19.4S, v17.4S
|
||||
|
||||
addhn v16.4h, v21.4s, v5.4s
|
||||
addhn2 v16.8h, v22.4s, v6.4s
|
||||
subhn v17.4h, v22.4s, v6.4s
|
||||
subhn2 v17.8h, v21.4s, v5.4s
|
||||
addhn v16.4H, v21.4S, v5.4S
|
||||
addhn2 v16.8H, v22.4S, v6.4S
|
||||
subhn v17.4H, v22.4S, v6.4S
|
||||
subhn2 v17.8H, v21.4S, v5.4S
|
||||
|
||||
ret
|
||||
endfunc
|
||||
@@ -229,33 +229,33 @@ function ff_simple_idct_put_neon, export=1
|
||||
idct_row4_neon v28, v29, v30, v31, 2
|
||||
bl idct_col4_neon1
|
||||
|
||||
sqshrun v1.8b, v7.8h, #COL_SHIFT-16
|
||||
sqshrun2 v1.16b, v16.8h, #COL_SHIFT-16
|
||||
sqshrun v3.8b, v17.8h, #COL_SHIFT-16
|
||||
sqshrun2 v3.16b, v18.8h, #COL_SHIFT-16
|
||||
sqshrun v1.8B, v7.8H, #COL_SHIFT-16
|
||||
sqshrun2 v1.16B, v16.8H, #COL_SHIFT-16
|
||||
sqshrun v3.8B, v17.8H, #COL_SHIFT-16
|
||||
sqshrun2 v3.16B, v18.8H, #COL_SHIFT-16
|
||||
|
||||
bl idct_col4_neon2
|
||||
|
||||
sqshrun v2.8b, v7.8h, #COL_SHIFT-16
|
||||
sqshrun2 v2.16b, v16.8h, #COL_SHIFT-16
|
||||
sqshrun v4.8b, v17.8h, #COL_SHIFT-16
|
||||
sqshrun2 v4.16b, v18.8h, #COL_SHIFT-16
|
||||
sqshrun v2.8B, v7.8H, #COL_SHIFT-16
|
||||
sqshrun2 v2.16B, v16.8H, #COL_SHIFT-16
|
||||
sqshrun v4.8B, v17.8H, #COL_SHIFT-16
|
||||
sqshrun2 v4.16B, v18.8H, #COL_SHIFT-16
|
||||
|
||||
zip1 v16.4s, v1.4s, v2.4s
|
||||
zip2 v17.4s, v1.4s, v2.4s
|
||||
zip1 v16.4S, v1.4S, v2.4S
|
||||
zip2 v17.4S, v1.4S, v2.4S
|
||||
|
||||
st1 {v16.d}[0], [x0], x1
|
||||
st1 {v16.d}[1], [x0], x1
|
||||
st1 {v16.D}[0], [x0], x1
|
||||
st1 {v16.D}[1], [x0], x1
|
||||
|
||||
zip1 v18.4s, v3.4s, v4.4s
|
||||
zip2 v19.4s, v3.4s, v4.4s
|
||||
zip1 v18.4S, v3.4S, v4.4S
|
||||
zip2 v19.4S, v3.4S, v4.4S
|
||||
|
||||
st1 {v17.d}[0], [x0], x1
|
||||
st1 {v17.d}[1], [x0], x1
|
||||
st1 {v18.d}[0], [x0], x1
|
||||
st1 {v18.d}[1], [x0], x1
|
||||
st1 {v19.d}[0], [x0], x1
|
||||
st1 {v19.d}[1], [x0], x1
|
||||
st1 {v17.D}[0], [x0], x1
|
||||
st1 {v17.D}[1], [x0], x1
|
||||
st1 {v18.D}[0], [x0], x1
|
||||
st1 {v18.D}[1], [x0], x1
|
||||
st1 {v19.D}[0], [x0], x1
|
||||
st1 {v19.D}[1], [x0], x1
|
||||
|
||||
idct_end
|
||||
endfunc
|
||||
@@ -267,59 +267,59 @@ function ff_simple_idct_add_neon, export=1
|
||||
idct_row4_neon v28, v29, v30, v31, 2
|
||||
bl idct_col4_neon1
|
||||
|
||||
sshr v1.8h, v7.8h, #COL_SHIFT-16
|
||||
sshr v2.8h, v16.8h, #COL_SHIFT-16
|
||||
sshr v3.8h, v17.8h, #COL_SHIFT-16
|
||||
sshr v4.8h, v18.8h, #COL_SHIFT-16
|
||||
sshr v1.8H, v7.8H, #COL_SHIFT-16
|
||||
sshr v2.8H, v16.8H, #COL_SHIFT-16
|
||||
sshr v3.8H, v17.8H, #COL_SHIFT-16
|
||||
sshr v4.8H, v18.8H, #COL_SHIFT-16
|
||||
|
||||
bl idct_col4_neon2
|
||||
|
||||
sshr v7.8h, v7.8h, #COL_SHIFT-16
|
||||
sshr v16.8h, v16.8h, #COL_SHIFT-16
|
||||
sshr v17.8h, v17.8h, #COL_SHIFT-16
|
||||
sshr v18.8h, v18.8h, #COL_SHIFT-16
|
||||
sshr v7.8H, v7.8H, #COL_SHIFT-16
|
||||
sshr v16.8H, v16.8H, #COL_SHIFT-16
|
||||
sshr v17.8H, v17.8H, #COL_SHIFT-16
|
||||
sshr v18.8H, v18.8H, #COL_SHIFT-16
|
||||
|
||||
mov x9, x0
|
||||
ld1 {v19.d}[0], [x0], x1
|
||||
zip1 v23.2d, v1.2d, v7.2d
|
||||
zip2 v24.2d, v1.2d, v7.2d
|
||||
ld1 {v19.d}[1], [x0], x1
|
||||
zip1 v25.2d, v2.2d, v16.2d
|
||||
zip2 v26.2d, v2.2d, v16.2d
|
||||
ld1 {v20.d}[0], [x0], x1
|
||||
zip1 v27.2d, v3.2d, v17.2d
|
||||
zip2 v28.2d, v3.2d, v17.2d
|
||||
ld1 {v20.d}[1], [x0], x1
|
||||
zip1 v29.2d, v4.2d, v18.2d
|
||||
zip2 v30.2d, v4.2d, v18.2d
|
||||
ld1 {v21.d}[0], [x0], x1
|
||||
uaddw v23.8h, v23.8h, v19.8b
|
||||
uaddw2 v24.8h, v24.8h, v19.16b
|
||||
ld1 {v21.d}[1], [x0], x1
|
||||
sqxtun v23.8b, v23.8h
|
||||
sqxtun2 v23.16b, v24.8h
|
||||
ld1 {v22.d}[0], [x0], x1
|
||||
uaddw v24.8h, v25.8h, v20.8b
|
||||
uaddw2 v25.8h, v26.8h, v20.16b
|
||||
ld1 {v22.d}[1], [x0], x1
|
||||
sqxtun v24.8b, v24.8h
|
||||
sqxtun2 v24.16b, v25.8h
|
||||
st1 {v23.d}[0], [x9], x1
|
||||
uaddw v25.8h, v27.8h, v21.8b
|
||||
uaddw2 v26.8h, v28.8h, v21.16b
|
||||
st1 {v23.d}[1], [x9], x1
|
||||
sqxtun v25.8b, v25.8h
|
||||
sqxtun2 v25.16b, v26.8h
|
||||
st1 {v24.d}[0], [x9], x1
|
||||
uaddw v26.8h, v29.8h, v22.8b
|
||||
uaddw2 v27.8h, v30.8h, v22.16b
|
||||
st1 {v24.d}[1], [x9], x1
|
||||
sqxtun v26.8b, v26.8h
|
||||
sqxtun2 v26.16b, v27.8h
|
||||
st1 {v25.d}[0], [x9], x1
|
||||
st1 {v25.d}[1], [x9], x1
|
||||
st1 {v26.d}[0], [x9], x1
|
||||
st1 {v26.d}[1], [x9], x1
|
||||
ld1 {v19.D}[0], [x0], x1
|
||||
zip1 v23.2D, v1.2D, v7.2D
|
||||
zip2 v24.2D, v1.2D, v7.2D
|
||||
ld1 {v19.D}[1], [x0], x1
|
||||
zip1 v25.2D, v2.2D, v16.2D
|
||||
zip2 v26.2D, v2.2D, v16.2D
|
||||
ld1 {v20.D}[0], [x0], x1
|
||||
zip1 v27.2D, v3.2D, v17.2D
|
||||
zip2 v28.2D, v3.2D, v17.2D
|
||||
ld1 {v20.D}[1], [x0], x1
|
||||
zip1 v29.2D, v4.2D, v18.2D
|
||||
zip2 v30.2D, v4.2D, v18.2D
|
||||
ld1 {v21.D}[0], [x0], x1
|
||||
uaddw v23.8H, v23.8H, v19.8B
|
||||
uaddw2 v24.8H, v24.8H, v19.16B
|
||||
ld1 {v21.D}[1], [x0], x1
|
||||
sqxtun v23.8B, v23.8H
|
||||
sqxtun2 v23.16B, v24.8H
|
||||
ld1 {v22.D}[0], [x0], x1
|
||||
uaddw v24.8H, v25.8H, v20.8B
|
||||
uaddw2 v25.8H, v26.8H, v20.16B
|
||||
ld1 {v22.D}[1], [x0], x1
|
||||
sqxtun v24.8B, v24.8H
|
||||
sqxtun2 v24.16B, v25.8H
|
||||
st1 {v23.D}[0], [x9], x1
|
||||
uaddw v25.8H, v27.8H, v21.8B
|
||||
uaddw2 v26.8H, v28.8H, v21.16B
|
||||
st1 {v23.D}[1], [x9], x1
|
||||
sqxtun v25.8B, v25.8H
|
||||
sqxtun2 v25.16B, v26.8H
|
||||
st1 {v24.D}[0], [x9], x1
|
||||
uaddw v26.8H, v29.8H, v22.8B
|
||||
uaddw2 v27.8H, v30.8H, v22.16B
|
||||
st1 {v24.D}[1], [x9], x1
|
||||
sqxtun v26.8B, v26.8H
|
||||
sqxtun2 v26.16B, v27.8H
|
||||
st1 {v25.D}[0], [x9], x1
|
||||
st1 {v25.D}[1], [x9], x1
|
||||
st1 {v26.D}[0], [x9], x1
|
||||
st1 {v26.D}[1], [x9], x1
|
||||
|
||||
idct_end
|
||||
endfunc
|
||||
@@ -333,30 +333,30 @@ function ff_simple_idct_neon, export=1
|
||||
sub x2, x2, #128
|
||||
bl idct_col4_neon1
|
||||
|
||||
sshr v1.8h, v7.8h, #COL_SHIFT-16
|
||||
sshr v2.8h, v16.8h, #COL_SHIFT-16
|
||||
sshr v3.8h, v17.8h, #COL_SHIFT-16
|
||||
sshr v4.8h, v18.8h, #COL_SHIFT-16
|
||||
sshr v1.8H, v7.8H, #COL_SHIFT-16
|
||||
sshr v2.8H, v16.8H, #COL_SHIFT-16
|
||||
sshr v3.8H, v17.8H, #COL_SHIFT-16
|
||||
sshr v4.8H, v18.8H, #COL_SHIFT-16
|
||||
|
||||
bl idct_col4_neon2
|
||||
|
||||
sshr v7.8h, v7.8h, #COL_SHIFT-16
|
||||
sshr v16.8h, v16.8h, #COL_SHIFT-16
|
||||
sshr v17.8h, v17.8h, #COL_SHIFT-16
|
||||
sshr v18.8h, v18.8h, #COL_SHIFT-16
|
||||
sshr v7.8H, v7.8H, #COL_SHIFT-16
|
||||
sshr v16.8H, v16.8H, #COL_SHIFT-16
|
||||
sshr v17.8H, v17.8H, #COL_SHIFT-16
|
||||
sshr v18.8H, v18.8H, #COL_SHIFT-16
|
||||
|
||||
zip1 v23.2d, v1.2d, v7.2d
|
||||
zip2 v24.2d, v1.2d, v7.2d
|
||||
st1 {v23.2d,v24.2d}, [x2], #32
|
||||
zip1 v25.2d, v2.2d, v16.2d
|
||||
zip2 v26.2d, v2.2d, v16.2d
|
||||
st1 {v25.2d,v26.2d}, [x2], #32
|
||||
zip1 v27.2d, v3.2d, v17.2d
|
||||
zip2 v28.2d, v3.2d, v17.2d
|
||||
st1 {v27.2d,v28.2d}, [x2], #32
|
||||
zip1 v29.2d, v4.2d, v18.2d
|
||||
zip2 v30.2d, v4.2d, v18.2d
|
||||
st1 {v29.2d,v30.2d}, [x2], #32
|
||||
zip1 v23.2D, v1.2D, v7.2D
|
||||
zip2 v24.2D, v1.2D, v7.2D
|
||||
st1 {v23.2D,v24.2D}, [x2], #32
|
||||
zip1 v25.2D, v2.2D, v16.2D
|
||||
zip2 v26.2D, v2.2D, v16.2D
|
||||
st1 {v25.2D,v26.2D}, [x2], #32
|
||||
zip1 v27.2D, v3.2D, v17.2D
|
||||
zip2 v28.2D, v3.2D, v17.2D
|
||||
st1 {v27.2D,v28.2D}, [x2], #32
|
||||
zip1 v29.2D, v4.2D, v18.2D
|
||||
zip2 v30.2D, v4.2D, v18.2D
|
||||
st1 {v29.2D,v30.2D}, [x2], #32
|
||||
|
||||
idct_end
|
||||
endfunc
|
||||
|
||||
+151
-151
@@ -330,32 +330,32 @@ endfunc
|
||||
// v17: hev
|
||||
|
||||
// convert to signed value:
|
||||
eor v3.16b, v3.16b, v21.16b // PS0 = P0 ^ 0x80
|
||||
eor v4.16b, v4.16b, v21.16b // QS0 = Q0 ^ 0x80
|
||||
eor v3.16b, v3.16b, v21.16b // PS0 = P0 ^ 0x80
|
||||
eor v4.16b, v4.16b, v21.16b // QS0 = Q0 ^ 0x80
|
||||
|
||||
movi v20.8h, #3
|
||||
ssubl v18.8h, v4.8b, v3.8b // QS0 - PS0
|
||||
ssubl2 v19.8h, v4.16b, v3.16b // (widened to 16bit)
|
||||
eor v2.16b, v2.16b, v21.16b // PS1 = P1 ^ 0x80
|
||||
eor v5.16b, v5.16b, v21.16b // QS1 = Q1 ^ 0x80
|
||||
mul v18.8h, v18.8h, v20.8h // w = 3 * (QS0 - PS0)
|
||||
mul v19.8h, v19.8h, v20.8h
|
||||
movi v20.8h, #3
|
||||
ssubl v18.8h, v4.8b, v3.8b // QS0 - PS0
|
||||
ssubl2 v19.8h, v4.16b, v3.16b // (widened to 16bit)
|
||||
eor v2.16b, v2.16b, v21.16b // PS1 = P1 ^ 0x80
|
||||
eor v5.16b, v5.16b, v21.16b // QS1 = Q1 ^ 0x80
|
||||
mul v18.8h, v18.8h, v20.8h // w = 3 * (QS0 - PS0)
|
||||
mul v19.8h, v19.8h, v20.8h
|
||||
|
||||
sqsub v20.16b, v2.16b, v5.16b // clamp(PS1-QS1)
|
||||
movi v22.16b, #4
|
||||
movi v23.16b, #3
|
||||
sqsub v20.16b, v2.16b, v5.16b // clamp(PS1-QS1)
|
||||
movi v22.16b, #4
|
||||
movi v23.16b, #3
|
||||
.if \inner
|
||||
and v20.16b, v20.16b, v17.16b // if(hev) w += clamp(PS1-QS1)
|
||||
and v20.16b, v20.16b, v17.16b // if(hev) w += clamp(PS1-QS1)
|
||||
.endif
|
||||
saddw v18.8h, v18.8h, v20.8b // w += clamp(PS1-QS1)
|
||||
saddw2 v19.8h, v19.8h, v20.16b
|
||||
sqxtn v18.8b, v18.8h // narrow result back into v18
|
||||
sqxtn2 v18.16b, v19.8h
|
||||
saddw v18.8h, v18.8h, v20.8b // w += clamp(PS1-QS1)
|
||||
saddw2 v19.8h, v19.8h, v20.16b
|
||||
sqxtn v18.8b, v18.8h // narrow result back into v18
|
||||
sqxtn2 v18.16b, v19.8h
|
||||
.if !\inner && !\simple
|
||||
eor v1.16b, v1.16b, v21.16b // PS2 = P2 ^ 0x80
|
||||
eor v6.16b, v6.16b, v21.16b // QS2 = Q2 ^ 0x80
|
||||
eor v1.16b, v1.16b, v21.16b // PS2 = P2 ^ 0x80
|
||||
eor v6.16b, v6.16b, v21.16b // QS2 = Q2 ^ 0x80
|
||||
.endif
|
||||
and v18.16b, v18.16b, v16.16b // w &= normal_limit
|
||||
and v18.16b, v18.16b, v16.16b // w &= normal_limit
|
||||
|
||||
// registers used at this point..
|
||||
// v0 -> P3 (don't corrupt)
|
||||
@@ -375,44 +375,44 @@ endfunc
|
||||
// P0 = s2u(PS0 + c2);
|
||||
|
||||
.if \simple
|
||||
sqadd v19.16b, v18.16b, v22.16b // c1 = clamp((w&hev)+4)
|
||||
sqadd v20.16b, v18.16b, v23.16b // c2 = clamp((w&hev)+3)
|
||||
sshr v19.16b, v19.16b, #3 // c1 >>= 3
|
||||
sshr v20.16b, v20.16b, #3 // c2 >>= 3
|
||||
sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1)
|
||||
sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2)
|
||||
eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80
|
||||
eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80
|
||||
eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80
|
||||
eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80
|
||||
sqadd v19.16b, v18.16b, v22.16b // c1 = clamp((w&hev)+4)
|
||||
sqadd v20.16b, v18.16b, v23.16b // c2 = clamp((w&hev)+3)
|
||||
sshr v19.16b, v19.16b, #3 // c1 >>= 3
|
||||
sshr v20.16b, v20.16b, #3 // c2 >>= 3
|
||||
sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1)
|
||||
sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2)
|
||||
eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80
|
||||
eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80
|
||||
eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80
|
||||
eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80
|
||||
.elseif \inner
|
||||
// the !is4tap case of filter_common, only used for inner blocks
|
||||
// c3 = ((c1&~hev) + 1) >> 1;
|
||||
// Q1 = s2u(QS1 - c3);
|
||||
// P1 = s2u(PS1 + c3);
|
||||
sqadd v19.16b, v18.16b, v22.16b // c1 = clamp((w&hev)+4)
|
||||
sqadd v20.16b, v18.16b, v23.16b // c2 = clamp((w&hev)+3)
|
||||
sshr v19.16b, v19.16b, #3 // c1 >>= 3
|
||||
sshr v20.16b, v20.16b, #3 // c2 >>= 3
|
||||
sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1)
|
||||
sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2)
|
||||
bic v19.16b, v19.16b, v17.16b // c1 & ~hev
|
||||
eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80
|
||||
srshr v19.16b, v19.16b, #1 // c3 >>= 1
|
||||
eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80
|
||||
sqsub v5.16b, v5.16b, v19.16b // QS1 = clamp(QS1-c3)
|
||||
sqadd v2.16b, v2.16b, v19.16b // PS1 = clamp(PS1+c3)
|
||||
eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80
|
||||
eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80
|
||||
sqadd v19.16b, v18.16b, v22.16b // c1 = clamp((w&hev)+4)
|
||||
sqadd v20.16b, v18.16b, v23.16b // c2 = clamp((w&hev)+3)
|
||||
sshr v19.16b, v19.16b, #3 // c1 >>= 3
|
||||
sshr v20.16b, v20.16b, #3 // c2 >>= 3
|
||||
sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1)
|
||||
sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2)
|
||||
bic v19.16b, v19.16b, v17.16b // c1 & ~hev
|
||||
eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80
|
||||
srshr v19.16b, v19.16b, #1 // c3 >>= 1
|
||||
eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80
|
||||
sqsub v5.16b, v5.16b, v19.16b // QS1 = clamp(QS1-c3)
|
||||
sqadd v2.16b, v2.16b, v19.16b // PS1 = clamp(PS1+c3)
|
||||
eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80
|
||||
eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80
|
||||
.else
|
||||
and v20.16b, v18.16b, v17.16b // w & hev
|
||||
sqadd v19.16b, v20.16b, v22.16b // c1 = clamp((w&hev)+4)
|
||||
sqadd v20.16b, v20.16b, v23.16b // c2 = clamp((w&hev)+3)
|
||||
sshr v19.16b, v19.16b, #3 // c1 >>= 3
|
||||
sshr v20.16b, v20.16b, #3 // c2 >>= 3
|
||||
bic v18.16b, v18.16b, v17.16b // w &= ~hev
|
||||
sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1)
|
||||
sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2)
|
||||
and v20.16b, v18.16b, v17.16b // w & hev
|
||||
sqadd v19.16b, v20.16b, v22.16b // c1 = clamp((w&hev)+4)
|
||||
sqadd v20.16b, v20.16b, v23.16b // c2 = clamp((w&hev)+3)
|
||||
sshr v19.16b, v19.16b, #3 // c1 >>= 3
|
||||
sshr v20.16b, v20.16b, #3 // c2 >>= 3
|
||||
bic v18.16b, v18.16b, v17.16b // w &= ~hev
|
||||
sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1)
|
||||
sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2)
|
||||
|
||||
// filter_mbedge:
|
||||
// a = clamp((27*w + 63) >> 7);
|
||||
@@ -424,35 +424,35 @@ endfunc
|
||||
// a = clamp((9*w + 63) >> 7);
|
||||
// Q2 = s2u(QS2 - a);
|
||||
// P2 = s2u(PS2 + a);
|
||||
movi v17.8h, #63
|
||||
sshll v22.8h, v18.8b, #3
|
||||
sshll2 v23.8h, v18.16b, #3
|
||||
saddw v22.8h, v22.8h, v18.8b
|
||||
saddw2 v23.8h, v23.8h, v18.16b
|
||||
add v16.8h, v17.8h, v22.8h
|
||||
add v17.8h, v17.8h, v23.8h // 9*w + 63
|
||||
add v19.8h, v16.8h, v22.8h
|
||||
add v20.8h, v17.8h, v23.8h // 18*w + 63
|
||||
add v22.8h, v19.8h, v22.8h
|
||||
add v23.8h, v20.8h, v23.8h // 27*w + 63
|
||||
sqshrn v16.8b, v16.8h, #7
|
||||
sqshrn2 v16.16b, v17.8h, #7 // clamp(( 9*w + 63)>>7)
|
||||
sqshrn v19.8b, v19.8h, #7
|
||||
sqshrn2 v19.16b, v20.8h, #7 // clamp((18*w + 63)>>7)
|
||||
sqshrn v22.8b, v22.8h, #7
|
||||
sqshrn2 v22.16b, v23.8h, #7 // clamp((27*w + 63)>>7)
|
||||
sqadd v1.16b, v1.16b, v16.16b // PS2 = clamp(PS2+a)
|
||||
sqsub v6.16b, v6.16b, v16.16b // QS2 = clamp(QS2-a)
|
||||
sqadd v2.16b, v2.16b, v19.16b // PS1 = clamp(PS1+a)
|
||||
sqsub v5.16b, v5.16b, v19.16b // QS1 = clamp(QS1-a)
|
||||
sqadd v3.16b, v3.16b, v22.16b // PS0 = clamp(PS0+a)
|
||||
sqsub v4.16b, v4.16b, v22.16b // QS0 = clamp(QS0-a)
|
||||
eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80
|
||||
eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80
|
||||
eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80
|
||||
eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80
|
||||
eor v1.16b, v1.16b, v21.16b // P2 = PS2 ^ 0x80
|
||||
eor v6.16b, v6.16b, v21.16b // Q2 = QS2 ^ 0x80
|
||||
movi v17.8h, #63
|
||||
sshll v22.8h, v18.8b, #3
|
||||
sshll2 v23.8h, v18.16b, #3
|
||||
saddw v22.8h, v22.8h, v18.8b
|
||||
saddw2 v23.8h, v23.8h, v18.16b
|
||||
add v16.8h, v17.8h, v22.8h
|
||||
add v17.8h, v17.8h, v23.8h // 9*w + 63
|
||||
add v19.8h, v16.8h, v22.8h
|
||||
add v20.8h, v17.8h, v23.8h // 18*w + 63
|
||||
add v22.8h, v19.8h, v22.8h
|
||||
add v23.8h, v20.8h, v23.8h // 27*w + 63
|
||||
sqshrn v16.8b, v16.8h, #7
|
||||
sqshrn2 v16.16b, v17.8h, #7 // clamp(( 9*w + 63)>>7)
|
||||
sqshrn v19.8b, v19.8h, #7
|
||||
sqshrn2 v19.16b, v20.8h, #7 // clamp((18*w + 63)>>7)
|
||||
sqshrn v22.8b, v22.8h, #7
|
||||
sqshrn2 v22.16b, v23.8h, #7 // clamp((27*w + 63)>>7)
|
||||
sqadd v1.16b, v1.16b, v16.16b // PS2 = clamp(PS2+a)
|
||||
sqsub v6.16b, v6.16b, v16.16b // QS2 = clamp(QS2-a)
|
||||
sqadd v2.16b, v2.16b, v19.16b // PS1 = clamp(PS1+a)
|
||||
sqsub v5.16b, v5.16b, v19.16b // QS1 = clamp(QS1-a)
|
||||
sqadd v3.16b, v3.16b, v22.16b // PS0 = clamp(PS0+a)
|
||||
sqsub v4.16b, v4.16b, v22.16b // QS0 = clamp(QS0-a)
|
||||
eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80
|
||||
eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80
|
||||
eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80
|
||||
eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80
|
||||
eor v1.16b, v1.16b, v21.16b // P2 = PS2 ^ 0x80
|
||||
eor v6.16b, v6.16b, v21.16b // Q2 = QS2 ^ 0x80
|
||||
.endif
|
||||
.endm
|
||||
|
||||
@@ -507,48 +507,48 @@ function ff_vp8_v_loop_filter8uv\name\()_neon, export=1
|
||||
sub x0, x0, x2, lsl #2
|
||||
sub x1, x1, x2, lsl #2
|
||||
// Load pixels:
|
||||
ld1 {v0.d}[0], [x0], x2 // P3
|
||||
ld1 {v0.d}[1], [x1], x2 // P3
|
||||
ld1 {v1.d}[0], [x0], x2 // P2
|
||||
ld1 {v1.d}[1], [x1], x2 // P2
|
||||
ld1 {v2.d}[0], [x0], x2 // P1
|
||||
ld1 {v2.d}[1], [x1], x2 // P1
|
||||
ld1 {v3.d}[0], [x0], x2 // P0
|
||||
ld1 {v3.d}[1], [x1], x2 // P0
|
||||
ld1 {v4.d}[0], [x0], x2 // Q0
|
||||
ld1 {v4.d}[1], [x1], x2 // Q0
|
||||
ld1 {v5.d}[0], [x0], x2 // Q1
|
||||
ld1 {v5.d}[1], [x1], x2 // Q1
|
||||
ld1 {v6.d}[0], [x0], x2 // Q2
|
||||
ld1 {v6.d}[1], [x1], x2 // Q2
|
||||
ld1 {v7.d}[0], [x0] // Q3
|
||||
ld1 {v7.d}[1], [x1] // Q3
|
||||
ld1 {v0.d}[0], [x0], x2 // P3
|
||||
ld1 {v0.d}[1], [x1], x2 // P3
|
||||
ld1 {v1.d}[0], [x0], x2 // P2
|
||||
ld1 {v1.d}[1], [x1], x2 // P2
|
||||
ld1 {v2.d}[0], [x0], x2 // P1
|
||||
ld1 {v2.d}[1], [x1], x2 // P1
|
||||
ld1 {v3.d}[0], [x0], x2 // P0
|
||||
ld1 {v3.d}[1], [x1], x2 // P0
|
||||
ld1 {v4.d}[0], [x0], x2 // Q0
|
||||
ld1 {v4.d}[1], [x1], x2 // Q0
|
||||
ld1 {v5.d}[0], [x0], x2 // Q1
|
||||
ld1 {v5.d}[1], [x1], x2 // Q1
|
||||
ld1 {v6.d}[0], [x0], x2 // Q2
|
||||
ld1 {v6.d}[1], [x1], x2 // Q2
|
||||
ld1 {v7.d}[0], [x0] // Q3
|
||||
ld1 {v7.d}[1], [x1] // Q3
|
||||
|
||||
dup v22.16b, w3 // flim_E
|
||||
dup v23.16b, w4 // flim_I
|
||||
dup v22.16b, w3 // flim_E
|
||||
dup v23.16b, w4 // flim_I
|
||||
|
||||
vp8_loop_filter inner=\inner, hev_thresh=w5
|
||||
|
||||
// back up to P2: u,v -= stride * 6
|
||||
sub x0, x0, x2, lsl #2
|
||||
sub x1, x1, x2, lsl #2
|
||||
sub x0, x0, x2, lsl #1
|
||||
sub x1, x1, x2, lsl #1
|
||||
sub x0, x0, x2, lsl #2
|
||||
sub x1, x1, x2, lsl #2
|
||||
sub x0, x0, x2, lsl #1
|
||||
sub x1, x1, x2, lsl #1
|
||||
|
||||
// Store pixels:
|
||||
|
||||
st1 {v1.d}[0], [x0], x2 // P2
|
||||
st1 {v1.d}[1], [x1], x2 // P2
|
||||
st1 {v2.d}[0], [x0], x2 // P1
|
||||
st1 {v2.d}[1], [x1], x2 // P1
|
||||
st1 {v3.d}[0], [x0], x2 // P0
|
||||
st1 {v3.d}[1], [x1], x2 // P0
|
||||
st1 {v4.d}[0], [x0], x2 // Q0
|
||||
st1 {v4.d}[1], [x1], x2 // Q0
|
||||
st1 {v5.d}[0], [x0], x2 // Q1
|
||||
st1 {v5.d}[1], [x1], x2 // Q1
|
||||
st1 {v6.d}[0], [x0] // Q2
|
||||
st1 {v6.d}[1], [x1] // Q2
|
||||
st1 {v1.d}[0], [x0], x2 // P2
|
||||
st1 {v1.d}[1], [x1], x2 // P2
|
||||
st1 {v2.d}[0], [x0], x2 // P1
|
||||
st1 {v2.d}[1], [x1], x2 // P1
|
||||
st1 {v3.d}[0], [x0], x2 // P0
|
||||
st1 {v3.d}[1], [x1], x2 // P0
|
||||
st1 {v4.d}[0], [x0], x2 // Q0
|
||||
st1 {v4.d}[1], [x1], x2 // Q0
|
||||
st1 {v5.d}[0], [x0], x2 // Q1
|
||||
st1 {v5.d}[1], [x1], x2 // Q1
|
||||
st1 {v6.d}[0], [x0] // Q2
|
||||
st1 {v6.d}[1], [x1] // Q2
|
||||
|
||||
ret
|
||||
endfunc
|
||||
@@ -579,7 +579,7 @@ function ff_vp8_h_loop_filter16\name\()_neon, export=1
|
||||
ld1 {v6.d}[1], [x0], x1
|
||||
ld1 {v7.d}[1], [x0], x1
|
||||
|
||||
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
|
||||
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
|
||||
|
||||
dup v22.16b, w2 // flim_E
|
||||
.if !\simple
|
||||
@@ -590,7 +590,7 @@ function ff_vp8_h_loop_filter16\name\()_neon, export=1
|
||||
|
||||
sub x0, x0, x1, lsl #4 // backup 16 rows
|
||||
|
||||
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
|
||||
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
|
||||
|
||||
// Store pixels:
|
||||
st1 {v0.d}[0], [x0], x1
|
||||
@@ -624,24 +624,24 @@ function ff_vp8_h_loop_filter8uv\name\()_neon, export=1
|
||||
sub x1, x1, #4
|
||||
|
||||
// Load pixels:
|
||||
ld1 {v0.d}[0], [x0], x2 // load u
|
||||
ld1 {v0.d}[1], [x1], x2 // load v
|
||||
ld1 {v1.d}[0], [x0], x2
|
||||
ld1 {v1.d}[1], [x1], x2
|
||||
ld1 {v2.d}[0], [x0], x2
|
||||
ld1 {v2.d}[1], [x1], x2
|
||||
ld1 {v3.d}[0], [x0], x2
|
||||
ld1 {v3.d}[1], [x1], x2
|
||||
ld1 {v4.d}[0], [x0], x2
|
||||
ld1 {v4.d}[1], [x1], x2
|
||||
ld1 {v5.d}[0], [x0], x2
|
||||
ld1 {v5.d}[1], [x1], x2
|
||||
ld1 {v6.d}[0], [x0], x2
|
||||
ld1 {v6.d}[1], [x1], x2
|
||||
ld1 {v7.d}[0], [x0], x2
|
||||
ld1 {v7.d}[1], [x1], x2
|
||||
ld1 {v0.d}[0], [x0], x2 // load u
|
||||
ld1 {v0.d}[1], [x1], x2 // load v
|
||||
ld1 {v1.d}[0], [x0], x2
|
||||
ld1 {v1.d}[1], [x1], x2
|
||||
ld1 {v2.d}[0], [x0], x2
|
||||
ld1 {v2.d}[1], [x1], x2
|
||||
ld1 {v3.d}[0], [x0], x2
|
||||
ld1 {v3.d}[1], [x1], x2
|
||||
ld1 {v4.d}[0], [x0], x2
|
||||
ld1 {v4.d}[1], [x1], x2
|
||||
ld1 {v5.d}[0], [x0], x2
|
||||
ld1 {v5.d}[1], [x1], x2
|
||||
ld1 {v6.d}[0], [x0], x2
|
||||
ld1 {v6.d}[1], [x1], x2
|
||||
ld1 {v7.d}[0], [x0], x2
|
||||
ld1 {v7.d}[1], [x1], x2
|
||||
|
||||
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
|
||||
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
|
||||
|
||||
dup v22.16b, w3 // flim_E
|
||||
dup v23.16b, w4 // flim_I
|
||||
@@ -651,25 +651,25 @@ function ff_vp8_h_loop_filter8uv\name\()_neon, export=1
|
||||
sub x0, x0, x2, lsl #3 // backup u 8 rows
|
||||
sub x1, x1, x2, lsl #3 // backup v 8 rows
|
||||
|
||||
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
|
||||
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
|
||||
|
||||
// Store pixels:
|
||||
st1 {v0.d}[0], [x0], x2 // load u
|
||||
st1 {v0.d}[1], [x1], x2 // load v
|
||||
st1 {v1.d}[0], [x0], x2
|
||||
st1 {v1.d}[1], [x1], x2
|
||||
st1 {v2.d}[0], [x0], x2
|
||||
st1 {v2.d}[1], [x1], x2
|
||||
st1 {v3.d}[0], [x0], x2
|
||||
st1 {v3.d}[1], [x1], x2
|
||||
st1 {v4.d}[0], [x0], x2
|
||||
st1 {v4.d}[1], [x1], x2
|
||||
st1 {v5.d}[0], [x0], x2
|
||||
st1 {v5.d}[1], [x1], x2
|
||||
st1 {v6.d}[0], [x0], x2
|
||||
st1 {v6.d}[1], [x1], x2
|
||||
st1 {v7.d}[0], [x0]
|
||||
st1 {v7.d}[1], [x1]
|
||||
st1 {v0.d}[0], [x0], x2 // load u
|
||||
st1 {v0.d}[1], [x1], x2 // load v
|
||||
st1 {v1.d}[0], [x0], x2
|
||||
st1 {v1.d}[1], [x1], x2
|
||||
st1 {v2.d}[0], [x0], x2
|
||||
st1 {v2.d}[1], [x1], x2
|
||||
st1 {v3.d}[0], [x0], x2
|
||||
st1 {v3.d}[1], [x1], x2
|
||||
st1 {v4.d}[0], [x0], x2
|
||||
st1 {v4.d}[1], [x1], x2
|
||||
st1 {v5.d}[0], [x0], x2
|
||||
st1 {v5.d}[1], [x1], x2
|
||||
st1 {v6.d}[0], [x0], x2
|
||||
st1 {v6.d}[1], [x1], x2
|
||||
st1 {v7.d}[0], [x0]
|
||||
st1 {v7.d}[1], [x1]
|
||||
|
||||
ret
|
||||
|
||||
|
||||
@@ -230,9 +230,6 @@ function \type\()_8tap_\size\()h_\idx1\idx2
|
||||
// reduced dst stride
|
||||
.if \size >= 16
|
||||
sub x1, x1, x5
|
||||
.elseif \size == 4
|
||||
add x12, x2, #8
|
||||
add x13, x7, #8
|
||||
.endif
|
||||
// size >= 16 loads two qwords and increments x2,
|
||||
// for size 4/8 it's enough with one qword and no
|
||||
@@ -251,14 +248,9 @@ function \type\()_8tap_\size\()h_\idx1\idx2
|
||||
.if \size >= 16
|
||||
ld1 {v4.8b, v5.8b, v6.8b}, [x2], #24
|
||||
ld1 {v16.8b, v17.8b, v18.8b}, [x7], #24
|
||||
.elseif \size == 8
|
||||
.else
|
||||
ld1 {v4.8b, v5.8b}, [x2]
|
||||
ld1 {v16.8b, v17.8b}, [x7]
|
||||
.else // \size == 4
|
||||
ld1 {v4.8b}, [x2]
|
||||
ld1 {v16.8b}, [x7]
|
||||
ld1 {v5.s}[0], [x12], x3
|
||||
ld1 {v17.s}[0], [x13], x3
|
||||
.endif
|
||||
uxtl v4.8h, v4.8b
|
||||
uxtl v5.8h, v5.8b
|
||||
|
||||
+17
-17
@@ -104,26 +104,26 @@ static int aasc_decode_frame(AVCodecContext *avctx,
|
||||
ff_msrle_decode(avctx, s->frame, 8, &s->gb);
|
||||
break;
|
||||
case MKTAG('A', 'A', 'S', 'C'):
|
||||
switch (compr) {
|
||||
case 0:
|
||||
stride = (avctx->width * psize + psize) & ~psize;
|
||||
if (buf_size < stride * avctx->height)
|
||||
return AVERROR_INVALIDDATA;
|
||||
for (i = avctx->height - 1; i >= 0; i--) {
|
||||
memcpy(s->frame->data[0] + i * s->frame->linesize[0], buf, avctx->width * psize);
|
||||
buf += stride;
|
||||
buf_size -= stride;
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
bytestream2_init(&s->gb, buf, buf_size);
|
||||
ff_msrle_decode(avctx, s->frame, 8, &s->gb);
|
||||
break;
|
||||
default:
|
||||
av_log(avctx, AV_LOG_ERROR, "Unknown compression type %d\n", compr);
|
||||
switch (compr) {
|
||||
case 0:
|
||||
stride = (avctx->width * psize + psize) & ~psize;
|
||||
if (buf_size < stride * avctx->height)
|
||||
return AVERROR_INVALIDDATA;
|
||||
for (i = avctx->height - 1; i >= 0; i--) {
|
||||
memcpy(s->frame->data[0] + i * s->frame->linesize[0], buf, avctx->width * psize);
|
||||
buf += stride;
|
||||
buf_size -= stride;
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
bytestream2_init(&s->gb, buf, buf_size);
|
||||
ff_msrle_decode(avctx, s->frame, 8, &s->gb);
|
||||
break;
|
||||
default:
|
||||
av_log(avctx, AV_LOG_ERROR, "Unknown compression type %d\n", compr);
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
av_log(avctx, AV_LOG_ERROR, "Unknown FourCC: %X\n", avctx->codec_tag);
|
||||
return -1;
|
||||
|
||||
+1
-2
@@ -27,6 +27,7 @@
|
||||
#ifndef AVCODEC_AC3_H
|
||||
#define AVCODEC_AC3_H
|
||||
|
||||
#define AC3_MAX_CODED_FRAME_SIZE 3840 /* in bytes */
|
||||
#define EAC3_MAX_CHANNELS 16 /**< maximum number of channels in EAC3 */
|
||||
#define AC3_MAX_CHANNELS 7 /**< maximum number of channels, including coupling channel */
|
||||
#define CPL_CH 0 /**< coupling channel index */
|
||||
@@ -74,7 +75,6 @@
|
||||
#define AC3_DYNAMIC_RANGE1 0
|
||||
|
||||
typedef int INTFLOAT;
|
||||
typedef unsigned int UINTFLOAT;
|
||||
typedef int16_t SHORTFLOAT;
|
||||
|
||||
#else /* USE_FIXED */
|
||||
@@ -94,7 +94,6 @@ typedef int16_t SHORTFLOAT;
|
||||
#define AC3_DYNAMIC_RANGE1 1.0f
|
||||
|
||||
typedef float INTFLOAT;
|
||||
typedef float UINTFLOAT;
|
||||
typedef float SHORTFLOAT;
|
||||
|
||||
#endif /* USE_FIXED */
|
||||
|
||||
@@ -179,9 +179,7 @@ int av_ac3_parse_header(const uint8_t *buf, size_t size,
|
||||
AC3HeaderInfo hdr;
|
||||
int err;
|
||||
|
||||
err = init_get_bits8(&gb, buf, size);
|
||||
if (err < 0)
|
||||
return AVERROR_INVALIDDATA;
|
||||
init_get_bits8(&gb, buf, size);
|
||||
err = ff_ac3_parse_header(&gb, &hdr);
|
||||
if (err < 0)
|
||||
return AVERROR_INVALIDDATA;
|
||||
|
||||
+1
-1
@@ -1729,7 +1729,7 @@ static void ac3_output_frame(AC3EncodeContext *s, unsigned char *frame)
|
||||
{
|
||||
int blk;
|
||||
|
||||
init_put_bits(&s->pb, frame, s->frame_size);
|
||||
init_put_bits(&s->pb, frame, AC3_MAX_CODED_FRAME_SIZE);
|
||||
|
||||
s->output_frame_header(s);
|
||||
|
||||
|
||||
+14
-34
@@ -100,7 +100,7 @@ static const int8_t mtf_index_table[16] = {
|
||||
typedef struct ADPCMDecodeContext {
|
||||
ADPCMChannelStatus status[14];
|
||||
int vqa_version; /**< VQA version. Used for ADPCM_IMA_WS */
|
||||
int has_status; /**< Status flag. Reset to 0 after a flush. */
|
||||
int has_status;
|
||||
} ADPCMDecodeContext;
|
||||
|
||||
static av_cold int adpcm_decode_init(AVCodecContext * avctx)
|
||||
@@ -735,8 +735,6 @@ static int get_nb_samples(AVCodecContext *avctx, GetByteContext *gb,
|
||||
|
||||
if(ch <= 0)
|
||||
return 0;
|
||||
if (buf_size > INT_MAX / 2)
|
||||
return 0;
|
||||
|
||||
switch (avctx->codec->id) {
|
||||
/* constant, only check buf_size */
|
||||
@@ -1813,6 +1811,11 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
|
||||
}
|
||||
break;
|
||||
case AV_CODEC_ID_ADPCM_AICA:
|
||||
if (!c->has_status) {
|
||||
for (channel = 0; channel < avctx->channels; channel++)
|
||||
c->status[channel].step = 0;
|
||||
c->has_status = 1;
|
||||
}
|
||||
for (channel = 0; channel < avctx->channels; channel++) {
|
||||
samples = samples_p[channel];
|
||||
for (n = nb_samples >> 1; n > 0; n--) {
|
||||
@@ -2074,6 +2077,13 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
|
||||
}
|
||||
break;
|
||||
case AV_CODEC_ID_ADPCM_ZORK:
|
||||
if (!c->has_status) {
|
||||
for (channel = 0; channel < avctx->channels; channel++) {
|
||||
c->status[channel].predictor = 0;
|
||||
c->status[channel].step_index = 0;
|
||||
}
|
||||
c->has_status = 1;
|
||||
}
|
||||
for (n = 0; n < nb_samples * avctx->channels; n++) {
|
||||
int v = bytestream2_get_byteu(&gb);
|
||||
*samples++ = adpcm_zork_expand_nibble(&c->status[n % avctx->channels], v);
|
||||
@@ -2111,37 +2121,7 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
|
||||
static void adpcm_flush(AVCodecContext *avctx)
|
||||
{
|
||||
ADPCMDecodeContext *c = avctx->priv_data;
|
||||
|
||||
switch(avctx->codec_id) {
|
||||
case AV_CODEC_ID_ADPCM_AICA:
|
||||
for (int channel = 0; channel < avctx->channels; channel++)
|
||||
c->status[channel].step = 0;
|
||||
break;
|
||||
|
||||
case AV_CODEC_ID_ADPCM_ARGO:
|
||||
for (int channel = 0; channel < avctx->channels; channel++) {
|
||||
c->status[channel].sample1 = 0;
|
||||
c->status[channel].sample2 = 0;
|
||||
}
|
||||
break;
|
||||
|
||||
case AV_CODEC_ID_ADPCM_IMA_ALP:
|
||||
case AV_CODEC_ID_ADPCM_IMA_CUNNING:
|
||||
case AV_CODEC_ID_ADPCM_IMA_SSI:
|
||||
case AV_CODEC_ID_ADPCM_ZORK:
|
||||
for (int channel = 0; channel < avctx->channels; channel++) {
|
||||
c->status[channel].predictor = 0;
|
||||
c->status[channel].step_index = 0;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
/* Other codecs may want to handle this during decoding. */
|
||||
c->has_status = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
c->has_status = 1;
|
||||
c->has_status = 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -959,14 +959,14 @@ static const AVOption options[] = {
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static const AVClass adpcm_encoder_class = {
|
||||
.class_name = "ADPCM Encoder",
|
||||
.item_name = av_default_item_name,
|
||||
.option = options,
|
||||
.version = LIBAVUTIL_VERSION_INT,
|
||||
};
|
||||
|
||||
#define ADPCM_ENCODER(id_, name_, sample_fmts_, capabilities_, long_name_) \
|
||||
static const AVClass name_ ## _encoder_class = { \
|
||||
.class_name = #name_, \
|
||||
.item_name = av_default_item_name, \
|
||||
.option = options, \
|
||||
.version = LIBAVUTIL_VERSION_INT, \
|
||||
}; \
|
||||
\
|
||||
AVCodec ff_ ## name_ ## _encoder = { \
|
||||
.name = #name_, \
|
||||
.long_name = NULL_IF_CONFIG_SMALL(long_name_), \
|
||||
@@ -979,7 +979,7 @@ AVCodec ff_ ## name_ ## _encoder = { \
|
||||
.sample_fmts = sample_fmts_, \
|
||||
.capabilities = capabilities_, \
|
||||
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_INIT_THREADSAFE, \
|
||||
.priv_class = &name_ ## _encoder_class, \
|
||||
.priv_class = &adpcm_encoder_class, \
|
||||
}
|
||||
|
||||
ADPCM_ENCODER(AV_CODEC_ID_ADPCM_ARGO, adpcm_argo, sample_fmts_p, 0, "ADPCM Argonaut Games");
|
||||
|
||||
+2
-1
@@ -472,7 +472,8 @@ static av_cold int aic_decode_init(AVCodecContext *avctx)
|
||||
}
|
||||
}
|
||||
|
||||
ctx->slice_data = av_calloc(ctx->slice_width, AIC_BAND_COEFFS * sizeof(*ctx->slice_data));
|
||||
ctx->slice_data = av_malloc_array(ctx->slice_width, AIC_BAND_COEFFS
|
||||
* sizeof(*ctx->slice_data));
|
||||
if (!ctx->slice_data) {
|
||||
av_log(avctx, AV_LOG_ERROR, "Error allocating slice buffer\n");
|
||||
|
||||
|
||||
@@ -29,12 +29,12 @@ static void decorrelate_stereo(int32_t *buffer[2], int nb_samples,
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nb_samples; i++) {
|
||||
uint32_t a, b;
|
||||
int32_t a, b;
|
||||
|
||||
a = buffer[0][i];
|
||||
b = buffer[1][i];
|
||||
|
||||
a -= (int)(b * decorr_left_weight) >> decorr_shift;
|
||||
a -= (b * decorr_left_weight) >> decorr_shift;
|
||||
b += a;
|
||||
|
||||
buffer[0][i] = b;
|
||||
|
||||
+8
-20
@@ -1017,7 +1017,7 @@ static int read_block(ALSDecContext *ctx, ALSBlockData *bd)
|
||||
|
||||
*bd->shift_lsbs = 0;
|
||||
|
||||
if (get_bits_left(gb) < 7)
|
||||
if (get_bits_left(gb) < 1)
|
||||
return AVERROR_INVALIDDATA;
|
||||
|
||||
// read block type flag and read the samples accordingly
|
||||
@@ -1529,12 +1529,8 @@ static int read_diff_float_data(ALSDecContext *ctx, unsigned int ra_frame) {
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
|
||||
j = 0;
|
||||
for (i = 0; i < frame_length; ++i) {
|
||||
if (ctx->raw_samples[c][i] == 0) {
|
||||
ctx->raw_mantissa[c][i] = AV_RB32(larray + j);
|
||||
j += 4;
|
||||
}
|
||||
ctx->raw_mantissa[c][i] = AV_RB32(larray);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1545,10 +1541,7 @@ static int read_diff_float_data(ALSDecContext *ctx, unsigned int ra_frame) {
|
||||
if (ctx->raw_samples[c][i] != 0) {
|
||||
//The following logic is taken from Tabel 14.45 and 14.46 from the ISO spec
|
||||
if (av_cmp_sf_ieee754(acf[c], FLOAT_1)) {
|
||||
int nbit = av_log2(FFABSU(ctx->raw_samples[c][i]));
|
||||
if (nbit > 23)
|
||||
return AVERROR_INVALIDDATA;
|
||||
nbits[i] = 23 - nbit;
|
||||
nbits[i] = 23 - av_log2(abs(ctx->raw_samples[c][i]));
|
||||
} else {
|
||||
nbits[i] = 23;
|
||||
}
|
||||
@@ -1622,7 +1615,7 @@ static int read_diff_float_data(ALSDecContext *ctx, unsigned int ra_frame) {
|
||||
tmp_32 = (sign << 31) | ((e + EXP_BIAS) << 23) | (mantissa);
|
||||
ctx->raw_samples[c][i] = tmp_32;
|
||||
} else {
|
||||
ctx->raw_samples[c][i] = raw_mantissa[c][i];
|
||||
ctx->raw_samples[c][i] = raw_mantissa[c][i] & 0x007fffffUL;
|
||||
}
|
||||
}
|
||||
align_get_bits(gb);
|
||||
@@ -1639,7 +1632,7 @@ static int read_frame_data(ALSDecContext *ctx, unsigned int ra_frame)
|
||||
AVCodecContext *avctx = ctx->avctx;
|
||||
GetBitContext *gb = &ctx->gb;
|
||||
unsigned int div_blocks[32]; ///< block sizes.
|
||||
int c;
|
||||
unsigned int c;
|
||||
unsigned int js_blocks[2];
|
||||
uint32_t bs_info = 0;
|
||||
int ret;
|
||||
@@ -1776,9 +1769,7 @@ static int read_frame_data(ALSDecContext *ctx, unsigned int ra_frame)
|
||||
}
|
||||
|
||||
if (sconf->floating) {
|
||||
ret = read_diff_float_data(ctx, ra_frame);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
read_diff_float_data(ctx, ra_frame);
|
||||
}
|
||||
|
||||
if (get_bits_left(gb) < 0) {
|
||||
@@ -1819,17 +1810,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame_ptr,
|
||||
else
|
||||
ctx->cur_frame_length = sconf->frame_length;
|
||||
|
||||
ctx->highest_decoded_channel = -1;
|
||||
ctx->highest_decoded_channel = 0;
|
||||
// decode the frame data
|
||||
if ((invalid_frame = read_frame_data(ctx, ra_frame)) < 0)
|
||||
av_log(ctx->avctx, AV_LOG_WARNING,
|
||||
"Reading frame data failed. Skipping RA unit.\n");
|
||||
|
||||
if (ctx->highest_decoded_channel == -1) {
|
||||
av_log(ctx->avctx, AV_LOG_WARNING,
|
||||
"No channel data decoded.\n");
|
||||
if (ctx->highest_decoded_channel == 0)
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
|
||||
ctx->frame_id++;
|
||||
|
||||
|
||||
+24
-44
@@ -102,7 +102,7 @@ typedef struct APEFilter {
|
||||
int16_t *historybuffer; ///< filter memory
|
||||
int16_t *delay; ///< filtered values
|
||||
|
||||
uint32_t avg;
|
||||
int avg;
|
||||
} APEFilter;
|
||||
|
||||
typedef struct APERice {
|
||||
@@ -879,7 +879,7 @@ static av_always_inline int filter_fast_3320(APEPredictor *p,
|
||||
}
|
||||
|
||||
predictionA = p->buf[delayA] * 2U - p->buf[delayA - 1];
|
||||
p->lastA[filter] = decoded + (unsigned)((int32_t)(predictionA * p->coeffsA[filter][0]) >> 9);
|
||||
p->lastA[filter] = decoded + ((int32_t)(predictionA * p->coeffsA[filter][0]) >> 9);
|
||||
|
||||
if ((decoded ^ predictionA) > 0)
|
||||
p->coeffsA[filter][0]++;
|
||||
@@ -909,8 +909,8 @@ static av_always_inline int filter_3800(APEPredictor *p,
|
||||
return predictionA;
|
||||
}
|
||||
d2 = p->buf[delayA];
|
||||
d1 = (p->buf[delayA] - (unsigned)p->buf[delayA - 1]) * 2;
|
||||
d0 = p->buf[delayA] + ((p->buf[delayA - 2] - (unsigned)p->buf[delayA - 1]) * 8);
|
||||
d1 = (p->buf[delayA] - p->buf[delayA - 1]) * 2U;
|
||||
d0 = p->buf[delayA] + ((p->buf[delayA - 2] - p->buf[delayA - 1]) * 8U);
|
||||
d3 = p->buf[delayB] * 2U - p->buf[delayB - 1];
|
||||
d4 = p->buf[delayB];
|
||||
|
||||
@@ -930,7 +930,7 @@ static av_always_inline int filter_3800(APEPredictor *p,
|
||||
p->coeffsB[filter][0] += (((d3 >> 29) & 4) - 2) * sign;
|
||||
p->coeffsB[filter][1] -= (((d4 >> 30) & 2) - 1) * sign;
|
||||
|
||||
p->filterB[filter] = p->lastA[filter] + (unsigned)(predictionB >> shift);
|
||||
p->filterB[filter] = p->lastA[filter] + (predictionB >> shift);
|
||||
p->filterA[filter] = p->filterB[filter] + (unsigned)((int)(p->filterA[filter] * 31U) >> 5);
|
||||
|
||||
return p->filterA[filter];
|
||||
@@ -955,7 +955,7 @@ static void long_filter_high_3800(int32_t *buffer, int order, int shift, int len
|
||||
dotprod += delay[j] * (unsigned)coeffs[j];
|
||||
coeffs[j] += ((delay[j] >> 31) | 1) * sign;
|
||||
}
|
||||
buffer[i] -= (unsigned)(dotprod >> shift);
|
||||
buffer[i] -= dotprod >> shift;
|
||||
for (j = 0; j < order - 1; j++)
|
||||
delay[j] = delay[j + 1];
|
||||
delay[order - 1] = buffer[i];
|
||||
@@ -979,7 +979,7 @@ static void long_filter_ehigh_3830(int32_t *buffer, int length)
|
||||
for (j = 7; j > 0; j--)
|
||||
delay[j] = delay[j - 1];
|
||||
delay[0] = buffer[i];
|
||||
buffer[i] -= (unsigned)(dotprod >> 9);
|
||||
buffer[i] -= dotprod >> 9;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1088,13 +1088,13 @@ static av_always_inline int predictor_update_3930(APEPredictor *p,
|
||||
const int delayA)
|
||||
{
|
||||
int32_t predictionA, sign;
|
||||
uint32_t d0, d1, d2, d3;
|
||||
int32_t d0, d1, d2, d3;
|
||||
|
||||
p->buf[delayA] = p->lastA[filter];
|
||||
d0 = p->buf[delayA ];
|
||||
d1 = p->buf[delayA ] - (unsigned)p->buf[delayA - 1];
|
||||
d2 = p->buf[delayA - 1] - (unsigned)p->buf[delayA - 2];
|
||||
d3 = p->buf[delayA - 2] - (unsigned)p->buf[delayA - 3];
|
||||
d1 = p->buf[delayA ] - p->buf[delayA - 1];
|
||||
d2 = p->buf[delayA - 1] - p->buf[delayA - 2];
|
||||
d3 = p->buf[delayA - 2] - p->buf[delayA - 3];
|
||||
|
||||
predictionA = d0 * p->coeffsA[filter][0] +
|
||||
d1 * p->coeffsA[filter][1] +
|
||||
@@ -1105,10 +1105,10 @@ static av_always_inline int predictor_update_3930(APEPredictor *p,
|
||||
p->filterA[filter] = p->lastA[filter] + ((int)(p->filterA[filter] * 31U) >> 5);
|
||||
|
||||
sign = APESIGN(decoded);
|
||||
p->coeffsA[filter][0] += (((int32_t)d0 < 0) * 2 - 1) * sign;
|
||||
p->coeffsA[filter][1] += (((int32_t)d1 < 0) * 2 - 1) * sign;
|
||||
p->coeffsA[filter][2] += (((int32_t)d2 < 0) * 2 - 1) * sign;
|
||||
p->coeffsA[filter][3] += (((int32_t)d3 < 0) * 2 - 1) * sign;
|
||||
p->coeffsA[filter][0] += ((d0 < 0) * 2 - 1) * sign;
|
||||
p->coeffsA[filter][1] += ((d1 < 0) * 2 - 1) * sign;
|
||||
p->coeffsA[filter][2] += ((d2 < 0) * 2 - 1) * sign;
|
||||
p->coeffsA[filter][3] += ((d3 < 0) * 2 - 1) * sign;
|
||||
|
||||
return p->filterA[filter];
|
||||
}
|
||||
@@ -1166,8 +1166,7 @@ static void predictor_decode_mono_3930(APEContext *ctx, int count)
|
||||
static av_always_inline int predictor_update_filter(APEPredictor64 *p,
|
||||
const int decoded, const int filter,
|
||||
const int delayA, const int delayB,
|
||||
const int adaptA, const int adaptB,
|
||||
int compression_level)
|
||||
const int adaptA, const int adaptB)
|
||||
{
|
||||
int64_t predictionA, predictionB;
|
||||
int32_t sign;
|
||||
@@ -1195,13 +1194,7 @@ static av_always_inline int predictor_update_filter(APEPredictor64 *p,
|
||||
p->buf[delayB - 3] * p->coeffsB[filter][3] +
|
||||
p->buf[delayB - 4] * p->coeffsB[filter][4];
|
||||
|
||||
if (compression_level < COMPRESSION_LEVEL_INSANE) {
|
||||
predictionA = (int32_t)predictionA;
|
||||
predictionB = (int32_t)predictionB;
|
||||
p->lastA[filter] = (int32_t)(decoded + (unsigned)((int32_t)(predictionA + (predictionB >> 1)) >> 10));
|
||||
} else {
|
||||
p->lastA[filter] = decoded + ((int64_t)((uint64_t)predictionA + (predictionB >> 1)) >> 10);
|
||||
}
|
||||
p->lastA[filter] = decoded + ((int64_t)((uint64_t)predictionA + (predictionB >> 1)) >> 10);
|
||||
p->filterA[filter] = p->lastA[filter] + ((int64_t)(p->filterA[filter] * 31ULL) >> 5);
|
||||
|
||||
sign = APESIGN(decoded);
|
||||
@@ -1229,12 +1222,10 @@ static void predictor_decode_stereo_3950(APEContext *ctx, int count)
|
||||
while (count--) {
|
||||
/* Predictor Y */
|
||||
*decoded0 = predictor_update_filter(p, *decoded0, 0, YDELAYA, YDELAYB,
|
||||
YADAPTCOEFFSA, YADAPTCOEFFSB,
|
||||
ctx->compression_level);
|
||||
YADAPTCOEFFSA, YADAPTCOEFFSB);
|
||||
decoded0++;
|
||||
*decoded1 = predictor_update_filter(p, *decoded1, 1, XDELAYA, XDELAYB,
|
||||
XADAPTCOEFFSA, XADAPTCOEFFSB,
|
||||
ctx->compression_level);
|
||||
XADAPTCOEFFSA, XADAPTCOEFFSB);
|
||||
decoded1++;
|
||||
|
||||
/* Combined */
|
||||
@@ -1346,7 +1337,7 @@ static void do_apply_filter(APEContext *ctx, int version, APEFilter *f,
|
||||
absres = FFABSU(res);
|
||||
if (absres)
|
||||
*f->adaptcoeffs = APESIGN(res) *
|
||||
(8 << ((absres > f->avg * 3LL) + (absres > (f->avg + f->avg / 3))));
|
||||
(8 << ((absres > f->avg * 3) + (absres > f->avg * 4 / 3)));
|
||||
/* equivalent to the following code
|
||||
if (absres <= f->avg * 4 / 3)
|
||||
*f->adaptcoeffs = APESIGN(res) * 8;
|
||||
@@ -1596,7 +1587,7 @@ static int ape_decode_frame(AVCodecContext *avctx, void *data,
|
||||
for (ch = 0; ch < s->channels; ch++) {
|
||||
sample8 = (uint8_t *)frame->data[ch];
|
||||
for (i = 0; i < blockstodecode; i++)
|
||||
*sample8++ = (s->decoded[ch][i] + 0x80U) & 0xff;
|
||||
*sample8++ = (s->decoded[ch][i] + 0x80) & 0xff;
|
||||
}
|
||||
break;
|
||||
case 16:
|
||||
@@ -1618,24 +1609,13 @@ static int ape_decode_frame(AVCodecContext *avctx, void *data,
|
||||
s->samples -= blockstodecode;
|
||||
|
||||
if (avctx->err_recognition & AV_EF_CRCCHECK &&
|
||||
s->fileversion >= 3900) {
|
||||
s->fileversion >= 3900 && s->bps < 24) {
|
||||
uint32_t crc = s->CRC_state;
|
||||
const AVCRC *crc_tab = av_crc_get_table(AV_CRC_32_IEEE_LE);
|
||||
int stride = s->bps == 24 ? 4 : (s->bps>>3);
|
||||
int offset = s->bps == 24;
|
||||
int bytes = s->bps >> 3;
|
||||
|
||||
for (i = 0; i < blockstodecode; i++) {
|
||||
for (ch = 0; ch < s->channels; ch++) {
|
||||
#if HAVE_BIGENDIAN
|
||||
uint8_t *smp_native = frame->data[ch] + i*stride;
|
||||
uint8_t smp[4];
|
||||
for(int j = 0; j<stride; j++)
|
||||
smp[j] = smp_native[stride-j-1];
|
||||
#else
|
||||
uint8_t *smp = frame->data[ch] + i*stride;
|
||||
#endif
|
||||
crc = av_crc(crc_tab, crc, smp+offset, bytes);
|
||||
uint8_t *smp = frame->data[ch] + (i*(s->bps >> 3));
|
||||
crc = av_crc(crc_tab, crc, smp, s->bps >> 3);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
+1
-9
@@ -59,7 +59,7 @@ static int decode_pal8(AVCodecContext *avctx, uint32_t *pal)
|
||||
return AVERROR_INVALIDDATA;
|
||||
|
||||
for (int i = 0; i < count; i++)
|
||||
pal[start + i] = (0xFFU << 24) | bytestream2_get_be24u(gb);
|
||||
pal[start + i] = (0xFF << 24U) | bytestream2_get_be24u(gb);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -608,9 +608,6 @@ static int decode_frame(AVCodecContext *avctx, void *data,
|
||||
uint32_t chunk;
|
||||
int ret;
|
||||
|
||||
if (avpkt->size < 4)
|
||||
return AVERROR_INVALIDDATA;
|
||||
|
||||
bytestream2_init(gb, avpkt->data, avpkt->size);
|
||||
|
||||
if ((ret = ff_reget_buffer(avctx, frame, 0)) < 0)
|
||||
@@ -688,11 +685,6 @@ static av_cold int decode_init(AVCodecContext *avctx)
|
||||
return AVERROR_PATCHWELCOME;
|
||||
}
|
||||
|
||||
if (avctx->width % 2 || avctx->height % 2) {
|
||||
avpriv_request_sample(s, "Odd dimensions\n");
|
||||
return AVERROR_PATCHWELCOME;
|
||||
}
|
||||
|
||||
s->frame = av_frame_alloc();
|
||||
if (!s->frame)
|
||||
return AVERROR(ENOMEM);
|
||||
|
||||
@@ -48,3 +48,4 @@ function ff_scalarproduct_int16_neon, export=1
|
||||
vmov.32 r0, d3[0]
|
||||
bx lr
|
||||
endfunc
|
||||
|
||||
|
||||
@@ -229,7 +229,7 @@ A .endif
|
||||
.endif
|
||||
|
||||
// Begin loop
|
||||
1:
|
||||
01:
|
||||
.if TOTAL_TAPS == 0
|
||||
// Things simplify a lot in this case
|
||||
// In fact this could be pipelined further if it's worth it...
|
||||
@@ -241,7 +241,7 @@ A .endif
|
||||
str ST0, [PST, #-4]!
|
||||
str ST0, [PST, #4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)]
|
||||
str ST0, [PSAMP], #4 * MAX_CHANNELS
|
||||
bne 1b
|
||||
bne 01b
|
||||
.else
|
||||
.if \fir_taps & 1
|
||||
.set LOAD_REG, 1
|
||||
@@ -333,7 +333,7 @@ T orr AC0, AC0, AC1
|
||||
str ST3, [PST, #-4]!
|
||||
str ST2, [PST, #4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)]
|
||||
str ST3, [PSAMP], #4 * MAX_CHANNELS
|
||||
bne 1b
|
||||
bne 01b
|
||||
.endif
|
||||
b 99f
|
||||
|
||||
|
||||
@@ -38,49 +38,49 @@ function ff_sbc_analyze_4_neon, export=1
|
||||
/* TODO: merge even and odd cases (or even merge all four calls to this
|
||||
* function) in order to have only aligned reads from 'in' array
|
||||
* and reduce number of load instructions */
|
||||
vld1.16 {d16, d17}, [r0, :64]!
|
||||
vld1.16 {d20, d21}, [r2, :128]!
|
||||
vld1.16 {d4, d5}, [r0, :64]!
|
||||
vld1.16 {d8, d9}, [r2, :128]!
|
||||
|
||||
vmull.s16 q0, d16, d20
|
||||
vld1.16 {d18, d19}, [r0, :64]!
|
||||
vmull.s16 q1, d17, d21
|
||||
vld1.16 {d22, d23}, [r2, :128]!
|
||||
vmull.s16 q0, d4, d8
|
||||
vld1.16 {d6, d7}, [r0, :64]!
|
||||
vmull.s16 q1, d5, d9
|
||||
vld1.16 {d10, d11}, [r2, :128]!
|
||||
|
||||
vmlal.s16 q0, d18, d22
|
||||
vld1.16 {d16, d17}, [r0, :64]!
|
||||
vmlal.s16 q1, d19, d23
|
||||
vld1.16 {d20, d21}, [r2, :128]!
|
||||
vmlal.s16 q0, d6, d10
|
||||
vld1.16 {d4, d5}, [r0, :64]!
|
||||
vmlal.s16 q1, d7, d11
|
||||
vld1.16 {d8, d9}, [r2, :128]!
|
||||
|
||||
vmlal.s16 q0, d16, d20
|
||||
vld1.16 {d18, d19}, [r0, :64]!
|
||||
vmlal.s16 q1, d17, d21
|
||||
vld1.16 {d22, d23}, [r2, :128]!
|
||||
vmlal.s16 q0, d4, d8
|
||||
vld1.16 {d6, d7}, [r0, :64]!
|
||||
vmlal.s16 q1, d5, d9
|
||||
vld1.16 {d10, d11}, [r2, :128]!
|
||||
|
||||
vmlal.s16 q0, d18, d22
|
||||
vld1.16 {d16, d17}, [r0, :64]!
|
||||
vmlal.s16 q1, d19, d23
|
||||
vld1.16 {d20, d21}, [r2, :128]!
|
||||
vmlal.s16 q0, d6, d10
|
||||
vld1.16 {d4, d5}, [r0, :64]!
|
||||
vmlal.s16 q1, d7, d11
|
||||
vld1.16 {d8, d9}, [r2, :128]!
|
||||
|
||||
vmlal.s16 q0, d16, d20
|
||||
vmlal.s16 q1, d17, d21
|
||||
vmlal.s16 q0, d4, d8
|
||||
vmlal.s16 q1, d5, d9
|
||||
|
||||
vpadd.s32 d0, d0, d1
|
||||
vpadd.s32 d1, d2, d3
|
||||
|
||||
vrshrn.s32 d0, q0, SBC_PROTO_FIXED_SCALE
|
||||
|
||||
vld1.16 {d16, d17, d18, d19}, [r2, :128]!
|
||||
vld1.16 {d2, d3, d4, d5}, [r2, :128]!
|
||||
|
||||
vdup.i32 d1, d0[1] /* TODO: can be eliminated */
|
||||
vdup.i32 d0, d0[0] /* TODO: can be eliminated */
|
||||
|
||||
vmull.s16 q10, d16, d0
|
||||
vmull.s16 q11, d17, d0
|
||||
vmlal.s16 q10, d18, d1
|
||||
vmlal.s16 q11, d19, d1
|
||||
vmull.s16 q3, d2, d0
|
||||
vmull.s16 q4, d3, d0
|
||||
vmlal.s16 q3, d4, d1
|
||||
vmlal.s16 q4, d5, d1
|
||||
|
||||
vpadd.s32 d0, d20, d21 /* TODO: can be eliminated */
|
||||
vpadd.s32 d1, d22, d23 /* TODO: can be eliminated */
|
||||
vpadd.s32 d0, d6, d7 /* TODO: can be eliminated */
|
||||
vpadd.s32 d1, d8, d9 /* TODO: can be eliminated */
|
||||
|
||||
vst1.32 {d0, d1}, [r1, :128]
|
||||
|
||||
@@ -91,57 +91,57 @@ function ff_sbc_analyze_8_neon, export=1
|
||||
/* TODO: merge even and odd cases (or even merge all four calls to this
|
||||
* function) in order to have only aligned reads from 'in' array
|
||||
* and reduce number of load instructions */
|
||||
vld1.16 {d16, d17}, [r0, :64]!
|
||||
vld1.16 {d20, d21}, [r2, :128]!
|
||||
vld1.16 {d4, d5}, [r0, :64]!
|
||||
vld1.16 {d8, d9}, [r2, :128]!
|
||||
|
||||
vmull.s16 q12, d16, d20
|
||||
vld1.16 {d18, d19}, [r0, :64]!
|
||||
vmull.s16 q13, d17, d21
|
||||
vld1.16 {d22, d23}, [r2, :128]!
|
||||
vmull.s16 q14, d18, d22
|
||||
vld1.16 {d16, d17}, [r0, :64]!
|
||||
vmull.s16 q15, d19, d23
|
||||
vld1.16 {d20, d21}, [r2, :128]!
|
||||
vmull.s16 q6, d4, d8
|
||||
vld1.16 {d6, d7}, [r0, :64]!
|
||||
vmull.s16 q7, d5, d9
|
||||
vld1.16 {d10, d11}, [r2, :128]!
|
||||
vmull.s16 q8, d6, d10
|
||||
vld1.16 {d4, d5}, [r0, :64]!
|
||||
vmull.s16 q9, d7, d11
|
||||
vld1.16 {d8, d9}, [r2, :128]!
|
||||
|
||||
vmlal.s16 q12, d16, d20
|
||||
vld1.16 {d18, d19}, [r0, :64]!
|
||||
vmlal.s16 q13, d17, d21
|
||||
vld1.16 {d22, d23}, [r2, :128]!
|
||||
vmlal.s16 q14, d18, d22
|
||||
vld1.16 {d16, d17}, [r0, :64]!
|
||||
vmlal.s16 q15, d19, d23
|
||||
vld1.16 {d20, d21}, [r2, :128]!
|
||||
vmlal.s16 q6, d4, d8
|
||||
vld1.16 {d6, d7}, [r0, :64]!
|
||||
vmlal.s16 q7, d5, d9
|
||||
vld1.16 {d10, d11}, [r2, :128]!
|
||||
vmlal.s16 q8, d6, d10
|
||||
vld1.16 {d4, d5}, [r0, :64]!
|
||||
vmlal.s16 q9, d7, d11
|
||||
vld1.16 {d8, d9}, [r2, :128]!
|
||||
|
||||
vmlal.s16 q12, d16, d20
|
||||
vld1.16 {d18, d19}, [r0, :64]!
|
||||
vmlal.s16 q13, d17, d21
|
||||
vld1.16 {d22, d23}, [r2, :128]!
|
||||
vmlal.s16 q14, d18, d22
|
||||
vld1.16 {d16, d17}, [r0, :64]!
|
||||
vmlal.s16 q15, d19, d23
|
||||
vld1.16 {d20, d21}, [r2, :128]!
|
||||
vmlal.s16 q6, d4, d8
|
||||
vld1.16 {d6, d7}, [r0, :64]!
|
||||
vmlal.s16 q7, d5, d9
|
||||
vld1.16 {d10, d11}, [r2, :128]!
|
||||
vmlal.s16 q8, d6, d10
|
||||
vld1.16 {d4, d5}, [r0, :64]!
|
||||
vmlal.s16 q9, d7, d11
|
||||
vld1.16 {d8, d9}, [r2, :128]!
|
||||
|
||||
vmlal.s16 q12, d16, d20
|
||||
vld1.16 {d18, d19}, [r0, :64]!
|
||||
vmlal.s16 q13, d17, d21
|
||||
vld1.16 {d22, d23}, [r2, :128]!
|
||||
vmlal.s16 q14, d18, d22
|
||||
vld1.16 {d16, d17}, [r0, :64]!
|
||||
vmlal.s16 q15, d19, d23
|
||||
vld1.16 {d20, d21}, [r2, :128]!
|
||||
vmlal.s16 q6, d4, d8
|
||||
vld1.16 {d6, d7}, [r0, :64]!
|
||||
vmlal.s16 q7, d5, d9
|
||||
vld1.16 {d10, d11}, [r2, :128]!
|
||||
vmlal.s16 q8, d6, d10
|
||||
vld1.16 {d4, d5}, [r0, :64]!
|
||||
vmlal.s16 q9, d7, d11
|
||||
vld1.16 {d8, d9}, [r2, :128]!
|
||||
|
||||
vmlal.s16 q12, d16, d20
|
||||
vld1.16 {d18, d19}, [r0, :64]!
|
||||
vmlal.s16 q13, d17, d21
|
||||
vld1.16 {d22, d23}, [r2, :128]!
|
||||
vmlal.s16 q6, d4, d8
|
||||
vld1.16 {d6, d7}, [r0, :64]!
|
||||
vmlal.s16 q7, d5, d9
|
||||
vld1.16 {d10, d11}, [r2, :128]!
|
||||
|
||||
vmlal.s16 q14, d18, d22
|
||||
vmlal.s16 q15, d19, d23
|
||||
vmlal.s16 q8, d6, d10
|
||||
vmlal.s16 q9, d7, d11
|
||||
|
||||
vpadd.s32 d0, d24, d25
|
||||
vpadd.s32 d1, d26, d27
|
||||
vpadd.s32 d2, d28, d29
|
||||
vpadd.s32 d3, d30, d31
|
||||
vpadd.s32 d0, d12, d13
|
||||
vpadd.s32 d1, d14, d15
|
||||
vpadd.s32 d2, d16, d17
|
||||
vpadd.s32 d3, d18, d19
|
||||
|
||||
vrshr.s32 q0, q0, SBC_PROTO_FIXED_SCALE
|
||||
vrshr.s32 q1, q1, SBC_PROTO_FIXED_SCALE
|
||||
@@ -153,38 +153,38 @@ function ff_sbc_analyze_8_neon, export=1
|
||||
vdup.i32 d1, d0[1] /* TODO: can be eliminated */
|
||||
vdup.i32 d0, d0[0] /* TODO: can be eliminated */
|
||||
|
||||
vld1.16 {d16, d17}, [r2, :128]!
|
||||
vmull.s16 q12, d16, d0
|
||||
vld1.16 {d18, d19}, [r2, :128]!
|
||||
vmull.s16 q13, d17, d0
|
||||
vmull.s16 q14, d18, d0
|
||||
vmull.s16 q15, d19, d0
|
||||
vld1.16 {d4, d5}, [r2, :128]!
|
||||
vmull.s16 q6, d4, d0
|
||||
vld1.16 {d6, d7}, [r2, :128]!
|
||||
vmull.s16 q7, d5, d0
|
||||
vmull.s16 q8, d6, d0
|
||||
vmull.s16 q9, d7, d0
|
||||
|
||||
vld1.16 {d16, d17}, [r2, :128]!
|
||||
vmlal.s16 q12, d16, d1
|
||||
vld1.16 {d18, d19}, [r2, :128]!
|
||||
vmlal.s16 q13, d17, d1
|
||||
vmlal.s16 q14, d18, d1
|
||||
vmlal.s16 q15, d19, d1
|
||||
vld1.16 {d4, d5}, [r2, :128]!
|
||||
vmlal.s16 q6, d4, d1
|
||||
vld1.16 {d6, d7}, [r2, :128]!
|
||||
vmlal.s16 q7, d5, d1
|
||||
vmlal.s16 q8, d6, d1
|
||||
vmlal.s16 q9, d7, d1
|
||||
|
||||
vld1.16 {d16, d17}, [r2, :128]!
|
||||
vmlal.s16 q12, d16, d2
|
||||
vld1.16 {d18, d19}, [r2, :128]!
|
||||
vmlal.s16 q13, d17, d2
|
||||
vmlal.s16 q14, d18, d2
|
||||
vmlal.s16 q15, d19, d2
|
||||
vld1.16 {d4, d5}, [r2, :128]!
|
||||
vmlal.s16 q6, d4, d2
|
||||
vld1.16 {d6, d7}, [r2, :128]!
|
||||
vmlal.s16 q7, d5, d2
|
||||
vmlal.s16 q8, d6, d2
|
||||
vmlal.s16 q9, d7, d2
|
||||
|
||||
vld1.16 {d16, d17}, [r2, :128]!
|
||||
vmlal.s16 q12, d16, d3
|
||||
vld1.16 {d18, d19}, [r2, :128]!
|
||||
vmlal.s16 q13, d17, d3
|
||||
vmlal.s16 q14, d18, d3
|
||||
vmlal.s16 q15, d19, d3
|
||||
vld1.16 {d4, d5}, [r2, :128]!
|
||||
vmlal.s16 q6, d4, d3
|
||||
vld1.16 {d6, d7}, [r2, :128]!
|
||||
vmlal.s16 q7, d5, d3
|
||||
vmlal.s16 q8, d6, d3
|
||||
vmlal.s16 q9, d7, d3
|
||||
|
||||
vpadd.s32 d0, d24, d25 /* TODO: can be eliminated */
|
||||
vpadd.s32 d1, d26, d27 /* TODO: can be eliminated */
|
||||
vpadd.s32 d2, d28, d29 /* TODO: can be eliminated */
|
||||
vpadd.s32 d3, d30, d31 /* TODO: can be eliminated */
|
||||
vpadd.s32 d0, d12, d13 /* TODO: can be eliminated */
|
||||
vpadd.s32 d1, d14, d15 /* TODO: can be eliminated */
|
||||
vpadd.s32 d2, d16, d17 /* TODO: can be eliminated */
|
||||
vpadd.s32 d3, d18, d19 /* TODO: can be eliminated */
|
||||
|
||||
vst1.32 {d0, d1, d2, d3}, [r1, :128]
|
||||
|
||||
|
||||
@@ -279,13 +279,11 @@ function \type\()_8tap_\size\()h_\idx1\idx2
|
||||
sub r1, r1, r5
|
||||
.endif
|
||||
@ size >= 16 loads two qwords and increments r2,
|
||||
@ size 4 loads 1 d word, increments r2 and loads 1 32-bit lane
|
||||
@ for size 8 it's enough with one qword and no postincrement
|
||||
@ for size 4/8 it's enough with one qword and no
|
||||
@ postincrement
|
||||
.if \size >= 16
|
||||
sub r3, r3, r5
|
||||
sub r3, r3, #8
|
||||
.elseif \size == 4
|
||||
sub r3, r3, #8
|
||||
.endif
|
||||
@ Load the filter vector
|
||||
vld1.16 {q0}, [r12,:128]
|
||||
@@ -297,14 +295,9 @@ function \type\()_8tap_\size\()h_\idx1\idx2
|
||||
.if \size >= 16
|
||||
vld1.8 {d18, d19, d20}, [r2]!
|
||||
vld1.8 {d24, d25, d26}, [r7]!
|
||||
.elseif \size == 8
|
||||
.else
|
||||
vld1.8 {q9}, [r2]
|
||||
vld1.8 {q12}, [r7]
|
||||
.else @ size == 4
|
||||
vld1.8 {d18}, [r2]!
|
||||
vld1.8 {d24}, [r7]!
|
||||
vld1.32 {d19[0]}, [r2]
|
||||
vld1.32 {d25[0]}, [r7]
|
||||
.endif
|
||||
vmovl.u8 q8, d18
|
||||
vmovl.u8 q9, d19
|
||||
|
||||
@@ -376,7 +376,7 @@ ASSSplitContext *ff_ass_split(const char *buf)
|
||||
ASSSplitContext *ctx = av_mallocz(sizeof(*ctx));
|
||||
if (!ctx)
|
||||
return NULL;
|
||||
if (buf && !strncmp(buf, "\xef\xbb\xbf", 3)) // Skip UTF-8 BOM header
|
||||
if (buf && !memcmp(buf, "\xef\xbb\xbf", 3)) // Skip UTF-8 BOM header
|
||||
buf += 3;
|
||||
ctx->current_section = -1;
|
||||
if (ass_split(ctx, buf) < 0) {
|
||||
|
||||
@@ -114,13 +114,6 @@ enum {
|
||||
AV1_WARP_MODEL_TRANSLATION = 1,
|
||||
AV1_WARP_MODEL_ROTZOOM = 2,
|
||||
AV1_WARP_MODEL_AFFINE = 3,
|
||||
AV1_WARP_PARAM_REDUCE_BITS = 6,
|
||||
|
||||
AV1_DIV_LUT_BITS = 8,
|
||||
AV1_DIV_LUT_PREC_BITS = 14,
|
||||
AV1_DIV_LUT_NUM = 257,
|
||||
|
||||
AV1_MAX_LOOP_FILTER = 63,
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -28,7 +28,6 @@ typedef struct AV1MetadataContext {
|
||||
CBSBSFContext common;
|
||||
|
||||
int td;
|
||||
AV1RawOBU td_obu;
|
||||
|
||||
int color_primaries;
|
||||
int transfer_characteristics;
|
||||
@@ -108,11 +107,12 @@ static int av1_metadata_update_fragment(AVBSFContext *bsf, AVPacket *pkt,
|
||||
CodedBitstreamFragment *frag)
|
||||
{
|
||||
AV1MetadataContext *ctx = bsf->priv_data;
|
||||
AV1RawOBU td, *obu;
|
||||
int err, i;
|
||||
|
||||
for (i = 0; i < frag->nb_units; i++) {
|
||||
if (frag->units[i].type == AV1_OBU_SEQUENCE_HEADER) {
|
||||
AV1RawOBU *obu = frag->units[i].content;
|
||||
obu = frag->units[i].content;
|
||||
err = av1_metadata_update_sequence_header(bsf, &obu->obu.sequence_header);
|
||||
if (err < 0)
|
||||
return err;
|
||||
@@ -120,12 +120,16 @@ static int av1_metadata_update_fragment(AVBSFContext *bsf, AVPacket *pkt,
|
||||
}
|
||||
|
||||
// If a Temporal Delimiter is present, it must be the first OBU.
|
||||
if (frag->nb_units && frag->units[0].type == AV1_OBU_TEMPORAL_DELIMITER) {
|
||||
if (frag->units[0].type == AV1_OBU_TEMPORAL_DELIMITER) {
|
||||
if (ctx->td == BSF_ELEMENT_REMOVE)
|
||||
ff_cbs_delete_unit(frag, 0);
|
||||
} else if (pkt && ctx->td == BSF_ELEMENT_INSERT) {
|
||||
td = (AV1RawOBU) {
|
||||
.header.obu_type = AV1_OBU_TEMPORAL_DELIMITER,
|
||||
};
|
||||
|
||||
err = ff_cbs_insert_unit_content(frag, 0, AV1_OBU_TEMPORAL_DELIMITER,
|
||||
&ctx->td_obu, NULL);
|
||||
&td, NULL);
|
||||
if (err < 0) {
|
||||
av_log(bsf, AV_LOG_ERROR, "Failed to insert Temporal Delimiter.\n");
|
||||
return err;
|
||||
@@ -151,12 +155,6 @@ static const CBSBSFType av1_metadata_type = {
|
||||
|
||||
static int av1_metadata_init(AVBSFContext *bsf)
|
||||
{
|
||||
AV1MetadataContext *ctx = bsf->priv_data;
|
||||
|
||||
ctx->td_obu = (AV1RawOBU) {
|
||||
.header.obu_type = AV1_OBU_TEMPORAL_DELIMITER,
|
||||
};
|
||||
|
||||
return ff_cbs_bsf_generic_init(bsf, &av1_metadata_type);
|
||||
}
|
||||
|
||||
|
||||
+5
-111
@@ -28,34 +28,6 @@
|
||||
#include "internal.h"
|
||||
#include "profiles.h"
|
||||
|
||||
/**< same with Div_Lut defined in spec 7.11.3.7 */
|
||||
static const uint16_t div_lut[AV1_DIV_LUT_NUM] = {
|
||||
16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
|
||||
15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
|
||||
15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
|
||||
14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
|
||||
13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
|
||||
13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
|
||||
13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
|
||||
12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
|
||||
12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
|
||||
11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
|
||||
11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
|
||||
11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
|
||||
10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
|
||||
10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
|
||||
10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
|
||||
9963, 9939, 9916, 9892, 9869, 9846, 9823, 9800, 9777, 9754, 9732,
|
||||
9709, 9687, 9664, 9642, 9620, 9598, 9576, 9554, 9533, 9511, 9489,
|
||||
9468, 9447, 9425, 9404, 9383, 9362, 9341, 9321, 9300, 9279, 9259,
|
||||
9239, 9218, 9198, 9178, 9158, 9138, 9118, 9098, 9079, 9059, 9039,
|
||||
9020, 9001, 8981, 8962, 8943, 8924, 8905, 8886, 8867, 8849, 8830,
|
||||
8812, 8793, 8775, 8756, 8738, 8720, 8702, 8684, 8666, 8648, 8630,
|
||||
8613, 8595, 8577, 8560, 8542, 8525, 8508, 8490, 8473, 8456, 8439,
|
||||
8422, 8405, 8389, 8372, 8355, 8339, 8322, 8306, 8289, 8273, 8257,
|
||||
8240, 8224, 8208, 8192
|
||||
};
|
||||
|
||||
static uint32_t inverse_recenter(int r, uint32_t v)
|
||||
{
|
||||
if (v > 2 * r)
|
||||
@@ -85,11 +57,12 @@ static int32_t decode_signed_subexp_with_ref(uint32_t sub_exp, int low,
|
||||
|
||||
static void read_global_param(AV1DecContext *s, int type, int ref, int idx)
|
||||
{
|
||||
int primary_frame;
|
||||
uint8_t primary_frame, prev_frame;
|
||||
uint32_t abs_bits, prec_bits, round, prec_diff, sub, mx;
|
||||
int32_t r, prev_gm_param;
|
||||
|
||||
primary_frame = s->raw_frame_header->primary_ref_frame;
|
||||
prev_frame = s->raw_frame_header->ref_frame_idx[primary_frame];
|
||||
abs_bits = AV1_GM_ABS_ALPHA_BITS;
|
||||
prec_bits = AV1_GM_ALPHA_PREC_BITS;
|
||||
|
||||
@@ -99,10 +72,8 @@ static void read_global_param(AV1DecContext *s, int type, int ref, int idx)
|
||||
*/
|
||||
if (s->raw_frame_header->primary_ref_frame == AV1_PRIMARY_REF_NONE)
|
||||
prev_gm_param = s->cur_frame.gm_params[ref][idx];
|
||||
else {
|
||||
int prev_frame = s->raw_frame_header->ref_frame_idx[primary_frame];
|
||||
else
|
||||
prev_gm_param = s->ref[prev_frame].gm_params[ref][idx];
|
||||
}
|
||||
|
||||
if (idx < 2) {
|
||||
if (type == AV1_WARP_MODEL_TRANSLATION) {
|
||||
@@ -126,70 +97,6 @@ static void read_global_param(AV1DecContext *s, int type, int ref, int idx)
|
||||
-mx, mx + 1, r) << prec_diff) + round;
|
||||
}
|
||||
|
||||
static uint64_t round_two(uint64_t x, uint16_t n)
|
||||
{
|
||||
if (n == 0)
|
||||
return x;
|
||||
return ((x + ((uint64_t)1 << (n - 1))) >> n);
|
||||
}
|
||||
|
||||
static int64_t round_two_signed(int64_t x, uint16_t n)
|
||||
{
|
||||
return ((x<0) ? -((int64_t)round_two(-x, n)) : (int64_t)round_two(x, n));
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve divisor process.
|
||||
* see spec 7.11.3.7
|
||||
*/
|
||||
static int16_t resolve_divisor(uint32_t d, uint16_t *shift)
|
||||
{
|
||||
int32_t e, f;
|
||||
|
||||
*shift = av_log2(d);
|
||||
e = d - (1 << (*shift));
|
||||
if (*shift > AV1_DIV_LUT_BITS)
|
||||
f = round_two(e, *shift - AV1_DIV_LUT_BITS);
|
||||
else
|
||||
f = e << (AV1_DIV_LUT_BITS - (*shift));
|
||||
|
||||
*shift += AV1_DIV_LUT_PREC_BITS;
|
||||
|
||||
return div_lut[f];
|
||||
}
|
||||
|
||||
/**
|
||||
* check if global motion params is valid.
|
||||
* see spec 7.11.3.6
|
||||
*/
|
||||
static uint8_t get_shear_params_valid(AV1DecContext *s, int idx)
|
||||
{
|
||||
int16_t alpha, beta, gamma, delta, divf, divs;
|
||||
int64_t v, w;
|
||||
int32_t *param = &s->cur_frame.gm_params[idx][0];
|
||||
if (param[2] <= 0)
|
||||
return 0;
|
||||
|
||||
alpha = av_clip_int16(param[2] - (1 << AV1_WARPEDMODEL_PREC_BITS));
|
||||
beta = av_clip_int16(param[3]);
|
||||
divf = resolve_divisor(abs(param[2]), &divs);
|
||||
v = (int64_t)param[4] * (1 << AV1_WARPEDMODEL_PREC_BITS);
|
||||
w = (int64_t)param[3] * param[4];
|
||||
gamma = av_clip_int16((int)round_two_signed((v * divf), divs));
|
||||
delta = av_clip_int16(param[5] - (int)round_two_signed((w * divf), divs) - (1 << AV1_WARPEDMODEL_PREC_BITS));
|
||||
|
||||
alpha = round_two_signed(alpha, AV1_WARP_PARAM_REDUCE_BITS) << AV1_WARP_PARAM_REDUCE_BITS;
|
||||
beta = round_two_signed(beta, AV1_WARP_PARAM_REDUCE_BITS) << AV1_WARP_PARAM_REDUCE_BITS;
|
||||
gamma = round_two_signed(gamma, AV1_WARP_PARAM_REDUCE_BITS) << AV1_WARP_PARAM_REDUCE_BITS;
|
||||
delta = round_two_signed(delta, AV1_WARP_PARAM_REDUCE_BITS) << AV1_WARP_PARAM_REDUCE_BITS;
|
||||
|
||||
if ((4 * abs(alpha) + 7 * abs(beta)) >= (1 << AV1_WARPEDMODEL_PREC_BITS) ||
|
||||
(4 * abs(gamma) + 4 * abs(delta)) >= (1 << AV1_WARPEDMODEL_PREC_BITS))
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* update gm type/params, since cbs already implemented part of this funcation,
|
||||
* so we don't need to full implement spec.
|
||||
@@ -237,9 +144,6 @@ static void global_motion_params(AV1DecContext *s)
|
||||
read_global_param(s, type, ref, 0);
|
||||
read_global_param(s, type, ref, 1);
|
||||
}
|
||||
if (type <= AV1_WARP_MODEL_AFFINE) {
|
||||
s->cur_frame.gm_invalid[ref] = !get_shear_params_valid(s, ref);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -605,9 +509,6 @@ static int av1_frame_ref(AVCodecContext *avctx, AV1Frame *dst, const AV1Frame *s
|
||||
|
||||
dst->spatial_id = src->spatial_id;
|
||||
dst->temporal_id = src->temporal_id;
|
||||
memcpy(dst->gm_invalid,
|
||||
src->gm_invalid,
|
||||
AV1_NUM_REF_FRAMES * sizeof(uint8_t));
|
||||
memcpy(dst->gm_type,
|
||||
src->gm_type,
|
||||
AV1_NUM_REF_FRAMES * sizeof(uint8_t));
|
||||
@@ -662,7 +563,7 @@ static int set_context_with_sequence(AVCodecContext *avctx,
|
||||
avctx->color_range =
|
||||
seq->color_config.color_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
|
||||
avctx->color_primaries = seq->color_config.color_primaries;
|
||||
avctx->colorspace = seq->color_config.matrix_coefficients;
|
||||
avctx->colorspace = seq->color_config.color_primaries;
|
||||
avctx->color_trc = seq->color_config.transfer_characteristics;
|
||||
|
||||
switch (seq->color_config.chroma_sample_position) {
|
||||
@@ -1018,8 +919,6 @@ static int av1_decode_frame(AVCodecContext *avctx, void *frame,
|
||||
}
|
||||
|
||||
s->raw_seq = &obu->obu.sequence_header;
|
||||
s->raw_frame_header = NULL;
|
||||
raw_tile_group = NULL;
|
||||
|
||||
ret = set_context_with_sequence(avctx, s->raw_seq);
|
||||
if (ret < 0) {
|
||||
@@ -1069,8 +968,6 @@ static int av1_decode_frame(AVCodecContext *avctx, void *frame,
|
||||
goto end;
|
||||
}
|
||||
|
||||
raw_tile_group = NULL;
|
||||
|
||||
if (unit->type == AV1_OBU_FRAME)
|
||||
s->raw_frame_header = &obu->obu.frame.header;
|
||||
else
|
||||
@@ -1149,11 +1046,8 @@ static int av1_decode_frame(AVCodecContext *avctx, void *frame,
|
||||
}
|
||||
}
|
||||
break;
|
||||
case AV1_OBU_TEMPORAL_DELIMITER:
|
||||
s->raw_frame_header = NULL;
|
||||
raw_tile_group = NULL;
|
||||
// fall-through
|
||||
case AV1_OBU_TILE_LIST:
|
||||
case AV1_OBU_TEMPORAL_DELIMITER:
|
||||
case AV1_OBU_PADDING:
|
||||
case AV1_OBU_METADATA:
|
||||
break;
|
||||
|
||||
@@ -42,7 +42,6 @@ typedef struct AV1Frame {
|
||||
int temporal_id;
|
||||
int spatial_id;
|
||||
|
||||
uint8_t gm_invalid[AV1_NUM_REF_FRAMES];
|
||||
uint8_t gm_type[AV1_NUM_REF_FRAMES];
|
||||
int32_t gm_params[AV1_NUM_REF_FRAMES][6];
|
||||
|
||||
|
||||
+24
-30
@@ -318,13 +318,6 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
|
||||
avctx->time_base.den = avctx->sample_rate;
|
||||
}
|
||||
|
||||
if (av_codec_is_encoder(avctx->codec))
|
||||
ret = ff_encode_preinit(avctx);
|
||||
else
|
||||
ret = ff_decode_preinit(avctx);
|
||||
if (ret < 0)
|
||||
goto free_and_end;
|
||||
|
||||
if (!HAVE_THREADS)
|
||||
av_log(avctx, AV_LOG_WARNING, "Warning: not compiled with thread support, using thread emulation\n");
|
||||
|
||||
@@ -346,6 +339,13 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
|
||||
if (!HAVE_THREADS && !(codec->caps_internal & FF_CODEC_CAP_AUTO_THREADS))
|
||||
avctx->thread_count = 1;
|
||||
|
||||
if (av_codec_is_encoder(avctx->codec))
|
||||
ret = ff_encode_preinit(avctx);
|
||||
else
|
||||
ret = ff_decode_preinit(avctx);
|
||||
if (ret < 0)
|
||||
goto free_and_end;
|
||||
|
||||
if ( avctx->codec->init && (!(avctx->active_thread_type&FF_THREAD_FRAME)
|
||||
|| avci->frame_thread_encoder)) {
|
||||
ret = avctx->codec->init(avctx);
|
||||
@@ -644,11 +644,6 @@ FF_ENABLE_DEPRECATION_WARNINGS
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const char *unknown_if_null(const char *str)
|
||||
{
|
||||
return str ? str : "unknown";
|
||||
}
|
||||
|
||||
void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
|
||||
{
|
||||
const char *codec_type;
|
||||
@@ -658,7 +653,6 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
|
||||
int new_line = 0;
|
||||
AVRational display_aspect_ratio;
|
||||
const char *separator = enc->dump_separator ? (const char *)enc->dump_separator : ", ";
|
||||
const char *str;
|
||||
|
||||
if (!buf || buf_size <= 0)
|
||||
return;
|
||||
@@ -694,27 +688,28 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
|
||||
av_strlcat(buf, separator, buf_size);
|
||||
|
||||
snprintf(buf + strlen(buf), buf_size - strlen(buf),
|
||||
"%s", enc->pix_fmt == AV_PIX_FMT_NONE ? "none" :
|
||||
unknown_if_null(av_get_pix_fmt_name(enc->pix_fmt)));
|
||||
"%s", enc->pix_fmt == AV_PIX_FMT_NONE ? "none" :
|
||||
av_get_pix_fmt_name(enc->pix_fmt));
|
||||
if (enc->bits_per_raw_sample && enc->pix_fmt != AV_PIX_FMT_NONE &&
|
||||
enc->bits_per_raw_sample < av_pix_fmt_desc_get(enc->pix_fmt)->comp[0].depth)
|
||||
av_strlcatf(detail, sizeof(detail), "%d bpc, ", enc->bits_per_raw_sample);
|
||||
if (enc->color_range != AVCOL_RANGE_UNSPECIFIED &&
|
||||
(str = av_color_range_name(enc->color_range)))
|
||||
av_strlcatf(detail, sizeof(detail), "%s, ", str);
|
||||
if (enc->color_range != AVCOL_RANGE_UNSPECIFIED)
|
||||
av_strlcatf(detail, sizeof(detail), "%s, ",
|
||||
av_color_range_name(enc->color_range));
|
||||
|
||||
if (enc->colorspace != AVCOL_SPC_UNSPECIFIED ||
|
||||
enc->color_primaries != AVCOL_PRI_UNSPECIFIED ||
|
||||
enc->color_trc != AVCOL_TRC_UNSPECIFIED) {
|
||||
const char *col = unknown_if_null(av_color_space_name(enc->colorspace));
|
||||
const char *pri = unknown_if_null(av_color_primaries_name(enc->color_primaries));
|
||||
const char *trc = unknown_if_null(av_color_transfer_name(enc->color_trc));
|
||||
if (strcmp(col, pri) || strcmp(col, trc)) {
|
||||
if (enc->colorspace != (int)enc->color_primaries ||
|
||||
enc->colorspace != (int)enc->color_trc) {
|
||||
new_line = 1;
|
||||
av_strlcatf(detail, sizeof(detail), "%s/%s/%s, ",
|
||||
col, pri, trc);
|
||||
av_color_space_name(enc->colorspace),
|
||||
av_color_primaries_name(enc->color_primaries),
|
||||
av_color_transfer_name(enc->color_trc));
|
||||
} else
|
||||
av_strlcatf(detail, sizeof(detail), "%s, ", col);
|
||||
av_strlcatf(detail, sizeof(detail), "%s, ",
|
||||
av_get_colorspace_name(enc->colorspace));
|
||||
}
|
||||
|
||||
if (enc->field_order != AV_FIELD_UNKNOWN) {
|
||||
@@ -732,9 +727,9 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
|
||||
}
|
||||
|
||||
if (av_log_get_level() >= AV_LOG_VERBOSE &&
|
||||
enc->chroma_sample_location != AVCHROMA_LOC_UNSPECIFIED &&
|
||||
(str = av_chroma_location_name(enc->chroma_sample_location)))
|
||||
av_strlcatf(detail, sizeof(detail), "%s, ", str);
|
||||
enc->chroma_sample_location != AVCHROMA_LOC_UNSPECIFIED)
|
||||
av_strlcatf(detail, sizeof(detail), "%s, ",
|
||||
av_chroma_location_name(enc->chroma_sample_location));
|
||||
|
||||
if (strlen(detail) > 1) {
|
||||
detail[strlen(detail) - 2] = 0;
|
||||
@@ -792,10 +787,9 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
|
||||
"%d Hz, ", enc->sample_rate);
|
||||
}
|
||||
av_get_channel_layout_string(buf + strlen(buf), buf_size - strlen(buf), enc->channels, enc->channel_layout);
|
||||
if (enc->sample_fmt != AV_SAMPLE_FMT_NONE &&
|
||||
(str = av_get_sample_fmt_name(enc->sample_fmt))) {
|
||||
if (enc->sample_fmt != AV_SAMPLE_FMT_NONE) {
|
||||
snprintf(buf + strlen(buf), buf_size - strlen(buf),
|
||||
", %s", str);
|
||||
", %s", av_get_sample_fmt_name(enc->sample_fmt));
|
||||
}
|
||||
if ( enc->bits_per_raw_sample > 0
|
||||
&& enc->bits_per_raw_sample != av_get_bytes_per_sample(enc->sample_fmt) * 8)
|
||||
|
||||
@@ -1304,10 +1304,6 @@ typedef struct AVCodecContext {
|
||||
* this callback and filled with the extra buffers if there are more
|
||||
* buffers than buf[] can hold. extended_buf will be freed in
|
||||
* av_frame_unref().
|
||||
* Decoders will generally initialize the whole buffer before it is output
|
||||
* but it can in rare error conditions happen that uninitialized data is passed
|
||||
* through. \important The buffers returned by get_buffer* should thus not contain sensitive
|
||||
* data.
|
||||
*
|
||||
* If AV_CODEC_CAP_DR1 is not set then get_buffer2() must call
|
||||
* avcodec_default_get_buffer2() instead of providing buffers allocated by
|
||||
|
||||
+5
-5
@@ -869,7 +869,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
|
||||
|
||||
binkb_init_bundles(c);
|
||||
ref_start = frame->data[plane_idx];
|
||||
ref_end = frame->data[plane_idx] + ((bh - 1) * frame->linesize[plane_idx] + bw - 1) * 8;
|
||||
ref_end = frame->data[plane_idx] + (bh * frame->linesize[plane_idx] + bw) * 8;
|
||||
|
||||
for (i = 0; i < 64; i++)
|
||||
coordmap[i] = (i & 7) + (i >> 3) * stride;
|
||||
@@ -925,7 +925,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
|
||||
xoff = binkb_get_value(c, BINKB_SRC_X_OFF);
|
||||
yoff = binkb_get_value(c, BINKB_SRC_Y_OFF) + ybias;
|
||||
ref = dst + xoff + yoff * stride;
|
||||
if (ref < ref_start || ref > ref_end) {
|
||||
if (ref < ref_start || ref + 8*stride > ref_end) {
|
||||
av_log(c->avctx, AV_LOG_WARNING, "Reference block is out of bounds\n");
|
||||
} else if (ref + 8*stride < dst || ref >= dst + 8*stride) {
|
||||
c->put_pixels_tab(dst, ref, stride, 8);
|
||||
@@ -941,7 +941,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
|
||||
xoff = binkb_get_value(c, BINKB_SRC_X_OFF);
|
||||
yoff = binkb_get_value(c, BINKB_SRC_Y_OFF) + ybias;
|
||||
ref = dst + xoff + yoff * stride;
|
||||
if (ref < ref_start || ref > ref_end) {
|
||||
if (ref < ref_start || ref + 8 * stride > ref_end) {
|
||||
av_log(c->avctx, AV_LOG_WARNING, "Reference block is out of bounds\n");
|
||||
} else if (ref + 8*stride < dst || ref >= dst + 8*stride) {
|
||||
c->put_pixels_tab(dst, ref, stride, 8);
|
||||
@@ -973,7 +973,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
|
||||
xoff = binkb_get_value(c, BINKB_SRC_X_OFF);
|
||||
yoff = binkb_get_value(c, BINKB_SRC_Y_OFF) + ybias;
|
||||
ref = dst + xoff + yoff * stride;
|
||||
if (ref < ref_start || ref > ref_end) {
|
||||
if (ref < ref_start || ref + 8 * stride > ref_end) {
|
||||
av_log(c->avctx, AV_LOG_WARNING, "Reference block is out of bounds\n");
|
||||
} else if (ref + 8*stride < dst || ref >= dst + 8*stride) {
|
||||
c->put_pixels_tab(dst, ref, stride, 8);
|
||||
@@ -1086,7 +1086,7 @@ static int bink_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
|
||||
for (bx = 0; bx < bw; bx++, dst += 8, prev += 8) {
|
||||
blk = get_value(c, BINK_SRC_BLOCK_TYPES);
|
||||
// 16x16 block type on odd line means part of the already decoded block, so skip it
|
||||
if (((by & 1) || (bx & 1)) && blk == SCALED_BLOCK) {
|
||||
if ((by & 1) && blk == SCALED_BLOCK) {
|
||||
bx++;
|
||||
dst += 8;
|
||||
prev += 8;
|
||||
|
||||
@@ -70,7 +70,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
|
||||
BinkAudioContext *s = avctx->priv_data;
|
||||
int sample_rate = avctx->sample_rate;
|
||||
int sample_rate_half;
|
||||
int i, ret;
|
||||
int i;
|
||||
int frame_len_bits;
|
||||
|
||||
/* determine frame length */
|
||||
@@ -132,13 +132,11 @@ static av_cold int decode_init(AVCodecContext *avctx)
|
||||
s->first = 1;
|
||||
|
||||
if (CONFIG_BINKAUDIO_RDFT_DECODER && avctx->codec->id == AV_CODEC_ID_BINKAUDIO_RDFT)
|
||||
ret = ff_rdft_init(&s->trans.rdft, frame_len_bits, DFT_C2R);
|
||||
ff_rdft_init(&s->trans.rdft, frame_len_bits, DFT_C2R);
|
||||
else if (CONFIG_BINKAUDIO_DCT_DECODER)
|
||||
ret = ff_dct_init(&s->trans.dct, frame_len_bits, DCT_III);
|
||||
ff_dct_init(&s->trans.dct, frame_len_bits, DCT_III);
|
||||
else
|
||||
av_assert0(0);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
s->pkt = av_packet_alloc();
|
||||
if (!s->pkt)
|
||||
@@ -347,7 +345,6 @@ AVCodec ff_binkaudio_rdft_decoder = {
|
||||
.close = decode_end,
|
||||
.receive_frame = binkaudio_receive_frame,
|
||||
.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1,
|
||||
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
|
||||
};
|
||||
|
||||
AVCodec ff_binkaudio_dct_decoder = {
|
||||
@@ -360,5 +357,4 @@ AVCodec ff_binkaudio_dct_decoder = {
|
||||
.close = decode_end,
|
||||
.receive_frame = binkaudio_receive_frame,
|
||||
.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1,
|
||||
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
|
||||
};
|
||||
|
||||
+1
-1
@@ -130,7 +130,7 @@ static int bmp_decode_frame(AVCodecContext *avctx,
|
||||
rgb[1] = bytestream_get_le32(&buf);
|
||||
rgb[2] = bytestream_get_le32(&buf);
|
||||
if (ihsize > 40)
|
||||
alpha = bytestream_get_le32(&buf);
|
||||
alpha = bytestream_get_le32(&buf);
|
||||
}
|
||||
|
||||
ret = ff_set_dimensions(avctx, width, height > 0 ? height : -(unsigned)height);
|
||||
|
||||
+19
-20
@@ -45,15 +45,14 @@ void av_bsf_free(AVBSFContext **pctx)
|
||||
return;
|
||||
ctx = *pctx;
|
||||
|
||||
if (ctx->internal) {
|
||||
if (ctx->filter->close)
|
||||
ctx->filter->close(ctx);
|
||||
av_packet_free(&ctx->internal->buffer_pkt);
|
||||
av_freep(&ctx->internal);
|
||||
}
|
||||
if (ctx->filter->close)
|
||||
ctx->filter->close(ctx);
|
||||
if (ctx->filter->priv_class && ctx->priv_data)
|
||||
av_opt_free(ctx->priv_data);
|
||||
|
||||
if (ctx->internal)
|
||||
av_packet_free(&ctx->internal->buffer_pkt);
|
||||
av_freep(&ctx->internal);
|
||||
av_freep(&ctx->priv_data);
|
||||
|
||||
avcodec_parameters_free(&ctx->par_in);
|
||||
@@ -111,20 +110,7 @@ int av_bsf_alloc(const AVBitStreamFilter *filter, AVBSFContext **pctx)
|
||||
ret = AVERROR(ENOMEM);
|
||||
goto fail;
|
||||
}
|
||||
/* allocate priv data and init private options */
|
||||
if (filter->priv_data_size) {
|
||||
ctx->priv_data = av_mallocz(filter->priv_data_size);
|
||||
if (!ctx->priv_data) {
|
||||
ret = AVERROR(ENOMEM);
|
||||
goto fail;
|
||||
}
|
||||
if (filter->priv_class) {
|
||||
*(const AVClass **)ctx->priv_data = filter->priv_class;
|
||||
av_opt_set_defaults(ctx->priv_data);
|
||||
}
|
||||
}
|
||||
/* Allocate AVBSFInternal; must happen after priv_data has been allocated
|
||||
* so that a filter->close needing priv_data is never called without. */
|
||||
|
||||
bsfi = av_mallocz(sizeof(*bsfi));
|
||||
if (!bsfi) {
|
||||
ret = AVERROR(ENOMEM);
|
||||
@@ -138,6 +124,19 @@ int av_bsf_alloc(const AVBitStreamFilter *filter, AVBSFContext **pctx)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* allocate priv data and init private options */
|
||||
if (filter->priv_data_size) {
|
||||
ctx->priv_data = av_mallocz(filter->priv_data_size);
|
||||
if (!ctx->priv_data) {
|
||||
ret = AVERROR(ENOMEM);
|
||||
goto fail;
|
||||
}
|
||||
if (filter->priv_class) {
|
||||
*(const AVClass **)ctx->priv_data = filter->priv_class;
|
||||
av_opt_set_defaults(ctx->priv_data);
|
||||
}
|
||||
}
|
||||
|
||||
*pctx = ctx;
|
||||
return 0;
|
||||
fail:
|
||||
|
||||
+3
-14
@@ -37,7 +37,7 @@ static int cbs_av1_read_uvlc(CodedBitstreamContext *ctx, GetBitContext *gbc,
|
||||
position = get_bits_count(gbc);
|
||||
|
||||
zeroes = 0;
|
||||
while (zeroes < 32) {
|
||||
while (1) {
|
||||
if (get_bits_left(gbc) < 1) {
|
||||
av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid uvlc code at "
|
||||
"%s: bitstream ended.\n", name);
|
||||
@@ -50,18 +50,7 @@ static int cbs_av1_read_uvlc(CodedBitstreamContext *ctx, GetBitContext *gbc,
|
||||
}
|
||||
|
||||
if (zeroes >= 32) {
|
||||
// The spec allows at least thirty-two zero bits followed by a
|
||||
// one to mean 2^32-1, with no constraint on the number of
|
||||
// zeroes. The libaom reference decoder does not match this,
|
||||
// instead reading thirty-two zeroes but not the following one
|
||||
// to mean 2^32-1. These two interpretations are incompatible
|
||||
// and other implementations may follow one or the other.
|
||||
// Therefore we reject thirty-two zeroes because the intended
|
||||
// behaviour is not clear.
|
||||
av_log(ctx->log_ctx, AV_LOG_ERROR, "Thirty-two zero bits in "
|
||||
"%s uvlc code: considered invalid due to conflicting "
|
||||
"standard and reference decoder behaviour.\n", name);
|
||||
return AVERROR_INVALIDDATA;
|
||||
value = MAX_UINT_BITS(32);
|
||||
} else {
|
||||
if (get_bits_left(gbc) < zeroes) {
|
||||
av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid uvlc code at "
|
||||
@@ -390,7 +379,7 @@ static int cbs_av1_write_increment(CodedBitstreamContext *ctx, PutBitContext *pb
|
||||
}
|
||||
|
||||
if (len > 0)
|
||||
put_bits(pbc, len, (1U << len) - 1 - (value != range_max));
|
||||
put_bits(pbc, len, (1 << len) - 1 - (value != range_max));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -355,7 +355,7 @@ static int FUNC(set_frame_refs)(CodedBitstreamContext *ctx, RWContext *rw,
|
||||
AV1_REF_FRAME_ALTREF2, AV1_REF_FRAME_ALTREF
|
||||
};
|
||||
int8_t ref_frame_idx[AV1_REFS_PER_FRAME], used_frame[AV1_NUM_REF_FRAMES];
|
||||
int16_t shifted_order_hints[AV1_NUM_REF_FRAMES];
|
||||
int8_t shifted_order_hints[AV1_NUM_REF_FRAMES];
|
||||
int cur_frame_hint, latest_order_hint, earliest_order_hint, ref;
|
||||
int i, j;
|
||||
|
||||
|
||||
@@ -728,7 +728,7 @@ static int FUNC(sps_scc_extension)(CodedBitstreamContext *ctx, RWContext *rw,
|
||||
|
||||
flag(sps_palette_predictor_initializer_present_flag);
|
||||
if (current->sps_palette_predictor_initializer_present_flag) {
|
||||
ue(sps_num_palette_predictor_initializer_minus1, 0, 127);
|
||||
ue(sps_num_palette_predictor_initializer_minus1, 0, 128);
|
||||
for (comp = 0; comp < (current->chroma_format_idc ? 3 : 1); comp++) {
|
||||
int bit_depth = comp == 0 ? current->bit_depth_luma_minus8 + 8
|
||||
: current->bit_depth_chroma_minus8 + 8;
|
||||
|
||||
@@ -166,13 +166,13 @@ static int cbs_jpeg_split_fragment(CodedBitstreamContext *ctx,
|
||||
}
|
||||
} else {
|
||||
i = start;
|
||||
if (i > frag->data_size - 2) {
|
||||
if (i + 2 > frag->data_size) {
|
||||
av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid JPEG image: "
|
||||
"truncated at %02x marker.\n", marker);
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
length = AV_RB16(frag->data + i);
|
||||
if (length > frag->data_size - i) {
|
||||
if (i + length > frag->data_size) {
|
||||
av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid JPEG image: "
|
||||
"truncated at %02x marker segment.\n", marker);
|
||||
return AVERROR_INVALIDDATA;
|
||||
|
||||
@@ -422,7 +422,7 @@ static int cbs_vp9_split_fragment(CodedBitstreamContext *ctx,
|
||||
superframe_header = frag->data[frag->data_size - 1];
|
||||
|
||||
if ((superframe_header & 0xe0) == 0xc0) {
|
||||
VP9RawSuperframeIndex sfi = {0};
|
||||
VP9RawSuperframeIndex sfi;
|
||||
GetBitContext gbc;
|
||||
size_t index_size, pos;
|
||||
int i;
|
||||
|
||||
@@ -239,7 +239,7 @@ static void cdg_scroll(CDGraphicsContext *cc, uint8_t *data,
|
||||
for (y = FFMAX(0, vinc); y < FFMIN(CDG_FULL_HEIGHT + vinc, CDG_FULL_HEIGHT); y++)
|
||||
memcpy(out + FFMAX(0, hinc) + stride * y,
|
||||
in + FFMAX(0, hinc) - hinc + (y - vinc) * stride,
|
||||
FFABS(stride) - FFABS(hinc));
|
||||
FFMIN(stride + hinc, stride));
|
||||
|
||||
if (vinc > 0)
|
||||
cdg_fill_wrapper(0, 0, out,
|
||||
|
||||
@@ -78,7 +78,7 @@ int64_t ff_dot_product(const int16_t *a, const int16_t *b, int length);
|
||||
*
|
||||
* @return value << offset, if offset>=0; value >> -offset - otherwise
|
||||
*/
|
||||
static inline unsigned bidir_sal(unsigned value, int offset)
|
||||
static inline int bidir_sal(int value, int offset)
|
||||
{
|
||||
if(offset < 0) return value >> -offset;
|
||||
else return value << offset;
|
||||
|
||||
+17
-73
@@ -221,7 +221,6 @@ static void free_buffers(CFHDContext *s)
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < FF_ARRAY_ELEMS(s->plane); i++) {
|
||||
Plane *p = &s->plane[i];
|
||||
av_freep(&s->plane[i].idwt_buf);
|
||||
av_freep(&s->plane[i].idwt_tmp);
|
||||
s->plane[i].idwt_size = 0;
|
||||
@@ -231,16 +230,9 @@ static void free_buffers(CFHDContext *s)
|
||||
|
||||
for (j = 0; j < 10; j++)
|
||||
s->plane[i].l_h[j] = NULL;
|
||||
|
||||
for (j = 0; j < DWT_LEVELS_3D; j++)
|
||||
p->band[j][0].read_ok =
|
||||
p->band[j][1].read_ok =
|
||||
p->band[j][2].read_ok =
|
||||
p->band[j][3].read_ok = 0;
|
||||
}
|
||||
s->a_height = 0;
|
||||
s->a_width = 0;
|
||||
s->a_transform_type = INT_MIN;
|
||||
}
|
||||
|
||||
static int alloc_buffers(AVCodecContext *avctx)
|
||||
@@ -274,9 +266,6 @@ static int alloc_buffers(AVCodecContext *avctx)
|
||||
int height = (i || bayer) ? s->coded_height >> chroma_y_shift : s->coded_height;
|
||||
ptrdiff_t stride = (FFALIGN(width / 8, 8) + 64) * 8;
|
||||
|
||||
if ((ret = av_image_check_size2(stride, height, avctx->max_pixels, s->coded_format, 0, avctx)) < 0)
|
||||
return ret;
|
||||
|
||||
if (chroma_y_shift && !bayer)
|
||||
height = FFALIGN(height / 8, 2) * 8;
|
||||
s->plane[i].width = width;
|
||||
@@ -367,7 +356,6 @@ static int alloc_buffers(AVCodecContext *avctx)
|
||||
}
|
||||
}
|
||||
|
||||
s->a_transform_type = s->transform_type;
|
||||
s->a_height = s->coded_height;
|
||||
s->a_width = s->coded_width;
|
||||
s->a_format = s->coded_format;
|
||||
@@ -639,7 +627,7 @@ static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
|
||||
} else
|
||||
av_log(avctx, AV_LOG_DEBUG, "Unknown tag %i data %x\n", tag, data);
|
||||
|
||||
if (tag == BitstreamMarker && data == CoefficientSegment &&
|
||||
if (tag == BitstreamMarker && data == 0xf0f &&
|
||||
s->coded_format != AV_PIX_FMT_NONE) {
|
||||
int lowpass_height = s->plane[s->channel_num].band[0][0].height;
|
||||
int lowpass_width = s->plane[s->channel_num].band[0][0].width;
|
||||
@@ -667,8 +655,7 @@ static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
|
||||
s->coded_height = s->a_height;
|
||||
|
||||
if (s->a_width != s->coded_width || s->a_height != s->coded_height ||
|
||||
s->a_format != s->coded_format ||
|
||||
s->transform_type != s->a_transform_type) {
|
||||
s->a_format != s->coded_format) {
|
||||
free_buffers(s);
|
||||
if ((ret = alloc_buffers(avctx)) < 0) {
|
||||
free_buffers(s);
|
||||
@@ -708,26 +695,14 @@ static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
|
||||
|
||||
if (s->subband_num_actual == 255)
|
||||
goto finish;
|
||||
|
||||
if (tag == BitstreamMarker && data == CoefficientSegment || tag == BandHeader || tag == BandSecondPass || s->peak.level)
|
||||
if (s->transform_type != s->a_transform_type)
|
||||
return AVERROR_PATCHWELCOME;
|
||||
|
||||
coeff_data = s->plane[s->channel_num].subband[s->subband_num_actual];
|
||||
|
||||
/* Lowpass coefficients */
|
||||
if (tag == BitstreamMarker && data == CoefficientSegment) {
|
||||
int lowpass_height, lowpass_width, lowpass_a_height, lowpass_a_width;
|
||||
|
||||
if (!s->a_width || !s->a_height) {
|
||||
ret = AVERROR_INVALIDDATA;
|
||||
goto end;
|
||||
}
|
||||
|
||||
lowpass_height = s->plane[s->channel_num].band[0][0].height;
|
||||
lowpass_width = s->plane[s->channel_num].band[0][0].width;
|
||||
lowpass_a_height = s->plane[s->channel_num].band[0][0].a_height;
|
||||
lowpass_a_width = s->plane[s->channel_num].band[0][0].a_width;
|
||||
if (tag == BitstreamMarker && data == 0xf0f && s->a_width && s->a_height) {
|
||||
int lowpass_height = s->plane[s->channel_num].band[0][0].height;
|
||||
int lowpass_width = s->plane[s->channel_num].band[0][0].width;
|
||||
int lowpass_a_height = s->plane[s->channel_num].band[0][0].a_height;
|
||||
int lowpass_a_width = s->plane[s->channel_num].band[0][0].a_width;
|
||||
|
||||
if (lowpass_width < 3 ||
|
||||
lowpass_width > lowpass_a_width) {
|
||||
@@ -774,30 +749,20 @@ static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
|
||||
lowpass_width * sizeof(*coeff_data));
|
||||
}
|
||||
|
||||
s->plane[s->channel_num].band[0][0].read_ok = 1;
|
||||
|
||||
av_log(avctx, AV_LOG_DEBUG, "Lowpass coefficients %d\n", lowpass_width * lowpass_height);
|
||||
}
|
||||
|
||||
av_assert0(s->subband_num_actual != 255);
|
||||
if (tag == BandHeader || tag == BandSecondPass) {
|
||||
int highpass_height, highpass_width, highpass_a_width, highpass_a_height, highpass_stride, a_expected;
|
||||
if ((tag == BandHeader || tag == BandSecondPass) && s->subband_num_actual != 255 && s->a_width && s->a_height) {
|
||||
int highpass_height = s->plane[s->channel_num].band[s->level][s->subband_num].height;
|
||||
int highpass_width = s->plane[s->channel_num].band[s->level][s->subband_num].width;
|
||||
int highpass_a_width = s->plane[s->channel_num].band[s->level][s->subband_num].a_width;
|
||||
int highpass_a_height = s->plane[s->channel_num].band[s->level][s->subband_num].a_height;
|
||||
int highpass_stride = s->plane[s->channel_num].band[s->level][s->subband_num].stride;
|
||||
int expected;
|
||||
int a_expected = highpass_a_height * highpass_a_width;
|
||||
int level, run, coeff;
|
||||
int count = 0, bytes;
|
||||
|
||||
if (!s->a_width || !s->a_height) {
|
||||
ret = AVERROR_INVALIDDATA;
|
||||
goto end;
|
||||
}
|
||||
|
||||
highpass_height = s->plane[s->channel_num].band[s->level][s->subband_num].height;
|
||||
highpass_width = s->plane[s->channel_num].band[s->level][s->subband_num].width;
|
||||
highpass_a_width = s->plane[s->channel_num].band[s->level][s->subband_num].a_width;
|
||||
highpass_a_height = s->plane[s->channel_num].band[s->level][s->subband_num].a_height;
|
||||
highpass_stride = s->plane[s->channel_num].band[s->level][s->subband_num].stride;
|
||||
a_expected = highpass_a_height * highpass_a_width;
|
||||
|
||||
if (!got_buffer) {
|
||||
av_log(avctx, AV_LOG_ERROR, "No end of header tag found\n");
|
||||
ret = AVERROR(EINVAL);
|
||||
@@ -846,7 +811,7 @@ static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
|
||||
const uint16_t q = s->quantisation;
|
||||
|
||||
for (i = 0; i < run; i++) {
|
||||
*coeff_data |= coeff * 256U;
|
||||
*coeff_data |= coeff * 256;
|
||||
*coeff_data++ *= q;
|
||||
}
|
||||
} else {
|
||||
@@ -877,7 +842,7 @@ static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
|
||||
const uint16_t q = s->quantisation;
|
||||
|
||||
for (i = 0; i < run; i++) {
|
||||
*coeff_data |= coeff * 256U;
|
||||
*coeff_data |= coeff * 256;
|
||||
*coeff_data++ *= q;
|
||||
}
|
||||
} else {
|
||||
@@ -908,7 +873,6 @@ static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
|
||||
bytestream2_seek(&gb, bytes, SEEK_CUR);
|
||||
|
||||
av_log(avctx, AV_LOG_DEBUG, "End subband coeffs %i extra %i\n", count, count - expected);
|
||||
s->plane[s->channel_num].band[s->level][s->subband_num].read_ok = 1;
|
||||
finish:
|
||||
if (s->subband_num_actual != 255)
|
||||
s->codebook = 0;
|
||||
@@ -924,7 +888,6 @@ finish:
|
||||
ff_thread_finish_setup(avctx);
|
||||
|
||||
if (!s->a_width || !s->a_height || s->a_format == AV_PIX_FMT_NONE ||
|
||||
s->a_transform_type == INT_MIN ||
|
||||
s->coded_width || s->coded_height || s->coded_format != AV_PIX_FMT_NONE) {
|
||||
av_log(avctx, AV_LOG_ERROR, "Invalid dimensions\n");
|
||||
ret = AVERROR(EINVAL);
|
||||
@@ -937,22 +900,6 @@ finish:
|
||||
goto end;
|
||||
}
|
||||
|
||||
for (plane = 0; plane < s->planes; plane++) {
|
||||
int o, level;
|
||||
|
||||
for (level = 0; level < (s->transform_type == 0 ? DWT_LEVELS : DWT_LEVELS_3D) ; level++) {
|
||||
if (s->transform_type == 2)
|
||||
if (level == 2 || level == 5)
|
||||
continue;
|
||||
for (o = !!level; o < 4 ; o++) {
|
||||
if (!s->plane[plane].band[level][o].read_ok) {
|
||||
ret = AVERROR_INVALIDDATA;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (s->transform_type == 0 && s->sample_type != 1) {
|
||||
for (plane = 0; plane < s->planes && !ret; plane++) {
|
||||
/* level 1 */
|
||||
@@ -1434,14 +1381,12 @@ static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
|
||||
if (pdst->plane[0].idwt_size != psrc->plane[0].idwt_size ||
|
||||
pdst->a_format != psrc->a_format ||
|
||||
pdst->a_width != psrc->a_width ||
|
||||
pdst->a_height != psrc->a_height ||
|
||||
pdst->a_transform_type != psrc->a_transform_type)
|
||||
pdst->a_height != psrc->a_height)
|
||||
free_buffers(pdst);
|
||||
|
||||
pdst->a_format = psrc->a_format;
|
||||
pdst->a_width = psrc->a_width;
|
||||
pdst->a_height = psrc->a_height;
|
||||
pdst->a_transform_type = psrc->a_transform_type;
|
||||
pdst->transform_type = psrc->transform_type;
|
||||
pdst->progressive = psrc->progressive;
|
||||
pdst->planes = psrc->planes;
|
||||
@@ -1450,7 +1395,6 @@ static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
|
||||
pdst->coded_width = pdst->a_width;
|
||||
pdst->coded_height = pdst->a_height;
|
||||
pdst->coded_format = pdst->a_format;
|
||||
pdst->transform_type = pdst->a_transform_type;
|
||||
ret = alloc_buffers(dst);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
@@ -95,15 +95,6 @@ enum CFHDParam {
|
||||
ChannelHeight = 105,
|
||||
};
|
||||
|
||||
enum CFHDSegment {
|
||||
LowPassSegment = 0x1a4a,
|
||||
LowPassEndSegment = 0x1b4b,
|
||||
HighPassSegment = 0x0d0d,
|
||||
BandSegment = 0x0e0e,
|
||||
HighPassEndSegment = 0x0c0c,
|
||||
CoefficientSegment = 0x0f0f,
|
||||
};
|
||||
|
||||
#define VLC_BITS 9
|
||||
#define SUBBAND_COUNT 10
|
||||
#define SUBBAND_COUNT_3D 17
|
||||
@@ -123,7 +114,6 @@ typedef struct SubBand {
|
||||
int width;
|
||||
int a_height;
|
||||
int height;
|
||||
int8_t read_ok;
|
||||
} SubBand;
|
||||
|
||||
typedef struct Plane {
|
||||
@@ -175,7 +165,6 @@ typedef struct CFHDContext {
|
||||
int a_width;
|
||||
int a_height;
|
||||
int a_format;
|
||||
int a_transform_type;
|
||||
|
||||
int bpc; // bits per channel/component
|
||||
int channel_cnt;
|
||||
|
||||
+7
-12
@@ -258,11 +258,6 @@ static av_cold int cfhd_encode_init(AVCodecContext *avctx)
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (avctx->height < 32) {
|
||||
av_log(avctx, AV_LOG_ERROR, "Height must be >= 32.\n");
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
|
||||
if (avctx->width & 15) {
|
||||
av_log(avctx, AV_LOG_ERROR, "Width must be multiple of 16.\n");
|
||||
return AVERROR_INVALIDDATA;
|
||||
@@ -552,7 +547,7 @@ static int cfhd_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
|
||||
width, height * 2);
|
||||
}
|
||||
|
||||
ret = ff_alloc_packet2(avctx, pkt, 256LL + s->planes * (4LL * avctx->width * (avctx->height + 15) + 2048LL), 0);
|
||||
ret = ff_alloc_packet2(avctx, pkt, 64LL + s->planes * (2LL * avctx->width * avctx->height + 1000LL), 0);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
@@ -624,7 +619,7 @@ static int cfhd_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
|
||||
}
|
||||
|
||||
bytestream2_put_be16(pby, BitstreamMarker);
|
||||
bytestream2_put_be16(pby, LowPassSegment);
|
||||
bytestream2_put_be16(pby, 0x1a4a);
|
||||
|
||||
pos = bytestream2_tell_p(pby);
|
||||
|
||||
@@ -650,7 +645,7 @@ static int cfhd_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
|
||||
bytestream2_put_be16(pby, 16);
|
||||
|
||||
bytestream2_put_be16(pby, BitstreamMarker);
|
||||
bytestream2_put_be16(pby, CoefficientSegment);
|
||||
bytestream2_put_be16(pby, 0x0f0f);
|
||||
|
||||
for (int i = 0; i < height; i++) {
|
||||
for (int j = 0; j < width; j++)
|
||||
@@ -659,7 +654,7 @@ static int cfhd_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
|
||||
}
|
||||
|
||||
bytestream2_put_be16(pby, BitstreamMarker);
|
||||
bytestream2_put_be16(pby, LowPassEndSegment);
|
||||
bytestream2_put_be16(pby, 0x1b4b);
|
||||
|
||||
for (int l = 0; l < 3; l++) {
|
||||
for (int i = 0; i < 3; i++) {
|
||||
@@ -674,7 +669,7 @@ static int cfhd_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
|
||||
int height = s->plane[p].band[l][0].height;
|
||||
|
||||
bytestream2_put_be16(pby, BitstreamMarker);
|
||||
bytestream2_put_be16(pby, HighPassSegment);
|
||||
bytestream2_put_be16(pby, 0x0d0d);
|
||||
|
||||
bytestream2_put_be16(pby, WaveletType);
|
||||
bytestream2_put_be16(pby, 3 + 2 * (l == 2));
|
||||
@@ -711,7 +706,7 @@ static int cfhd_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
|
||||
int count = 0, padd = 0;
|
||||
|
||||
bytestream2_put_be16(pby, BitstreamMarker);
|
||||
bytestream2_put_be16(pby, BandSegment);
|
||||
bytestream2_put_be16(pby, 0x0e0e);
|
||||
|
||||
bytestream2_put_be16(pby, SubbandNumber);
|
||||
bytestream2_put_be16(pby, i + 1);
|
||||
@@ -781,7 +776,7 @@ static int cfhd_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
|
||||
}
|
||||
|
||||
bytestream2_put_be16(pby, BitstreamMarker);
|
||||
bytestream2_put_be16(pby, HighPassEndSegment);
|
||||
bytestream2_put_be16(pby, 0x0c0c);
|
||||
}
|
||||
|
||||
s->plane[p].size = bytestream2_tell_p(pby) - pos;
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user