Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| c5079bf3bc |
@@ -1,23 +0,0 @@
|
||||
exclude: ^tests/ref/
|
||||
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v5.0.0
|
||||
hooks:
|
||||
- id: check-case-conflict
|
||||
- id: check-executables-have-shebangs
|
||||
- id: check-illegal-windows-names
|
||||
- id: check-shebang-scripts-are-executable
|
||||
- id: check-yaml
|
||||
- id: end-of-file-fixer
|
||||
- id: fix-byte-order-marker
|
||||
- id: mixed-line-ending
|
||||
- id: trailing-whitespace
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: aarch64-asm-indent
|
||||
name: fix aarch64 assembly indentation
|
||||
files: ^.*/aarch64/.*\.S$
|
||||
language: script
|
||||
entry: ./tools/check_arm_indent.sh --apply
|
||||
pass_filenames: false
|
||||
@@ -1,29 +0,0 @@
|
||||
name: Lint
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- release/4.3
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
name: Pre-Commit
|
||||
runs-on: utilities
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Install pre-commit CI
|
||||
id: install
|
||||
run: |
|
||||
python3 -m venv ~/pre-commit
|
||||
~/pre-commit/bin/pip install --upgrade pip setuptools
|
||||
~/pre-commit/bin/pip install pre-commit
|
||||
echo "envhash=$({ python3 --version && cat .forgejo/pre-commit/config.yaml; } | sha256sum | cut -d' ' -f1)" >> $FORGEJO_OUTPUT
|
||||
- name: Cache
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cache/pre-commit
|
||||
key: pre-commit-${{ steps.install.outputs.envhash }}
|
||||
- name: Run pre-commit CI
|
||||
run: ~/pre-commit/bin/pre-commit run -c .forgejo/pre-commit/config.yaml --show-diff-on-failure --color=always --all-files
|
||||
@@ -1,80 +0,0 @@
|
||||
name: Test
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- release/4.3
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
run_fate:
|
||||
name: Fate (${{ matrix.runner }}, ${{ matrix.shared }}, ${{ matrix.bits }} bit)
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
runner: [linux-aarch64]
|
||||
shared: ['static']
|
||||
bits: ['64']
|
||||
include:
|
||||
- runner: linux-amd64
|
||||
shared: 'static'
|
||||
bits: '32'
|
||||
- runner: linux-amd64
|
||||
shared: 'shared'
|
||||
bits: '64'
|
||||
runs-on: ${{ matrix.runner }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Configure
|
||||
run: |
|
||||
./configure --enable-gpl --enable-nonfree --enable-memory-poisoning --assert-level=2 \
|
||||
$([ "${{ matrix.bits }}" != "32" ] || echo --arch=x86_32 --extra-cflags=-m32 --extra-cxxflags=-m32 --extra-ldflags=-m32) \
|
||||
$([ "${{ matrix.shared }}" != "shared" ] || echo --enable-shared --disable-static) \
|
||||
|| CFGRES=$? && CFGRES=$?
|
||||
cat ffbuild/config.log
|
||||
exit $CFGRES
|
||||
- name: Build
|
||||
run: make -j$(nproc)
|
||||
- name: Restore Cached Fate-Suite
|
||||
id: cache
|
||||
uses: actions/cache/restore@v4
|
||||
with:
|
||||
path: fate-suite
|
||||
key: fate-suite
|
||||
restore-keys: |
|
||||
fate-suite-
|
||||
- name: Sync Fate-Suite
|
||||
id: fate
|
||||
run: |
|
||||
make fate-rsync SAMPLES=$PWD/fate-suite
|
||||
echo "hash=$(find fate-suite -type f -printf "%P %s %T@\n" | sort | sha256sum | cut -d' ' -f1)" >> $FORGEJO_OUTPUT
|
||||
- name: Cache Fate-Suite
|
||||
uses: actions/cache/save@v4
|
||||
if: ${{ format('fate-suite-{0}', steps.fate.outputs.hash) != steps.cache.outputs.cache-matched-key }}
|
||||
with:
|
||||
path: fate-suite
|
||||
key: fate-suite-${{ steps.fate.outputs.hash }}
|
||||
- name: Run Fate
|
||||
run: LD_LIBRARY_PATH="$(printf "%s:" "$PWD"/lib*)$PWD" make fate fate-build SAMPLES=$PWD/fate-suite -j$(nproc)
|
||||
compile_only:
|
||||
name: Fate (Win64, Build-Only)
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
image: ["ghcr.io/btbn/ffmpeg-builds/win64-gpl-4.3:latest"]
|
||||
runs-on: linux-amd64
|
||||
container: ${{ matrix.image }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Configure
|
||||
run: |
|
||||
./configure --pkg-config-flags="--static" $FFBUILD_TARGET_FLAGS $FF_CONFIGURE \
|
||||
--cc="$CC" --cxx="$CXX" --ar="$AR" --ranlib="$RANLIB" --nm="$NM" \
|
||||
--extra-cflags="$FF_CFLAGS" --extra-cxxflags="$FF_CXXFLAGS" \
|
||||
--extra-libs="$FF_LIBS" --extra-ldflags="$FF_LDFLAGS" --extra-ldexeflags="$FF_LDEXEFLAGS"
|
||||
- name: Build
|
||||
run: make -j$(nproc)
|
||||
- name: Run Fate
|
||||
run: make -j$(nproc) fate-build
|
||||
+9
-9
@@ -55,7 +55,7 @@ modified by someone else and passed on, the recipients should know
|
||||
that what they have is not the original version, so that the original
|
||||
author's reputation will not be affected by problems that might be
|
||||
introduced by others.
|
||||
|
||||
|
||||
Finally, software patents pose a constant threat to the existence of
|
||||
any free program. We wish to make sure that a company cannot
|
||||
effectively restrict the users of a free program by obtaining a
|
||||
@@ -111,7 +111,7 @@ modification follow. Pay close attention to the difference between a
|
||||
"work based on the library" and a "work that uses the library". The
|
||||
former contains code derived from the library, whereas the latter must
|
||||
be combined with the library in order to run.
|
||||
|
||||
|
||||
GNU LESSER GENERAL PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
@@ -158,7 +158,7 @@ Library.
|
||||
You may charge a fee for the physical act of transferring a copy,
|
||||
and you may at your option offer warranty protection in exchange for a
|
||||
fee.
|
||||
|
||||
|
||||
2. You may modify your copy or copies of the Library or any portion
|
||||
of it, thus forming a work based on the Library, and copy and
|
||||
distribute such modifications or work under the terms of Section 1
|
||||
@@ -216,7 +216,7 @@ instead of to this License. (If a newer version than version 2 of the
|
||||
ordinary GNU General Public License has appeared, then you can specify
|
||||
that version instead if you wish.) Do not make any other change in
|
||||
these notices.
|
||||
|
||||
|
||||
Once this change is made in a given copy, it is irreversible for
|
||||
that copy, so the ordinary GNU General Public License applies to all
|
||||
subsequent copies and derivative works made from that copy.
|
||||
@@ -267,7 +267,7 @@ Library will still fall under Section 6.)
|
||||
distribute the object code for the work under the terms of Section 6.
|
||||
Any executables containing that work also fall under Section 6,
|
||||
whether or not they are linked directly with the Library itself.
|
||||
|
||||
|
||||
6. As an exception to the Sections above, you may also combine or
|
||||
link a "work that uses the Library" with the Library to produce a
|
||||
work containing portions of the Library, and distribute that work
|
||||
@@ -329,7 +329,7 @@ restrictions of other proprietary libraries that do not normally
|
||||
accompany the operating system. Such a contradiction means you cannot
|
||||
use both them and the Library together in an executable that you
|
||||
distribute.
|
||||
|
||||
|
||||
7. You may place library facilities that are a work based on the
|
||||
Library side-by-side in a single library together with other library
|
||||
facilities not covered by this License, and distribute such a combined
|
||||
@@ -370,7 +370,7 @@ subject to these terms and conditions. You may not impose any further
|
||||
restrictions on the recipients' exercise of the rights granted herein.
|
||||
You are not responsible for enforcing compliance by third parties with
|
||||
this License.
|
||||
|
||||
|
||||
11. If, as a consequence of a court judgment or allegation of patent
|
||||
infringement or for any other reason (not limited to patent issues),
|
||||
conditions are imposed on you (whether by court order, agreement or
|
||||
@@ -422,7 +422,7 @@ conditions either of that version or of any later version published by
|
||||
the Free Software Foundation. If the Library does not specify a
|
||||
license version number, you may choose any version ever published by
|
||||
the Free Software Foundation.
|
||||
|
||||
|
||||
14. If you wish to incorporate parts of the Library into other free
|
||||
programs whose distribution conditions are incompatible with these,
|
||||
write to the author to ask for permission. For software which is
|
||||
@@ -456,7 +456,7 @@ SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
||||
DAMAGES.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
|
||||
How to Apply These Terms to Your New Libraries
|
||||
|
||||
If you develop a new library, and you want it to be of the greatest
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
See the Git history of the project (https://git.ffmpeg.org/ffmpeg) to
|
||||
See the Git history of the project (git://source.ffmpeg.org/ffmpeg) to
|
||||
get the names of people who have contributed to FFmpeg.
|
||||
|
||||
To check the log, you can type the command "git log" in the FFmpeg
|
||||
source directory, or browse the online repository at
|
||||
https://git.ffmpeg.org/ffmpeg
|
||||
http://source.ffmpeg.org.
|
||||
|
||||
@@ -15,11 +15,3 @@ NOTICE
|
||||
------
|
||||
|
||||
- Non system dependencies (e.g. libx264, libvpx) are disabled by default.
|
||||
|
||||
NOTICE for Package Maintainers
|
||||
------------------------------
|
||||
|
||||
- It is recommended to build FFmpeg twice, first with minimal external dependencies so
|
||||
that 3rd party packages, which depend on FFmpeg's libavutil/libavfilter/libavcodec/libavformat
|
||||
can then be built. And last build FFmpeg with full dependencies (which may in turn depend on
|
||||
some of these 3rd party packages). This avoids circular dependencies during build.
|
||||
|
||||
+3
-6
@@ -577,12 +577,10 @@ wm4
|
||||
Releases
|
||||
========
|
||||
|
||||
7.0 Michael Niedermayer
|
||||
6.1 Michael Niedermayer
|
||||
5.1 Michael Niedermayer
|
||||
4.4 Michael Niedermayer
|
||||
3.4 Michael Niedermayer
|
||||
2.8 Michael Niedermayer
|
||||
2.7 Michael Niedermayer
|
||||
2.6 Michael Niedermayer
|
||||
2.5 Michael Niedermayer
|
||||
|
||||
If you want to maintain an older release, please contact us
|
||||
|
||||
@@ -612,7 +610,6 @@ Loren Merritt ABD9 08F4 C920 3F65 D8BE 35D7 1540 DAA7 060F 56DE
|
||||
Lou Logan (llogan) 7D68 DC73 CBEF EABB 671A B6CF 621C 2E28 82F8 DC3A
|
||||
Lynne FE50 139C 6805 72CA FD52 1F8D A2FE A5F0 3F03 4464
|
||||
Michael Niedermayer 9FF2 128B 147E F673 0BAD F133 611E C787 040B 0FAB
|
||||
DD1E C9E8 DE08 5C62 9B3E 1846 B18E 8928 B394 8D64
|
||||
Nicolas George 24CE 01CE 9ACC 5CEB 74D8 8D9D B063 D997 36E5 4C93
|
||||
Nikolay Aleksandrov 8978 1D8C FB71 588E 4B27 EAA8 C4F0 B5FC E011 13B1
|
||||
Panagiotis Issaris 6571 13A3 33D9 3726 F728 AA98 F643 B12E ECF3 E029
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
|
||||
┌────────────────────────────────────┐
|
||||
│ RELEASE NOTES for FFmpeg 4.3 "4:3" │
|
||||
└────────────────────────────────────┘
|
||||
|
||||
The FFmpeg Project proudly presents FFmpeg 4.3 "4:3", about 10
|
||||
months after the release of FFmpeg 4.2.
|
||||
|
||||
A complete Changelog is available at the root of the project, and the
|
||||
complete Git history on https://git.ffmpeg.org/gitweb/ffmpeg.git
|
||||
|
||||
We hope you will like this release as much as we enjoyed working on it, and
|
||||
as usual, if you have any questions about it, or any FFmpeg related topic,
|
||||
feel free to join us on the #ffmpeg IRC channel (on irc.libera.chat) or ask
|
||||
on the mailing-lists.
|
||||
@@ -532,7 +532,7 @@ die(){
|
||||
|
||||
If you think configure made a mistake, make sure you are using the latest
|
||||
version from Git. If the latest version fails, report the problem to the
|
||||
ffmpeg-user@ffmpeg.org mailing list or IRC #ffmpeg on irc.libera.chat.
|
||||
ffmpeg-user@ffmpeg.org mailing list or IRC #ffmpeg on irc.freenode.net.
|
||||
EOF
|
||||
if disabled logging; then
|
||||
cat <<EOF
|
||||
@@ -2330,7 +2330,6 @@ HAVE_LIST="
|
||||
opencl_vaapi_intel_media
|
||||
perl
|
||||
pod2man
|
||||
posix_ioctl
|
||||
texi2html
|
||||
"
|
||||
|
||||
@@ -3237,7 +3236,7 @@ librav1e_encoder_deps="librav1e"
|
||||
librav1e_encoder_select="extract_extradata_bsf"
|
||||
librsvg_decoder_deps="librsvg"
|
||||
libshine_encoder_deps="libshine"
|
||||
libshine_encoder_select="audio_frame_queue mpegaudioheader"
|
||||
libshine_encoder_select="audio_frame_queue"
|
||||
libspeex_decoder_deps="libspeex"
|
||||
libspeex_encoder_deps="libspeex"
|
||||
libspeex_encoder_select="audio_frame_queue"
|
||||
@@ -5331,7 +5330,6 @@ case $target_os in
|
||||
;;
|
||||
netbsd)
|
||||
disable symver
|
||||
enable section_data_rel_ro
|
||||
oss_indev_extralibs="-lossaudio"
|
||||
oss_outdev_extralibs="-lossaudio"
|
||||
enabled gcc || check_ldflags -Wl,-zmuldefs
|
||||
@@ -5350,7 +5348,6 @@ case $target_os in
|
||||
disable symver
|
||||
;;
|
||||
freebsd)
|
||||
enable section_data_rel_ro
|
||||
;;
|
||||
bsd/os)
|
||||
add_extralibs -lpoll -lgnugetopt
|
||||
@@ -6496,7 +6493,7 @@ fi
|
||||
|
||||
if enabled sdl2; then
|
||||
SDL2_CONFIG="${cross_prefix}sdl2-config"
|
||||
test_pkg_config sdl2 "sdl2 >= 2.0.1 sdl2 < 3.0.0" SDL_events.h SDL_PollEvent
|
||||
test_pkg_config sdl2 "sdl2 >= 2.0.1 sdl2 < 2.1.0" SDL_events.h SDL_PollEvent
|
||||
if disabled sdl2 && "${SDL2_CONFIG}" --version > /dev/null 2>&1; then
|
||||
sdl2_cflags=$("${SDL2_CONFIG}" --cflags)
|
||||
sdl2_extralibs=$("${SDL2_CONFIG}" --libs)
|
||||
@@ -6542,13 +6539,11 @@ perl -v > /dev/null 2>&1 && enable perl || disable perl
|
||||
pod2man --help > /dev/null 2>&1 && enable pod2man || disable pod2man
|
||||
rsync --help 2> /dev/null | grep -q 'contimeout' && enable rsync_contimeout || disable rsync_contimeout
|
||||
|
||||
check_headers linux/fb.h
|
||||
check_headers linux/videodev2.h
|
||||
test_code cc linux/videodev2.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_sanitized struct_v4l2_frmivalenum_discrete
|
||||
test_code cc sys/ioctl.h "int ioctl(int, int, ...)" && enable posix_ioctl
|
||||
|
||||
# check V4L2 codecs available in the API
|
||||
if enabled v4l2_m2m; then
|
||||
check_headers linux/fb.h
|
||||
check_headers linux/videodev2.h
|
||||
test_code cc linux/videodev2.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_sanitized struct_v4l2_frmivalenum_discrete
|
||||
check_cc v4l2_m2m linux/videodev2.h "int i = V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_VIDEO_M2M | V4L2_BUF_FLAG_LAST;"
|
||||
check_cc vc1_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_VC1_ANNEX_G;"
|
||||
check_cc mpeg1_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_MPEG1;"
|
||||
@@ -6593,7 +6588,7 @@ enabled alsa && { check_pkg_config alsa alsa "alsa/asoundlib.h" snd_pcm_htimesta
|
||||
enabled libjack &&
|
||||
require_pkg_config libjack jack jack/jack.h jack_port_get_latency_range
|
||||
|
||||
enabled sndio && check_pkg_config sndio sndio sndio.h sio_open
|
||||
enabled sndio && check_lib sndio sndio.h sio_open -lsndio
|
||||
|
||||
if enabled libcdio; then
|
||||
check_pkg_config libcdio libcdio_paranoia "cdio/cdda.h cdio/paranoia.h" cdio_cddap_open ||
|
||||
@@ -6690,7 +6685,7 @@ enabled vulkan &&
|
||||
|
||||
if enabled x86; then
|
||||
case $target_os in
|
||||
freebsd|mingw32*|mingw64*|win32|win64|linux|cygwin*)
|
||||
mingw32*|mingw64*|win32|win64|linux|cygwin*)
|
||||
;;
|
||||
*)
|
||||
disable ffnvcodec cuvid nvdec nvenc
|
||||
@@ -7518,7 +7513,7 @@ cat > $TMPH <<EOF
|
||||
#define FFMPEG_CONFIG_H
|
||||
#define FFMPEG_CONFIGURATION "$(c_escape $FFMPEG_CONFIGURATION)"
|
||||
#define FFMPEG_LICENSE "$(c_escape $license)"
|
||||
#define CONFIG_THIS_YEAR 2025
|
||||
#define CONFIG_THIS_YEAR 2020
|
||||
#define FFMPEG_DATADIR "$(eval c_escape $datadir)"
|
||||
#define AVCONV_DATADIR "$(eval c_escape $datadir)"
|
||||
#define CC_IDENT "$(c_escape ${cc_ident:-Unknown compiler})"
|
||||
|
||||
+1
-1
@@ -38,7 +38,7 @@ PROJECT_NAME = FFmpeg
|
||||
# could be handy for archiving the generated documentation or if some version
|
||||
# control system is used.
|
||||
|
||||
PROJECT_NUMBER = 4.3.9
|
||||
PROJECT_NUMBER =
|
||||
|
||||
# Using the PROJECT_BRIEF tag one can provide an optional one line description
|
||||
# for a project that appears at the top of each page and should give viewer a
|
||||
|
||||
+2
-2
@@ -3,9 +3,9 @@
|
||||
The FFmpeg developers.
|
||||
|
||||
For details about the authorship, see the Git history of the project
|
||||
(https://git.ffmpeg.org/ffmpeg), e.g. by typing the command
|
||||
(git://source.ffmpeg.org/ffmpeg), e.g. by typing the command
|
||||
@command{git log} in the FFmpeg source directory, or browsing the
|
||||
online repository at @url{https://git.ffmpeg.org/ffmpeg}.
|
||||
online repository at @url{http://source.ffmpeg.org}.
|
||||
|
||||
Maintainers for the specific components are listed in the file
|
||||
@file{MAINTAINERS} in the source code tree.
|
||||
|
||||
Vendored
+1
-1
File diff suppressed because one or more lines are too long
@@ -63,3 +63,4 @@ make -j<num>
|
||||
make -k
|
||||
Continue build in case of errors, this is useful for the regression tests
|
||||
sometimes but note that it will still not run all reg tests.
|
||||
|
||||
|
||||
+1
-1
@@ -317,7 +317,7 @@ list are dropped. You may use the special @code{*} string to match all pages,
|
||||
or @code{subtitle} to match all subtitle pages.
|
||||
Default value is *.
|
||||
@item txt_default_region
|
||||
Set default character set used for decoding, a value between 0 and 87 (see
|
||||
Set default G0 character set used for decoding, a value between 0 and 80 (see
|
||||
ETS 300 706, Section 15, Table 32). Default value is -1, which does not
|
||||
override the libzvbi default. This option is needed for some legacy level 1.0
|
||||
transmissions which cannot signal the proper charset.
|
||||
|
||||
@@ -327,13 +327,6 @@ segment index to start live streams at (negative values are from the end).
|
||||
@item allowed_extensions
|
||||
',' separated list of file extensions that hls is allowed to access.
|
||||
|
||||
@item extension_picky
|
||||
This blocks disallowed extensions from probing
|
||||
It also requires all available segments to have matching extensions to the format
|
||||
except mpegts, which is always allowed.
|
||||
It is recommended to set the whitelists correctly instead of depending on extensions
|
||||
Enabled by default.
|
||||
|
||||
@item max_reload
|
||||
Maximum number of times an insufficient list is attempted to be reloaded.
|
||||
Default value is 1000.
|
||||
|
||||
@@ -762,25 +762,6 @@ In case you need finer control over how valgrind is invoked, use the
|
||||
@code{--target-exec='valgrind <your_custom_valgrind_options>} option in
|
||||
your configure line instead.
|
||||
|
||||
@anchor{Maintenance}
|
||||
@chapter Maintenance process
|
||||
|
||||
@anchor{MAINTAINERS}
|
||||
@section MAINTAINERS
|
||||
|
||||
The developers maintaining each part of the codebase are listed in @file{MAINTAINERS}.
|
||||
Being listed in @file{MAINTAINERS} gives one the right to have git write access to
|
||||
the specific repository.
|
||||
|
||||
@anchor{Becoming a maintainer}
|
||||
@section Becoming a maintainer
|
||||
|
||||
People add themselves to @file{MAINTAINERS} by sending a patch like any other code
|
||||
change. These get reviewed by the community like any other patch. It is expected
|
||||
that, if someone has an objection to a new maintainer, she is willing to object
|
||||
in public with her full name and is willing to take over maintainership for the area.
|
||||
|
||||
|
||||
@anchor{Release process}
|
||||
@chapter Release process
|
||||
|
||||
|
||||
@@ -137,9 +137,11 @@ static int decode_packet(AVCodecContext *dec, const AVPacket *pkt)
|
||||
ret = output_audio_frame(frame);
|
||||
|
||||
av_frame_unref(frame);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int open_codec_context(int *stream_idx,
|
||||
|
||||
@@ -221,8 +221,10 @@ static int dec_enc(AVPacket *pkt, AVCodec *enc_codec)
|
||||
|
||||
fail:
|
||||
av_frame_free(&frame);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
|
||||
+2
-22
@@ -53,7 +53,7 @@ Most distribution and operating system provide a package for it.
|
||||
@section Cloning the source tree
|
||||
|
||||
@example
|
||||
git clone https://git.ffmpeg.org/ffmpeg.git <target>
|
||||
git clone git://source.ffmpeg.org/ffmpeg <target>
|
||||
@end example
|
||||
|
||||
This will put the FFmpeg sources into the directory @var{<target>}.
|
||||
@@ -187,18 +187,11 @@ to make sure you don't have untracked files or deletions.
|
||||
git add [-i|-p|-A] <filenames/dirnames>
|
||||
@end example
|
||||
|
||||
Make sure you have told Git your name, email address and GPG key
|
||||
Make sure you have told Git your name and email address
|
||||
|
||||
@example
|
||||
git config --global user.name "My Name"
|
||||
git config --global user.email my@@email.invalid
|
||||
git config --global user.signingkey ABCDEF0123245
|
||||
@end example
|
||||
|
||||
Enable signing all commits or use -S
|
||||
|
||||
@example
|
||||
git config --global commit.gpgsign true
|
||||
@end example
|
||||
|
||||
Use @option{--global} to set the global configuration for all your Git checkouts.
|
||||
@@ -400,19 +393,6 @@ git checkout -b svn_23456 $SHA1
|
||||
where @var{$SHA1} is the commit hash from the @command{git log} output.
|
||||
|
||||
|
||||
@chapter gpg key generation
|
||||
|
||||
If you have no gpg key yet, we recommend that you create an ed25519 based key as it
|
||||
is small, fast and secure. Especially it results in small signatures in git.
|
||||
|
||||
@example
|
||||
gpg --default-new-key-algo "ed25519/cert,sign+cv25519/encr" --quick-generate-key "human@@server.com"
|
||||
@end example
|
||||
|
||||
When generating a key, make sure the email specified matches the email used in git as some sites like
|
||||
github consider mismatches a reason to declare such commits unverified. After generating a key you
|
||||
can add it to the MAINTAINERS file and upload it to a keyserver.
|
||||
|
||||
@chapter Pre-push checklist
|
||||
|
||||
Once you have a set of commits that you feel are ready for pushing,
|
||||
|
||||
@@ -157,3 +157,4 @@ PFD[32] would for example be signed 32 bit little-endian IEEE float
|
||||
@item XVID @tab non-compliant MPEG-4 generated by old Xvid
|
||||
@item XVIX @tab non-compliant MPEG-4 generated by old Xvid with interlacing bug
|
||||
@end multitable
|
||||
|
||||
|
||||
+19
-103
@@ -20,45 +20,8 @@
|
||||
# License along with FFmpeg; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
# Texinfo 7.0 changed the syntax of various functions.
|
||||
# Provide a shim for older versions.
|
||||
sub ff_set_from_init_file($$) {
|
||||
my $key = shift;
|
||||
my $value = shift;
|
||||
if (exists &{'texinfo_set_from_init_file'}) {
|
||||
texinfo_set_from_init_file($key, $value);
|
||||
} else {
|
||||
set_from_init_file($key, $value);
|
||||
}
|
||||
}
|
||||
|
||||
sub ff_get_conf($) {
|
||||
my $key = shift;
|
||||
if (exists &{'texinfo_get_conf'}) {
|
||||
texinfo_get_conf($key);
|
||||
} else {
|
||||
get_conf($key);
|
||||
}
|
||||
}
|
||||
|
||||
sub get_formatting_function($$) {
|
||||
my $obj = shift;
|
||||
my $func = shift;
|
||||
|
||||
my $sub = $obj->can('formatting_function');
|
||||
if ($sub) {
|
||||
return $obj->formatting_function($func);
|
||||
} else {
|
||||
return $obj->{$func};
|
||||
}
|
||||
}
|
||||
|
||||
# determine texinfo version
|
||||
my $program_version_num = version->declare(ff_get_conf('PACKAGE_VERSION'))->numify;
|
||||
my $program_version_6_8 = $program_version_num >= 6.008000;
|
||||
|
||||
# no navigation elements
|
||||
ff_set_from_init_file('HEADERS', 0);
|
||||
set_from_init_file('HEADERS', 0);
|
||||
|
||||
sub ffmpeg_heading_command($$$$$)
|
||||
{
|
||||
@@ -92,7 +55,7 @@ sub ffmpeg_heading_command($$$$$)
|
||||
$element = $command->{'parent'};
|
||||
}
|
||||
if ($element) {
|
||||
$result .= &{get_formatting_function($self, 'format_element_header')}($self, $cmdname,
|
||||
$result .= &{$self->{'format_element_header'}}($self, $cmdname,
|
||||
$command, $element);
|
||||
}
|
||||
|
||||
@@ -149,11 +112,7 @@ sub ffmpeg_heading_command($$$$$)
|
||||
$cmdname
|
||||
= $Texinfo::Common::level_to_structuring_command{$cmdname}->[$heading_level];
|
||||
}
|
||||
# format_heading_text expects an array of headings for texinfo >= 7.0
|
||||
if ($program_version_num >= 7.000000) {
|
||||
$heading = [$heading];
|
||||
}
|
||||
$result .= &{get_formatting_function($self,'format_heading_text')}(
|
||||
$result .= &{$self->{'format_heading_text'}}(
|
||||
$self, $cmdname, $heading,
|
||||
$heading_level +
|
||||
$self->get_conf('CHAPTER_HEADER_LEVEL') - 1, $command);
|
||||
@@ -168,18 +127,14 @@ foreach my $command (keys(%Texinfo::Common::sectioning_commands), 'node') {
|
||||
}
|
||||
|
||||
# print the TOC where @contents is used
|
||||
if ($program_version_6_8) {
|
||||
ff_set_from_init_file('CONTENTS_OUTPUT_LOCATION', 'inline');
|
||||
} else {
|
||||
ff_set_from_init_file('INLINE_CONTENTS', 1);
|
||||
}
|
||||
set_from_init_file('INLINE_CONTENTS', 1);
|
||||
|
||||
# make chapters <h2>
|
||||
ff_set_from_init_file('CHAPTER_HEADER_LEVEL', 2);
|
||||
set_from_init_file('CHAPTER_HEADER_LEVEL', 2);
|
||||
|
||||
# Do not add <hr>
|
||||
ff_set_from_init_file('DEFAULT_RULE', '');
|
||||
ff_set_from_init_file('BIG_RULE', '');
|
||||
set_from_init_file('DEFAULT_RULE', '');
|
||||
set_from_init_file('BIG_RULE', '');
|
||||
|
||||
# Customized file beginning
|
||||
sub ffmpeg_begin_file($$$)
|
||||
@@ -196,18 +151,7 @@ sub ffmpeg_begin_file($$$)
|
||||
my ($title, $description, $encoding, $date, $css_lines,
|
||||
$doctype, $bodytext, $copying_comment, $after_body_open,
|
||||
$extra_head, $program_and_version, $program_homepage,
|
||||
$program, $generator);
|
||||
if ($program_version_num >= 7.000000) {
|
||||
($title, $description, $encoding, $date, $css_lines,
|
||||
$doctype, $bodytext, $copying_comment, $after_body_open,
|
||||
$extra_head, $program_and_version, $program_homepage,
|
||||
$program, $generator) = $self->_file_header_information($command);
|
||||
} else {
|
||||
($title, $description, $encoding, $date, $css_lines,
|
||||
$doctype, $bodytext, $copying_comment, $after_body_open,
|
||||
$extra_head, $program_and_version, $program_homepage,
|
||||
$program, $generator) = $self->_file_header_informations($command);
|
||||
}
|
||||
$program, $generator) = $self->_file_header_informations($command);
|
||||
|
||||
my $links = $self->_get_links ($filename, $element);
|
||||
|
||||
@@ -240,11 +184,7 @@ EOT
|
||||
|
||||
return $head1 . $head_title . $head2 . $head_title . $head3;
|
||||
}
|
||||
if ($program_version_6_8) {
|
||||
texinfo_register_formatting_function('format_begin_file', \&ffmpeg_begin_file);
|
||||
} else {
|
||||
texinfo_register_formatting_function('begin_file', \&ffmpeg_begin_file);
|
||||
}
|
||||
texinfo_register_formatting_function('begin_file', \&ffmpeg_begin_file);
|
||||
|
||||
sub ffmpeg_program_string($)
|
||||
{
|
||||
@@ -261,17 +201,13 @@ sub ffmpeg_program_string($)
|
||||
$self->gdt('This document was generated automatically.'));
|
||||
}
|
||||
}
|
||||
if ($program_version_6_8) {
|
||||
texinfo_register_formatting_function('format_program_string', \&ffmpeg_program_string);
|
||||
} else {
|
||||
texinfo_register_formatting_function('program_string', \&ffmpeg_program_string);
|
||||
}
|
||||
texinfo_register_formatting_function('program_string', \&ffmpeg_program_string);
|
||||
|
||||
# Customized file ending
|
||||
sub ffmpeg_end_file($)
|
||||
{
|
||||
my $self = shift;
|
||||
my $program_string = &{get_formatting_function($self,'format_program_string')}($self);
|
||||
my $program_string = &{$self->{'format_program_string'}}($self);
|
||||
my $program_text = <<EOT;
|
||||
<p style="font-size: small;">
|
||||
$program_string
|
||||
@@ -284,15 +220,11 @@ EOT
|
||||
EOT
|
||||
return $program_text . $footer;
|
||||
}
|
||||
if ($program_version_6_8) {
|
||||
texinfo_register_formatting_function('format_end_file', \&ffmpeg_end_file);
|
||||
} else {
|
||||
texinfo_register_formatting_function('end_file', \&ffmpeg_end_file);
|
||||
}
|
||||
texinfo_register_formatting_function('end_file', \&ffmpeg_end_file);
|
||||
|
||||
# Dummy title command
|
||||
# Ignore title. Title is handled through ffmpeg_begin_file().
|
||||
ff_set_from_init_file('USE_TITLEPAGE_FOR_TITLE', 1);
|
||||
set_from_init_file('USE_TITLEPAGE_FOR_TITLE', 1);
|
||||
sub ffmpeg_title($$$$)
|
||||
{
|
||||
return '';
|
||||
@@ -310,14 +242,8 @@ sub ffmpeg_float($$$$$)
|
||||
my $args = shift;
|
||||
my $content = shift;
|
||||
|
||||
my ($caption, $prepended);
|
||||
if ($program_version_num >= 7.000000) {
|
||||
($caption, $prepended) = Texinfo::Convert::Converter::float_name_caption($self,
|
||||
$command);
|
||||
} else {
|
||||
($caption, $prepended) = Texinfo::Common::float_name_caption($self,
|
||||
$command);
|
||||
}
|
||||
my ($caption, $prepended) = Texinfo::Common::float_name_caption($self,
|
||||
$command);
|
||||
my $caption_text = '';
|
||||
my $prepended_text;
|
||||
my $prepended_save = '';
|
||||
@@ -389,13 +315,8 @@ sub ffmpeg_float($$$$$)
|
||||
$caption->{'args'}->[0], 'float caption');
|
||||
}
|
||||
if ($prepended_text.$caption_text ne '') {
|
||||
if ($program_version_num >= 7.000000) {
|
||||
$prepended_text = $self->html_attribute_class('div',['float-caption']). '>'
|
||||
. $prepended_text;
|
||||
} else {
|
||||
$prepended_text = $self->_attribute_class('div','float-caption'). '>'
|
||||
. $prepended_text;
|
||||
}
|
||||
$prepended_text = $self->_attribute_class('div','float-caption'). '>'
|
||||
. $prepended_text;
|
||||
$caption_text .= '</div>';
|
||||
}
|
||||
my $html_class = '';
|
||||
@@ -408,13 +329,8 @@ sub ffmpeg_float($$$$$)
|
||||
$prepended_text = '';
|
||||
$caption_text = '';
|
||||
}
|
||||
if ($program_version_num >= 7.000000) {
|
||||
return $self->html_attribute_class('div', [$html_class]). '>' . "\n" .
|
||||
$prepended_text . $caption_text . $content . '</div>';
|
||||
} else {
|
||||
return $self->_attribute_class('div', $html_class). '>' . "\n" .
|
||||
$prepended_text . $caption_text . $content . '</div>';
|
||||
}
|
||||
return $self->_attribute_class('div', $html_class). '>' . "\n" .
|
||||
$prepended_text . $caption_text . $content . '</div>';
|
||||
}
|
||||
|
||||
texinfo_register_command_formatting('float',
|
||||
|
||||
Executable → Regular
Executable → Regular
@@ -44,3 +44,4 @@ a+b*c;
|
||||
here the reader knows that a,b,c are meant to be signed integers but for C
|
||||
standard compliance / to avoid undefined behavior they are stored in unsigned
|
||||
ints.
|
||||
|
||||
|
||||
@@ -418,4 +418,4 @@ done:
|
||||
|
||||
When all of this is done, you can submit your patch to the ffmpeg-devel
|
||||
mailing-list for review. If you need any help, feel free to come on our IRC
|
||||
channel, #ffmpeg-devel on irc.libera.chat.
|
||||
channel, #ffmpeg-devel on irc.freenode.net.
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
#!/bin/sh
|
||||
|
||||
toupper(){
|
||||
echo "$@" | tr abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
||||
}
|
||||
|
||||
+1
-1
@@ -538,7 +538,7 @@ static const AVOption *opt_find(void *obj, const char *name, const char *unit,
|
||||
return o;
|
||||
}
|
||||
|
||||
#define FLAGS ((o->type == AV_OPT_TYPE_FLAGS && (arg[0]=='-' || arg[0]=='+')) ? AV_DICT_APPEND : 0)
|
||||
#define FLAGS (o->type == AV_OPT_TYPE_FLAGS && (arg[0]=='-' || arg[0]=='+')) ? AV_DICT_APPEND : 0
|
||||
int opt_default(void *optctx, const char *opt, const char *arg)
|
||||
{
|
||||
const AVOption *o;
|
||||
|
||||
+2
-4
@@ -468,9 +468,8 @@ static int read_key(void)
|
||||
}
|
||||
//Read it
|
||||
if(nchars != 0) {
|
||||
if (read(0, &ch, 1) == 1)
|
||||
return ch;
|
||||
return 0;
|
||||
read(0, &ch, 1);
|
||||
return ch;
|
||||
}else{
|
||||
return -1;
|
||||
}
|
||||
@@ -529,7 +528,6 @@ static void ffmpeg_cleanup(int ret)
|
||||
for (j = 0; j < fg->nb_outputs; j++) {
|
||||
OutputFilter *ofilter = fg->outputs[j];
|
||||
|
||||
avfilter_inout_free(&ofilter->out_tmp);
|
||||
av_freep(&ofilter->name);
|
||||
av_freep(&ofilter->formats);
|
||||
av_freep(&ofilter->channel_layouts);
|
||||
|
||||
+2
-2
@@ -1151,8 +1151,6 @@ static void video_audio_display(VideoState *s)
|
||||
if (realloc_texture(&s->vis_texture, SDL_PIXELFORMAT_ARGB8888, s->width, s->height, SDL_BLENDMODE_NONE, 1) < 0)
|
||||
return;
|
||||
|
||||
if (s->xpos >= s->width)
|
||||
s->xpos = 0;
|
||||
nb_display_channels= FFMIN(nb_display_channels, 2);
|
||||
if (rdft_bits != s->rdft_bits) {
|
||||
av_rdft_end(s->rdft);
|
||||
@@ -1202,6 +1200,8 @@ static void video_audio_display(VideoState *s)
|
||||
}
|
||||
if (!s->paused)
|
||||
s->xpos++;
|
||||
if (s->xpos >= s->width)
|
||||
s->xpos= s->xleft;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
+2
-2
@@ -131,8 +131,8 @@ static int zero12v_decode_frame(AVCodecContext *avctx, void *data,
|
||||
u = x/2 + (uint16_t *)(pic->data[1] + line * pic->linesize[1]);
|
||||
v = x/2 + (uint16_t *)(pic->data[2] + line * pic->linesize[2]);
|
||||
memcpy(y, y_temp, sizeof(*y) * (width - x));
|
||||
memcpy(u, u_temp, sizeof(*u) * ((width - x + 1) / 2));
|
||||
memcpy(v, v_temp, sizeof(*v) * ((width - x + 1) / 2));
|
||||
memcpy(u, u_temp, sizeof(*u) * (width - x + 1) / 2);
|
||||
memcpy(v, v_temp, sizeof(*v) * (width - x + 1) / 2);
|
||||
}
|
||||
|
||||
line_end += stride;
|
||||
|
||||
+2
-4
@@ -498,8 +498,8 @@ static int decode_i_block(FourXContext *f, int16_t *block)
|
||||
{
|
||||
int code, i, j, level, val;
|
||||
|
||||
if (get_bits_left(&f->pre_gb) < 2) {
|
||||
av_log(f->avctx, AV_LOG_ERROR, "%d bits left before decode_i_block()\n", get_bits_left(&f->pre_gb));
|
||||
if (get_bits_left(&f->gb) < 2){
|
||||
av_log(f->avctx, AV_LOG_ERROR, "%d bits left before decode_i_block()\n", get_bits_left(&f->gb));
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
|
||||
@@ -885,8 +885,6 @@ static int decode_frame(AVCodecContext *avctx, void *data,
|
||||
}
|
||||
|
||||
if (i >= CFRAME_BUFFER_COUNT) {
|
||||
if (free_index < 0)
|
||||
return AVERROR_INVALIDDATA;
|
||||
i = free_index;
|
||||
f->cfrm[i].id = id;
|
||||
}
|
||||
|
||||
@@ -70,9 +70,6 @@ static int decode_frame(AVCodecContext *avctx, void *data,
|
||||
unsigned char *planemap = c->planemap;
|
||||
int ret;
|
||||
|
||||
if (buf_size < planes * height *2)
|
||||
return AVERROR_INVALIDDATA;
|
||||
|
||||
if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
|
||||
return ret;
|
||||
|
||||
|
||||
+1
-1
@@ -1174,7 +1174,7 @@ SKIPHEADERS-$(CONFIG_QSV) += qsv.h qsv_internal.h
|
||||
SKIPHEADERS-$(CONFIG_QSVDEC) += qsvdec.h
|
||||
SKIPHEADERS-$(CONFIG_QSVENC) += qsvenc.h
|
||||
SKIPHEADERS-$(CONFIG_XVMC) += xvmc.h
|
||||
SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h vaapi_hevc.h vaapi_encode.h
|
||||
SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h vaapi_encode.h
|
||||
SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h vdpau_internal.h
|
||||
SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.h vt_internal.h
|
||||
SKIPHEADERS-$(CONFIG_V4L2_M2M) += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
|
||||
|
||||
@@ -407,7 +407,6 @@ AVCodec ff_a64multi_encoder = {
|
||||
.close = a64multi_close_encoder,
|
||||
.pix_fmts = (const enum AVPixelFormat[]) {AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE},
|
||||
.capabilities = AV_CODEC_CAP_DELAY,
|
||||
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
|
||||
};
|
||||
#endif
|
||||
#if CONFIG_A64MULTI5_ENCODER
|
||||
@@ -422,6 +421,5 @@ AVCodec ff_a64multi5_encoder = {
|
||||
.close = a64multi_close_encoder,
|
||||
.pix_fmts = (const enum AVPixelFormat[]) {AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE},
|
||||
.capabilities = AV_CODEC_CAP_DELAY,
|
||||
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
|
||||
};
|
||||
#endif
|
||||
|
||||
@@ -843,25 +843,25 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe)
|
||||
sce0->ics.swb_sizes[g],
|
||||
sce0->sf_idx[w*16+g],
|
||||
sce0->band_type[w*16+g],
|
||||
lambda / (band0->threshold + FLT_MIN), INFINITY, &b1, NULL, 0);
|
||||
lambda / band0->threshold, INFINITY, &b1, NULL, 0);
|
||||
dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
|
||||
R34,
|
||||
sce1->ics.swb_sizes[g],
|
||||
sce1->sf_idx[w*16+g],
|
||||
sce1->band_type[w*16+g],
|
||||
lambda / (band1->threshold + FLT_MIN), INFINITY, &b2, NULL, 0);
|
||||
lambda / band1->threshold, INFINITY, &b2, NULL, 0);
|
||||
dist2 += quantize_band_cost(s, M,
|
||||
M34,
|
||||
sce0->ics.swb_sizes[g],
|
||||
mididx,
|
||||
midcb,
|
||||
lambda / (minthr + FLT_MIN), INFINITY, &b3, NULL, 0);
|
||||
lambda / minthr, INFINITY, &b3, NULL, 0);
|
||||
dist2 += quantize_band_cost(s, S,
|
||||
S34,
|
||||
sce1->ics.swb_sizes[g],
|
||||
sididx,
|
||||
sidcb,
|
||||
mslambda / (minthr * bmax + FLT_MIN), INFINITY, &b4, NULL, 0);
|
||||
mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0);
|
||||
B0 += b1+b2;
|
||||
B1 += b3+b4;
|
||||
dist1 -= b1+b2;
|
||||
|
||||
@@ -155,9 +155,9 @@ static void vector_pow43(int *coefs, int len)
|
||||
for (i=0; i<len; i++) {
|
||||
coef = coefs[i];
|
||||
if (coef < 0)
|
||||
coef = -(int)ff_cbrt_tab_fixed[(-coef) & 8191];
|
||||
coef = -(int)ff_cbrt_tab_fixed[-coef];
|
||||
else
|
||||
coef = (int)ff_cbrt_tab_fixed[ coef & 8191];
|
||||
coef = (int)ff_cbrt_tab_fixed[coef];
|
||||
coefs[i] = coef;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -974,18 +974,14 @@ static int decode_audio_specific_config_gb(AACContext *ac,
|
||||
{
|
||||
int i, ret;
|
||||
GetBitContext gbc = *gb;
|
||||
MPEG4AudioConfig m4ac_bak = *m4ac;
|
||||
|
||||
if ((i = ff_mpeg4audio_get_config_gb(m4ac, &gbc, sync_extension, avctx)) < 0) {
|
||||
*m4ac = m4ac_bak;
|
||||
if ((i = ff_mpeg4audio_get_config_gb(m4ac, &gbc, sync_extension, avctx)) < 0)
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
|
||||
if (m4ac->sampling_index > 12) {
|
||||
av_log(avctx, AV_LOG_ERROR,
|
||||
"invalid sampling rate index %d\n",
|
||||
m4ac->sampling_index);
|
||||
*m4ac = m4ac_bak;
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
if (m4ac->object_type == AOT_ER_AAC_LD &&
|
||||
@@ -993,7 +989,6 @@ static int decode_audio_specific_config_gb(AACContext *ac,
|
||||
av_log(avctx, AV_LOG_ERROR,
|
||||
"invalid low delay sampling rate index %d\n",
|
||||
m4ac->sampling_index);
|
||||
*m4ac = m4ac_bak;
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
|
||||
@@ -2812,7 +2807,7 @@ static void imdct_and_windowing_ld(AACContext *ac, SingleChannelElement *sce)
|
||||
|
||||
static void imdct_and_windowing_eld(AACContext *ac, SingleChannelElement *sce)
|
||||
{
|
||||
UINTFLOAT *in = sce->coeffs;
|
||||
INTFLOAT *in = sce->coeffs;
|
||||
INTFLOAT *out = sce->ret;
|
||||
INTFLOAT *saved = sce->saved;
|
||||
INTFLOAT *buf = ac->buf_mdct;
|
||||
|
||||
+2
-3
@@ -28,7 +28,6 @@
|
||||
* TODOs:
|
||||
* add sane pulse detection
|
||||
***********************************/
|
||||
#include <float.h>
|
||||
|
||||
#include "libavutil/libm.h"
|
||||
#include "libavutil/thread.h"
|
||||
@@ -857,7 +856,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
|
||||
/* Not so fast though */
|
||||
ratio = sqrtf(ratio);
|
||||
}
|
||||
s->lambda = av_clipf(s->lambda * ratio, FLT_EPSILON, 65536.f);
|
||||
s->lambda = FFMIN(s->lambda * ratio, 65536.f);
|
||||
|
||||
/* Keep iterating if we must reduce and lambda is in the sky */
|
||||
if (ratio > 0.9f && ratio < 1.1f) {
|
||||
@@ -902,7 +901,7 @@ static av_cold int aac_encode_end(AVCodecContext *avctx)
|
||||
{
|
||||
AACEncContext *s = avctx->priv_data;
|
||||
|
||||
av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_count ? s->lambda_sum / s->lambda_count : NAN);
|
||||
av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_sum / s->lambda_count);
|
||||
|
||||
ff_mdct_end(&s->mdct1024);
|
||||
ff_mdct_end(&s->mdct128);
|
||||
|
||||
+13
-27
@@ -173,7 +173,6 @@ void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
|
||||
sce->ics.window_sequence[0] == LONG_START_SEQUENCE ? 0 : 2;
|
||||
const int sfb_len = sfb_end - sfb_start;
|
||||
const int coef_len = sce->ics.swb_offset[sfb_end] - sce->ics.swb_offset[sfb_start];
|
||||
const int n_filt = is8 ? 1 : order != TNS_MAX_ORDER ? 2 : 3;
|
||||
|
||||
if (coef_len <= 0 || sfb_len <= 0) {
|
||||
sce->tns.present = 0;
|
||||
@@ -181,30 +180,16 @@ void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
|
||||
}
|
||||
|
||||
for (w = 0; w < sce->ics.num_windows; w++) {
|
||||
float en[4] = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
int oc_start = 0;
|
||||
float en[2] = {0.0f, 0.0f};
|
||||
int oc_start = 0, os_start = 0;
|
||||
int coef_start = sce->ics.swb_offset[sfb_start];
|
||||
|
||||
if (n_filt == 2) {
|
||||
for (g = sfb_start; g < sce->ics.num_swb && g <= sfb_end; g++) {
|
||||
FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[w*16+g];
|
||||
if (g > sfb_start + (sfb_len/2))
|
||||
en[1] += band->energy; /* End */
|
||||
else
|
||||
en[0] += band->energy; /* Start */
|
||||
}
|
||||
en[2] = en[0];
|
||||
} else {
|
||||
for (g = sfb_start; g < sce->ics.num_swb && g <= sfb_end; g++) {
|
||||
FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[w*16+g];
|
||||
if (g > sfb_start + (sfb_len/2) + (sfb_len/4))
|
||||
en[2] += band->energy; /* End */
|
||||
else if (g > sfb_start + (sfb_len/2) - (sfb_len/4))
|
||||
en[1] += band->energy; /* Middle */
|
||||
else
|
||||
en[0] += band->energy; /* Start */
|
||||
}
|
||||
en[3] = en[0];
|
||||
for (g = sfb_start; g < sce->ics.num_swb && g <= sfb_end; g++) {
|
||||
FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[w*16+g];
|
||||
if (g > sfb_start + (sfb_len/2))
|
||||
en[1] += band->energy;
|
||||
else
|
||||
en[0] += band->energy;
|
||||
}
|
||||
|
||||
/* LPC */
|
||||
@@ -214,14 +199,15 @@ void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
|
||||
if (!order || !isfinite(gain) || gain < TNS_GAIN_THRESHOLD_LOW || gain > TNS_GAIN_THRESHOLD_HIGH)
|
||||
continue;
|
||||
|
||||
tns->n_filt[w] = n_filt;
|
||||
tns->n_filt[w] = is8 ? 1 : order != TNS_MAX_ORDER ? 2 : 3;
|
||||
for (g = 0; g < tns->n_filt[w]; g++) {
|
||||
tns->direction[w][g] = slant != 2 ? slant : en[g] < en[g + 1];
|
||||
tns->order[w][g] = order/tns->n_filt[w];
|
||||
tns->length[w][g] = sfb_len/tns->n_filt[w];
|
||||
tns->direction[w][g] = slant != 2 ? slant : en[g] < en[!g];
|
||||
tns->order[w][g] = g < tns->n_filt[w] ? order/tns->n_filt[w] : order - oc_start;
|
||||
tns->length[w][g] = g < tns->n_filt[w] ? sfb_len/tns->n_filt[w] : sfb_len - os_start;
|
||||
quantize_coefs(&coefs[oc_start], tns->coef_idx[w][g], tns->coef[w][g],
|
||||
tns->order[w][g], c_bits);
|
||||
oc_start += tns->order[w][g];
|
||||
os_start += tns->length[w][g];
|
||||
}
|
||||
count++;
|
||||
}
|
||||
|
||||
+1
-4
@@ -308,9 +308,6 @@ static av_cold int psy_3gpp_init(FFPsyContext *ctx) {
|
||||
const int bandwidth = ctx->cutoff ? ctx->cutoff : AAC_CUTOFF(ctx->avctx);
|
||||
const float num_bark = calc_bark((float)bandwidth);
|
||||
|
||||
if (bandwidth <= 0)
|
||||
return AVERROR(EINVAL);
|
||||
|
||||
ctx->model_priv_data = av_mallocz(sizeof(AacPsyContext));
|
||||
if (!ctx->model_priv_data)
|
||||
return AVERROR(ENOMEM);
|
||||
@@ -797,7 +794,7 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel,
|
||||
|
||||
if (pe < 1.15f * desired_pe) {
|
||||
/* 6.6.1.3.6 "Final threshold modification by linearization" */
|
||||
norm_fac = norm_fac ? 1.0f / norm_fac : 0;
|
||||
norm_fac = 1.0f / norm_fac;
|
||||
for (w = 0; w < wi->num_windows*16; w += 16) {
|
||||
for (g = 0; g < num_bands; g++) {
|
||||
AacPsyBand *band = &pch->band[w+g];
|
||||
|
||||
@@ -592,7 +592,6 @@ static int sbr_make_f_derived(AACContext *ac, SpectralBandReplication *sbr)
|
||||
|
||||
if (sbr->n_q > 5) {
|
||||
av_log(ac->avctx, AV_LOG_ERROR, "Too many noise floor scale factors: %d\n", sbr->n_q);
|
||||
sbr->n_q = 1;
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
+109
-109
@@ -19,130 +19,130 @@
|
||||
#include "libavutil/aarch64/asm.S"
|
||||
|
||||
function ff_ps_add_squares_neon, export=1
|
||||
1: ld1 {v0.4s,v1.4s}, [x1], #32
|
||||
fmul v0.4s, v0.4s, v0.4s
|
||||
fmul v1.4s, v1.4s, v1.4s
|
||||
faddp v2.4s, v0.4s, v1.4s
|
||||
ld1 {v3.4s}, [x0]
|
||||
fadd v3.4s, v3.4s, v2.4s
|
||||
st1 {v3.4s}, [x0], #16
|
||||
subs w2, w2, #4
|
||||
b.gt 1b
|
||||
1: ld1 {v0.4S,v1.4S}, [x1], #32
|
||||
fmul v0.4S, v0.4S, v0.4S
|
||||
fmul v1.4S, v1.4S, v1.4S
|
||||
faddp v2.4S, v0.4S, v1.4S
|
||||
ld1 {v3.4S}, [x0]
|
||||
fadd v3.4S, v3.4S, v2.4S
|
||||
st1 {v3.4S}, [x0], #16
|
||||
subs w2, w2, #4
|
||||
b.gt 1b
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_ps_mul_pair_single_neon, export=1
|
||||
1: ld1 {v0.4s,v1.4s}, [x1], #32
|
||||
ld1 {v2.4s}, [x2], #16
|
||||
zip1 v3.4s, v2.4s, v2.4s
|
||||
zip2 v4.4s, v2.4s, v2.4s
|
||||
fmul v0.4s, v0.4s, v3.4s
|
||||
fmul v1.4s, v1.4s, v4.4s
|
||||
st1 {v0.4s,v1.4s}, [x0], #32
|
||||
subs w3, w3, #4
|
||||
b.gt 1b
|
||||
1: ld1 {v0.4S,v1.4S}, [x1], #32
|
||||
ld1 {v2.4S}, [x2], #16
|
||||
zip1 v3.4S, v2.4S, v2.4S
|
||||
zip2 v4.4S, v2.4S, v2.4S
|
||||
fmul v0.4S, v0.4S, v3.4S
|
||||
fmul v1.4S, v1.4S, v4.4S
|
||||
st1 {v0.4S,v1.4S}, [x0], #32
|
||||
subs w3, w3, #4
|
||||
b.gt 1b
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_ps_stereo_interpolate_neon, export=1
|
||||
ld1 {v0.4s}, [x2]
|
||||
ld1 {v1.4s}, [x3]
|
||||
zip1 v4.4s, v0.4s, v0.4s
|
||||
zip2 v5.4s, v0.4s, v0.4s
|
||||
zip1 v6.4s, v1.4s, v1.4s
|
||||
zip2 v7.4s, v1.4s, v1.4s
|
||||
1: ld1 {v2.2s}, [x0]
|
||||
ld1 {v3.2s}, [x1]
|
||||
fadd v4.4s, v4.4s, v6.4s
|
||||
fadd v5.4s, v5.4s, v7.4s
|
||||
mov v2.d[1], v2.d[0]
|
||||
mov v3.d[1], v3.d[0]
|
||||
fmul v2.4s, v2.4s, v4.4s
|
||||
fmla v2.4s, v3.4s, v5.4s
|
||||
st1 {v2.d}[0], [x0], #8
|
||||
st1 {v2.d}[1], [x1], #8
|
||||
subs w4, w4, #1
|
||||
b.gt 1b
|
||||
ld1 {v0.4S}, [x2]
|
||||
ld1 {v1.4S}, [x3]
|
||||
zip1 v4.4S, v0.4S, v0.4S
|
||||
zip2 v5.4S, v0.4S, v0.4S
|
||||
zip1 v6.4S, v1.4S, v1.4S
|
||||
zip2 v7.4S, v1.4S, v1.4S
|
||||
1: ld1 {v2.2S}, [x0]
|
||||
ld1 {v3.2S}, [x1]
|
||||
fadd v4.4S, v4.4S, v6.4S
|
||||
fadd v5.4S, v5.4S, v7.4S
|
||||
mov v2.D[1], v2.D[0]
|
||||
mov v3.D[1], v3.D[0]
|
||||
fmul v2.4S, v2.4S, v4.4S
|
||||
fmla v2.4S, v3.4S, v5.4S
|
||||
st1 {v2.D}[0], [x0], #8
|
||||
st1 {v2.D}[1], [x1], #8
|
||||
subs w4, w4, #1
|
||||
b.gt 1b
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_ps_stereo_interpolate_ipdopd_neon, export=1
|
||||
ld1 {v0.4s,v1.4s}, [x2]
|
||||
ld1 {v6.4s,v7.4s}, [x3]
|
||||
fneg v2.4s, v1.4s
|
||||
fneg v3.4s, v7.4s
|
||||
zip1 v16.4s, v0.4s, v0.4s
|
||||
zip2 v17.4s, v0.4s, v0.4s
|
||||
zip1 v18.4s, v2.4s, v1.4s
|
||||
zip2 v19.4s, v2.4s, v1.4s
|
||||
zip1 v20.4s, v6.4s, v6.4s
|
||||
zip2 v21.4s, v6.4s, v6.4s
|
||||
zip1 v22.4s, v3.4s, v7.4s
|
||||
zip2 v23.4s, v3.4s, v7.4s
|
||||
1: ld1 {v2.2s}, [x0]
|
||||
ld1 {v3.2s}, [x1]
|
||||
fadd v16.4s, v16.4s, v20.4s
|
||||
fadd v17.4s, v17.4s, v21.4s
|
||||
mov v2.d[1], v2.d[0]
|
||||
mov v3.d[1], v3.d[0]
|
||||
fmul v4.4s, v2.4s, v16.4s
|
||||
fmla v4.4s, v3.4s, v17.4s
|
||||
fadd v18.4s, v18.4s, v22.4s
|
||||
fadd v19.4s, v19.4s, v23.4s
|
||||
ext v2.16b, v2.16b, v2.16b, #4
|
||||
ext v3.16b, v3.16b, v3.16b, #4
|
||||
fmla v4.4s, v2.4s, v18.4s
|
||||
fmla v4.4s, v3.4s, v19.4s
|
||||
st1 {v4.d}[0], [x0], #8
|
||||
st1 {v4.d}[1], [x1], #8
|
||||
subs w4, w4, #1
|
||||
b.gt 1b
|
||||
ld1 {v0.4S,v1.4S}, [x2]
|
||||
ld1 {v6.4S,v7.4S}, [x3]
|
||||
fneg v2.4S, v1.4S
|
||||
fneg v3.4S, v7.4S
|
||||
zip1 v16.4S, v0.4S, v0.4S
|
||||
zip2 v17.4S, v0.4S, v0.4S
|
||||
zip1 v18.4S, v2.4S, v1.4S
|
||||
zip2 v19.4S, v2.4S, v1.4S
|
||||
zip1 v20.4S, v6.4S, v6.4S
|
||||
zip2 v21.4S, v6.4S, v6.4S
|
||||
zip1 v22.4S, v3.4S, v7.4S
|
||||
zip2 v23.4S, v3.4S, v7.4S
|
||||
1: ld1 {v2.2S}, [x0]
|
||||
ld1 {v3.2S}, [x1]
|
||||
fadd v16.4S, v16.4S, v20.4S
|
||||
fadd v17.4S, v17.4S, v21.4S
|
||||
mov v2.D[1], v2.D[0]
|
||||
mov v3.D[1], v3.D[0]
|
||||
fmul v4.4S, v2.4S, v16.4S
|
||||
fmla v4.4S, v3.4S, v17.4S
|
||||
fadd v18.4S, v18.4S, v22.4S
|
||||
fadd v19.4S, v19.4S, v23.4S
|
||||
ext v2.16B, v2.16B, v2.16B, #4
|
||||
ext v3.16B, v3.16B, v3.16B, #4
|
||||
fmla v4.4S, v2.4S, v18.4S
|
||||
fmla v4.4S, v3.4S, v19.4S
|
||||
st1 {v4.D}[0], [x0], #8
|
||||
st1 {v4.D}[1], [x1], #8
|
||||
subs w4, w4, #1
|
||||
b.gt 1b
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_ps_hybrid_analysis_neon, export=1
|
||||
lsl x3, x3, #3
|
||||
ld2 {v0.4s,v1.4s}, [x1], #32
|
||||
ld2 {v2.2s,v3.2s}, [x1], #16
|
||||
ld1 {v24.2s}, [x1], #8
|
||||
ld2 {v4.2s,v5.2s}, [x1], #16
|
||||
ld2 {v6.4s,v7.4s}, [x1]
|
||||
rev64 v6.4s, v6.4s
|
||||
rev64 v7.4s, v7.4s
|
||||
ext v6.16b, v6.16b, v6.16b, #8
|
||||
ext v7.16b, v7.16b, v7.16b, #8
|
||||
rev64 v4.2s, v4.2s
|
||||
rev64 v5.2s, v5.2s
|
||||
mov v2.d[1], v3.d[0]
|
||||
mov v4.d[1], v5.d[0]
|
||||
mov v5.d[1], v2.d[0]
|
||||
mov v3.d[1], v4.d[0]
|
||||
fadd v16.4s, v0.4s, v6.4s
|
||||
fadd v17.4s, v1.4s, v7.4s
|
||||
fsub v18.4s, v1.4s, v7.4s
|
||||
fsub v19.4s, v0.4s, v6.4s
|
||||
fadd v22.4s, v2.4s, v4.4s
|
||||
fsub v23.4s, v5.4s, v3.4s
|
||||
trn1 v20.2d, v22.2d, v23.2d // {re4+re8, re5+re7, im8-im4, im7-im5}
|
||||
trn2 v21.2d, v22.2d, v23.2d // {im4+im8, im5+im7, re4-re8, re5-re7}
|
||||
1: ld2 {v2.4s,v3.4s}, [x2], #32
|
||||
ld2 {v4.2s,v5.2s}, [x2], #16
|
||||
ld1 {v6.2s}, [x2], #8
|
||||
add x2, x2, #8
|
||||
mov v4.d[1], v5.d[0]
|
||||
mov v6.s[1], v6.s[0]
|
||||
fmul v6.2s, v6.2s, v24.2s
|
||||
fmul v0.4s, v2.4s, v16.4s
|
||||
fmul v1.4s, v2.4s, v17.4s
|
||||
fmls v0.4s, v3.4s, v18.4s
|
||||
fmla v1.4s, v3.4s, v19.4s
|
||||
fmla v0.4s, v4.4s, v20.4s
|
||||
fmla v1.4s, v4.4s, v21.4s
|
||||
faddp v0.4s, v0.4s, v1.4s
|
||||
faddp v0.4s, v0.4s, v0.4s
|
||||
fadd v0.2s, v0.2s, v6.2s
|
||||
st1 {v0.2s}, [x0], x3
|
||||
subs w4, w4, #1
|
||||
b.gt 1b
|
||||
lsl x3, x3, #3
|
||||
ld2 {v0.4S,v1.4S}, [x1], #32
|
||||
ld2 {v2.2S,v3.2S}, [x1], #16
|
||||
ld1 {v24.2S}, [x1], #8
|
||||
ld2 {v4.2S,v5.2S}, [x1], #16
|
||||
ld2 {v6.4S,v7.4S}, [x1]
|
||||
rev64 v6.4S, v6.4S
|
||||
rev64 v7.4S, v7.4S
|
||||
ext v6.16B, v6.16B, v6.16B, #8
|
||||
ext v7.16B, v7.16B, v7.16B, #8
|
||||
rev64 v4.2S, v4.2S
|
||||
rev64 v5.2S, v5.2S
|
||||
mov v2.D[1], v3.D[0]
|
||||
mov v4.D[1], v5.D[0]
|
||||
mov v5.D[1], v2.D[0]
|
||||
mov v3.D[1], v4.D[0]
|
||||
fadd v16.4S, v0.4S, v6.4S
|
||||
fadd v17.4S, v1.4S, v7.4S
|
||||
fsub v18.4S, v1.4S, v7.4S
|
||||
fsub v19.4S, v0.4S, v6.4S
|
||||
fadd v22.4S, v2.4S, v4.4S
|
||||
fsub v23.4S, v5.4S, v3.4S
|
||||
trn1 v20.2D, v22.2D, v23.2D // {re4+re8, re5+re7, im8-im4, im7-im5}
|
||||
trn2 v21.2D, v22.2D, v23.2D // {im4+im8, im5+im7, re4-re8, re5-re7}
|
||||
1: ld2 {v2.4S,v3.4S}, [x2], #32
|
||||
ld2 {v4.2S,v5.2S}, [x2], #16
|
||||
ld1 {v6.2S}, [x2], #8
|
||||
add x2, x2, #8
|
||||
mov v4.D[1], v5.D[0]
|
||||
mov v6.S[1], v6.S[0]
|
||||
fmul v6.2S, v6.2S, v24.2S
|
||||
fmul v0.4S, v2.4S, v16.4S
|
||||
fmul v1.4S, v2.4S, v17.4S
|
||||
fmls v0.4S, v3.4S, v18.4S
|
||||
fmla v1.4S, v3.4S, v19.4S
|
||||
fmla v0.4S, v4.4S, v20.4S
|
||||
fmla v1.4S, v4.4S, v21.4S
|
||||
faddp v0.4S, v0.4S, v1.4S
|
||||
faddp v0.4S, v0.4S, v0.4S
|
||||
fadd v0.2S, v0.2S, v6.2S
|
||||
st1 {v0.2S}, [x0], x3
|
||||
subs w4, w4, #1
|
||||
b.gt 1b
|
||||
ret
|
||||
endfunc
|
||||
|
||||
@@ -353,18 +353,18 @@ function fft\n\()_neon, align=6
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
def_fft 32, 16, 8
|
||||
def_fft 64, 32, 16
|
||||
def_fft 128, 64, 32
|
||||
def_fft 256, 128, 64
|
||||
def_fft 512, 256, 128
|
||||
def_fft 1024, 512, 256
|
||||
def_fft 2048, 1024, 512
|
||||
def_fft 4096, 2048, 1024
|
||||
def_fft 8192, 4096, 2048
|
||||
def_fft 16384, 8192, 4096
|
||||
def_fft 32768, 16384, 8192
|
||||
def_fft 65536, 32768, 16384
|
||||
def_fft 32, 16, 8
|
||||
def_fft 64, 32, 16
|
||||
def_fft 128, 64, 32
|
||||
def_fft 256, 128, 64
|
||||
def_fft 512, 256, 128
|
||||
def_fft 1024, 512, 256
|
||||
def_fft 2048, 1024, 512
|
||||
def_fft 4096, 2048, 1024
|
||||
def_fft 8192, 4096, 2048
|
||||
def_fft 16384, 8192, 4096
|
||||
def_fft 32768, 16384, 8192
|
||||
def_fft 65536, 32768, 16384
|
||||
|
||||
function ff_fft_calc_neon, export=1
|
||||
prfm pldl1keep, [x1]
|
||||
|
||||
+205
-205
@@ -36,11 +36,11 @@ function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
|
||||
lsl w9, w9, #3
|
||||
lsl w10, w10, #1
|
||||
add w9, w9, w10
|
||||
add x6, x6, w9, uxtw
|
||||
ld1r {v22.8h}, [x6]
|
||||
add x6, x6, w9, UXTW
|
||||
ld1r {v22.8H}, [x6]
|
||||
.endif
|
||||
.ifc \codec,vc1
|
||||
movi v22.8h, #28
|
||||
movi v22.8H, #28
|
||||
.endif
|
||||
mul w7, w4, w5
|
||||
lsl w14, w5, #3
|
||||
@@ -53,139 +53,139 @@ function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
|
||||
add w4, w4, #64
|
||||
b.eq 2f
|
||||
|
||||
dup v0.8b, w4
|
||||
dup v1.8b, w12
|
||||
ld1 {v4.8b, v5.8b}, [x1], x2
|
||||
dup v2.8b, w6
|
||||
dup v3.8b, w7
|
||||
ext v5.8b, v4.8b, v5.8b, #1
|
||||
1: ld1 {v6.8b, v7.8b}, [x1], x2
|
||||
umull v16.8h, v4.8b, v0.8b
|
||||
umlal v16.8h, v5.8b, v1.8b
|
||||
ext v7.8b, v6.8b, v7.8b, #1
|
||||
ld1 {v4.8b, v5.8b}, [x1], x2
|
||||
umlal v16.8h, v6.8b, v2.8b
|
||||
dup v0.8B, w4
|
||||
dup v1.8B, w12
|
||||
ld1 {v4.8B, v5.8B}, [x1], x2
|
||||
dup v2.8B, w6
|
||||
dup v3.8B, w7
|
||||
ext v5.8B, v4.8B, v5.8B, #1
|
||||
1: ld1 {v6.8B, v7.8B}, [x1], x2
|
||||
umull v16.8H, v4.8B, v0.8B
|
||||
umlal v16.8H, v5.8B, v1.8B
|
||||
ext v7.8B, v6.8B, v7.8B, #1
|
||||
ld1 {v4.8B, v5.8B}, [x1], x2
|
||||
umlal v16.8H, v6.8B, v2.8B
|
||||
prfm pldl1strm, [x1]
|
||||
ext v5.8b, v4.8b, v5.8b, #1
|
||||
umlal v16.8h, v7.8b, v3.8b
|
||||
umull v17.8h, v6.8b, v0.8b
|
||||
ext v5.8B, v4.8B, v5.8B, #1
|
||||
umlal v16.8H, v7.8B, v3.8B
|
||||
umull v17.8H, v6.8B, v0.8B
|
||||
subs w3, w3, #2
|
||||
umlal v17.8h, v7.8b, v1.8b
|
||||
umlal v17.8h, v4.8b, v2.8b
|
||||
umlal v17.8h, v5.8b, v3.8b
|
||||
umlal v17.8H, v7.8B, v1.8B
|
||||
umlal v17.8H, v4.8B, v2.8B
|
||||
umlal v17.8H, v5.8B, v3.8B
|
||||
prfm pldl1strm, [x1, x2]
|
||||
.ifc \codec,h264
|
||||
rshrn v16.8b, v16.8h, #6
|
||||
rshrn v17.8b, v17.8h, #6
|
||||
rshrn v16.8B, v16.8H, #6
|
||||
rshrn v17.8B, v17.8H, #6
|
||||
.else
|
||||
add v16.8h, v16.8h, v22.8h
|
||||
add v17.8h, v17.8h, v22.8h
|
||||
shrn v16.8b, v16.8h, #6
|
||||
shrn v17.8b, v17.8h, #6
|
||||
add v16.8H, v16.8H, v22.8H
|
||||
add v17.8H, v17.8H, v22.8H
|
||||
shrn v16.8B, v16.8H, #6
|
||||
shrn v17.8B, v17.8H, #6
|
||||
.endif
|
||||
.ifc \type,avg
|
||||
ld1 {v20.8b}, [x8], x2
|
||||
ld1 {v21.8b}, [x8], x2
|
||||
urhadd v16.8b, v16.8b, v20.8b
|
||||
urhadd v17.8b, v17.8b, v21.8b
|
||||
ld1 {v20.8B}, [x8], x2
|
||||
ld1 {v21.8B}, [x8], x2
|
||||
urhadd v16.8B, v16.8B, v20.8B
|
||||
urhadd v17.8B, v17.8B, v21.8B
|
||||
.endif
|
||||
st1 {v16.8b}, [x0], x2
|
||||
st1 {v17.8b}, [x0], x2
|
||||
st1 {v16.8B}, [x0], x2
|
||||
st1 {v17.8B}, [x0], x2
|
||||
b.gt 1b
|
||||
ret
|
||||
|
||||
2: adds w12, w12, w6
|
||||
dup v0.8b, w4
|
||||
dup v0.8B, w4
|
||||
b.eq 5f
|
||||
tst w6, w6
|
||||
dup v1.8b, w12
|
||||
dup v1.8B, w12
|
||||
b.eq 4f
|
||||
|
||||
ld1 {v4.8b}, [x1], x2
|
||||
3: ld1 {v6.8b}, [x1], x2
|
||||
umull v16.8h, v4.8b, v0.8b
|
||||
umlal v16.8h, v6.8b, v1.8b
|
||||
ld1 {v4.8b}, [x1], x2
|
||||
umull v17.8h, v6.8b, v0.8b
|
||||
umlal v17.8h, v4.8b, v1.8b
|
||||
ld1 {v4.8B}, [x1], x2
|
||||
3: ld1 {v6.8B}, [x1], x2
|
||||
umull v16.8H, v4.8B, v0.8B
|
||||
umlal v16.8H, v6.8B, v1.8B
|
||||
ld1 {v4.8B}, [x1], x2
|
||||
umull v17.8H, v6.8B, v0.8B
|
||||
umlal v17.8H, v4.8B, v1.8B
|
||||
prfm pldl1strm, [x1]
|
||||
.ifc \codec,h264
|
||||
rshrn v16.8b, v16.8h, #6
|
||||
rshrn v17.8b, v17.8h, #6
|
||||
rshrn v16.8B, v16.8H, #6
|
||||
rshrn v17.8B, v17.8H, #6
|
||||
.else
|
||||
add v16.8h, v16.8h, v22.8h
|
||||
add v17.8h, v17.8h, v22.8h
|
||||
shrn v16.8b, v16.8h, #6
|
||||
shrn v17.8b, v17.8h, #6
|
||||
add v16.8H, v16.8H, v22.8H
|
||||
add v17.8H, v17.8H, v22.8H
|
||||
shrn v16.8B, v16.8H, #6
|
||||
shrn v17.8B, v17.8H, #6
|
||||
.endif
|
||||
prfm pldl1strm, [x1, x2]
|
||||
.ifc \type,avg
|
||||
ld1 {v20.8b}, [x8], x2
|
||||
ld1 {v21.8b}, [x8], x2
|
||||
urhadd v16.8b, v16.8b, v20.8b
|
||||
urhadd v17.8b, v17.8b, v21.8b
|
||||
ld1 {v20.8B}, [x8], x2
|
||||
ld1 {v21.8B}, [x8], x2
|
||||
urhadd v16.8B, v16.8B, v20.8B
|
||||
urhadd v17.8B, v17.8B, v21.8B
|
||||
.endif
|
||||
subs w3, w3, #2
|
||||
st1 {v16.8b}, [x0], x2
|
||||
st1 {v17.8b}, [x0], x2
|
||||
st1 {v16.8B}, [x0], x2
|
||||
st1 {v17.8B}, [x0], x2
|
||||
b.gt 3b
|
||||
ret
|
||||
|
||||
4: ld1 {v4.8b, v5.8b}, [x1], x2
|
||||
ld1 {v6.8b, v7.8b}, [x1], x2
|
||||
ext v5.8b, v4.8b, v5.8b, #1
|
||||
ext v7.8b, v6.8b, v7.8b, #1
|
||||
4: ld1 {v4.8B, v5.8B}, [x1], x2
|
||||
ld1 {v6.8B, v7.8B}, [x1], x2
|
||||
ext v5.8B, v4.8B, v5.8B, #1
|
||||
ext v7.8B, v6.8B, v7.8B, #1
|
||||
prfm pldl1strm, [x1]
|
||||
subs w3, w3, #2
|
||||
umull v16.8h, v4.8b, v0.8b
|
||||
umlal v16.8h, v5.8b, v1.8b
|
||||
umull v17.8h, v6.8b, v0.8b
|
||||
umlal v17.8h, v7.8b, v1.8b
|
||||
umull v16.8H, v4.8B, v0.8B
|
||||
umlal v16.8H, v5.8B, v1.8B
|
||||
umull v17.8H, v6.8B, v0.8B
|
||||
umlal v17.8H, v7.8B, v1.8B
|
||||
prfm pldl1strm, [x1, x2]
|
||||
.ifc \codec,h264
|
||||
rshrn v16.8b, v16.8h, #6
|
||||
rshrn v17.8b, v17.8h, #6
|
||||
rshrn v16.8B, v16.8H, #6
|
||||
rshrn v17.8B, v17.8H, #6
|
||||
.else
|
||||
add v16.8h, v16.8h, v22.8h
|
||||
add v17.8h, v17.8h, v22.8h
|
||||
shrn v16.8b, v16.8h, #6
|
||||
shrn v17.8b, v17.8h, #6
|
||||
add v16.8H, v16.8H, v22.8H
|
||||
add v17.8H, v17.8H, v22.8H
|
||||
shrn v16.8B, v16.8H, #6
|
||||
shrn v17.8B, v17.8H, #6
|
||||
.endif
|
||||
.ifc \type,avg
|
||||
ld1 {v20.8b}, [x8], x2
|
||||
ld1 {v21.8b}, [x8], x2
|
||||
urhadd v16.8b, v16.8b, v20.8b
|
||||
urhadd v17.8b, v17.8b, v21.8b
|
||||
ld1 {v20.8B}, [x8], x2
|
||||
ld1 {v21.8B}, [x8], x2
|
||||
urhadd v16.8B, v16.8B, v20.8B
|
||||
urhadd v17.8B, v17.8B, v21.8B
|
||||
.endif
|
||||
st1 {v16.8b}, [x0], x2
|
||||
st1 {v17.8b}, [x0], x2
|
||||
st1 {v16.8B}, [x0], x2
|
||||
st1 {v17.8B}, [x0], x2
|
||||
b.gt 4b
|
||||
ret
|
||||
|
||||
5: ld1 {v4.8b}, [x1], x2
|
||||
ld1 {v5.8b}, [x1], x2
|
||||
5: ld1 {v4.8B}, [x1], x2
|
||||
ld1 {v5.8B}, [x1], x2
|
||||
prfm pldl1strm, [x1]
|
||||
subs w3, w3, #2
|
||||
umull v16.8h, v4.8b, v0.8b
|
||||
umull v17.8h, v5.8b, v0.8b
|
||||
umull v16.8H, v4.8B, v0.8B
|
||||
umull v17.8H, v5.8B, v0.8B
|
||||
prfm pldl1strm, [x1, x2]
|
||||
.ifc \codec,h264
|
||||
rshrn v16.8b, v16.8h, #6
|
||||
rshrn v17.8b, v17.8h, #6
|
||||
rshrn v16.8B, v16.8H, #6
|
||||
rshrn v17.8B, v17.8H, #6
|
||||
.else
|
||||
add v16.8h, v16.8h, v22.8h
|
||||
add v17.8h, v17.8h, v22.8h
|
||||
shrn v16.8b, v16.8h, #6
|
||||
shrn v17.8b, v17.8h, #6
|
||||
add v16.8H, v16.8H, v22.8H
|
||||
add v17.8H, v17.8H, v22.8H
|
||||
shrn v16.8B, v16.8H, #6
|
||||
shrn v17.8B, v17.8H, #6
|
||||
.endif
|
||||
.ifc \type,avg
|
||||
ld1 {v20.8b}, [x8], x2
|
||||
ld1 {v21.8b}, [x8], x2
|
||||
urhadd v16.8b, v16.8b, v20.8b
|
||||
urhadd v17.8b, v17.8b, v21.8b
|
||||
ld1 {v20.8B}, [x8], x2
|
||||
ld1 {v21.8B}, [x8], x2
|
||||
urhadd v16.8B, v16.8B, v20.8B
|
||||
urhadd v17.8B, v17.8B, v21.8B
|
||||
.endif
|
||||
st1 {v16.8b}, [x0], x2
|
||||
st1 {v17.8b}, [x0], x2
|
||||
st1 {v16.8B}, [x0], x2
|
||||
st1 {v17.8B}, [x0], x2
|
||||
b.gt 5b
|
||||
ret
|
||||
endfunc
|
||||
@@ -206,11 +206,11 @@ function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
|
||||
lsl w9, w9, #3
|
||||
lsl w10, w10, #1
|
||||
add w9, w9, w10
|
||||
add x6, x6, w9, uxtw
|
||||
ld1r {v22.8h}, [x6]
|
||||
add x6, x6, w9, UXTW
|
||||
ld1r {v22.8H}, [x6]
|
||||
.endif
|
||||
.ifc \codec,vc1
|
||||
movi v22.8h, #28
|
||||
movi v22.8H, #28
|
||||
.endif
|
||||
mul w7, w4, w5
|
||||
lsl w14, w5, #3
|
||||
@@ -223,133 +223,133 @@ function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
|
||||
add w4, w4, #64
|
||||
b.eq 2f
|
||||
|
||||
dup v24.8b, w4
|
||||
dup v25.8b, w12
|
||||
ld1 {v4.8b}, [x1], x2
|
||||
dup v26.8b, w6
|
||||
dup v27.8b, w7
|
||||
ext v5.8b, v4.8b, v5.8b, #1
|
||||
trn1 v0.2s, v24.2s, v25.2s
|
||||
trn1 v2.2s, v26.2s, v27.2s
|
||||
trn1 v4.2s, v4.2s, v5.2s
|
||||
1: ld1 {v6.8b}, [x1], x2
|
||||
ext v7.8b, v6.8b, v7.8b, #1
|
||||
trn1 v6.2s, v6.2s, v7.2s
|
||||
umull v18.8h, v4.8b, v0.8b
|
||||
umlal v18.8h, v6.8b, v2.8b
|
||||
ld1 {v4.8b}, [x1], x2
|
||||
ext v5.8b, v4.8b, v5.8b, #1
|
||||
trn1 v4.2s, v4.2s, v5.2s
|
||||
dup v24.8B, w4
|
||||
dup v25.8B, w12
|
||||
ld1 {v4.8B}, [x1], x2
|
||||
dup v26.8B, w6
|
||||
dup v27.8B, w7
|
||||
ext v5.8B, v4.8B, v5.8B, #1
|
||||
trn1 v0.2S, v24.2S, v25.2S
|
||||
trn1 v2.2S, v26.2S, v27.2S
|
||||
trn1 v4.2S, v4.2S, v5.2S
|
||||
1: ld1 {v6.8B}, [x1], x2
|
||||
ext v7.8B, v6.8B, v7.8B, #1
|
||||
trn1 v6.2S, v6.2S, v7.2S
|
||||
umull v18.8H, v4.8B, v0.8B
|
||||
umlal v18.8H, v6.8B, v2.8B
|
||||
ld1 {v4.8B}, [x1], x2
|
||||
ext v5.8B, v4.8B, v5.8B, #1
|
||||
trn1 v4.2S, v4.2S, v5.2S
|
||||
prfm pldl1strm, [x1]
|
||||
umull v19.8h, v6.8b, v0.8b
|
||||
umlal v19.8h, v4.8b, v2.8b
|
||||
trn1 v30.2d, v18.2d, v19.2d
|
||||
trn2 v31.2d, v18.2d, v19.2d
|
||||
add v18.8h, v30.8h, v31.8h
|
||||
umull v19.8H, v6.8B, v0.8B
|
||||
umlal v19.8H, v4.8B, v2.8B
|
||||
trn1 v30.2D, v18.2D, v19.2D
|
||||
trn2 v31.2D, v18.2D, v19.2D
|
||||
add v18.8H, v30.8H, v31.8H
|
||||
.ifc \codec,h264
|
||||
rshrn v16.8b, v18.8h, #6
|
||||
rshrn v16.8B, v18.8H, #6
|
||||
.else
|
||||
add v18.8h, v18.8h, v22.8h
|
||||
shrn v16.8b, v18.8h, #6
|
||||
add v18.8H, v18.8H, v22.8H
|
||||
shrn v16.8B, v18.8H, #6
|
||||
.endif
|
||||
subs w3, w3, #2
|
||||
prfm pldl1strm, [x1, x2]
|
||||
.ifc \type,avg
|
||||
ld1 {v20.s}[0], [x8], x2
|
||||
ld1 {v20.s}[1], [x8], x2
|
||||
urhadd v16.8b, v16.8b, v20.8b
|
||||
ld1 {v20.S}[0], [x8], x2
|
||||
ld1 {v20.S}[1], [x8], x2
|
||||
urhadd v16.8B, v16.8B, v20.8B
|
||||
.endif
|
||||
st1 {v16.s}[0], [x0], x2
|
||||
st1 {v16.s}[1], [x0], x2
|
||||
st1 {v16.S}[0], [x0], x2
|
||||
st1 {v16.S}[1], [x0], x2
|
||||
b.gt 1b
|
||||
ret
|
||||
|
||||
2: adds w12, w12, w6
|
||||
dup v30.8b, w4
|
||||
dup v30.8B, w4
|
||||
b.eq 5f
|
||||
tst w6, w6
|
||||
dup v31.8b, w12
|
||||
trn1 v0.2s, v30.2s, v31.2s
|
||||
trn2 v1.2s, v30.2s, v31.2s
|
||||
dup v31.8B, w12
|
||||
trn1 v0.2S, v30.2S, v31.2S
|
||||
trn2 v1.2S, v30.2S, v31.2S
|
||||
b.eq 4f
|
||||
|
||||
ext v1.8b, v0.8b, v1.8b, #4
|
||||
ld1 {v4.s}[0], [x1], x2
|
||||
3: ld1 {v4.s}[1], [x1], x2
|
||||
umull v18.8h, v4.8b, v0.8b
|
||||
ld1 {v4.s}[0], [x1], x2
|
||||
umull v19.8h, v4.8b, v1.8b
|
||||
trn1 v30.2d, v18.2d, v19.2d
|
||||
trn2 v31.2d, v18.2d, v19.2d
|
||||
add v18.8h, v30.8h, v31.8h
|
||||
ext v1.8B, v0.8B, v1.8B, #4
|
||||
ld1 {v4.S}[0], [x1], x2
|
||||
3: ld1 {v4.S}[1], [x1], x2
|
||||
umull v18.8H, v4.8B, v0.8B
|
||||
ld1 {v4.S}[0], [x1], x2
|
||||
umull v19.8H, v4.8B, v1.8B
|
||||
trn1 v30.2D, v18.2D, v19.2D
|
||||
trn2 v31.2D, v18.2D, v19.2D
|
||||
add v18.8H, v30.8H, v31.8H
|
||||
prfm pldl1strm, [x1]
|
||||
.ifc \codec,h264
|
||||
rshrn v16.8b, v18.8h, #6
|
||||
rshrn v16.8B, v18.8H, #6
|
||||
.else
|
||||
add v18.8h, v18.8h, v22.8h
|
||||
shrn v16.8b, v18.8h, #6
|
||||
add v18.8H, v18.8H, v22.8H
|
||||
shrn v16.8B, v18.8H, #6
|
||||
.endif
|
||||
.ifc \type,avg
|
||||
ld1 {v20.s}[0], [x8], x2
|
||||
ld1 {v20.s}[1], [x8], x2
|
||||
urhadd v16.8b, v16.8b, v20.8b
|
||||
ld1 {v20.S}[0], [x8], x2
|
||||
ld1 {v20.S}[1], [x8], x2
|
||||
urhadd v16.8B, v16.8B, v20.8B
|
||||
.endif
|
||||
subs w3, w3, #2
|
||||
prfm pldl1strm, [x1, x2]
|
||||
st1 {v16.s}[0], [x0], x2
|
||||
st1 {v16.s}[1], [x0], x2
|
||||
st1 {v16.S}[0], [x0], x2
|
||||
st1 {v16.S}[1], [x0], x2
|
||||
b.gt 3b
|
||||
ret
|
||||
|
||||
4: ld1 {v4.8b}, [x1], x2
|
||||
ld1 {v6.8b}, [x1], x2
|
||||
ext v5.8b, v4.8b, v5.8b, #1
|
||||
ext v7.8b, v6.8b, v7.8b, #1
|
||||
trn1 v4.2s, v4.2s, v5.2s
|
||||
trn1 v6.2s, v6.2s, v7.2s
|
||||
umull v18.8h, v4.8b, v0.8b
|
||||
umull v19.8h, v6.8b, v0.8b
|
||||
4: ld1 {v4.8B}, [x1], x2
|
||||
ld1 {v6.8B}, [x1], x2
|
||||
ext v5.8B, v4.8B, v5.8B, #1
|
||||
ext v7.8B, v6.8B, v7.8B, #1
|
||||
trn1 v4.2S, v4.2S, v5.2S
|
||||
trn1 v6.2S, v6.2S, v7.2S
|
||||
umull v18.8H, v4.8B, v0.8B
|
||||
umull v19.8H, v6.8B, v0.8B
|
||||
subs w3, w3, #2
|
||||
trn1 v30.2d, v18.2d, v19.2d
|
||||
trn2 v31.2d, v18.2d, v19.2d
|
||||
add v18.8h, v30.8h, v31.8h
|
||||
trn1 v30.2D, v18.2D, v19.2D
|
||||
trn2 v31.2D, v18.2D, v19.2D
|
||||
add v18.8H, v30.8H, v31.8H
|
||||
prfm pldl1strm, [x1]
|
||||
.ifc \codec,h264
|
||||
rshrn v16.8b, v18.8h, #6
|
||||
rshrn v16.8B, v18.8H, #6
|
||||
.else
|
||||
add v18.8h, v18.8h, v22.8h
|
||||
shrn v16.8b, v18.8h, #6
|
||||
add v18.8H, v18.8H, v22.8H
|
||||
shrn v16.8B, v18.8H, #6
|
||||
.endif
|
||||
.ifc \type,avg
|
||||
ld1 {v20.s}[0], [x8], x2
|
||||
ld1 {v20.s}[1], [x8], x2
|
||||
urhadd v16.8b, v16.8b, v20.8b
|
||||
ld1 {v20.S}[0], [x8], x2
|
||||
ld1 {v20.S}[1], [x8], x2
|
||||
urhadd v16.8B, v16.8B, v20.8B
|
||||
.endif
|
||||
prfm pldl1strm, [x1]
|
||||
st1 {v16.s}[0], [x0], x2
|
||||
st1 {v16.s}[1], [x0], x2
|
||||
st1 {v16.S}[0], [x0], x2
|
||||
st1 {v16.S}[1], [x0], x2
|
||||
b.gt 4b
|
||||
ret
|
||||
|
||||
5: ld1 {v4.s}[0], [x1], x2
|
||||
ld1 {v4.s}[1], [x1], x2
|
||||
umull v18.8h, v4.8b, v30.8b
|
||||
5: ld1 {v4.S}[0], [x1], x2
|
||||
ld1 {v4.S}[1], [x1], x2
|
||||
umull v18.8H, v4.8B, v30.8B
|
||||
subs w3, w3, #2
|
||||
prfm pldl1strm, [x1]
|
||||
.ifc \codec,h264
|
||||
rshrn v16.8b, v18.8h, #6
|
||||
rshrn v16.8B, v18.8H, #6
|
||||
.else
|
||||
add v18.8h, v18.8h, v22.8h
|
||||
shrn v16.8b, v18.8h, #6
|
||||
add v18.8H, v18.8H, v22.8H
|
||||
shrn v16.8B, v18.8H, #6
|
||||
.endif
|
||||
.ifc \type,avg
|
||||
ld1 {v20.s}[0], [x8], x2
|
||||
ld1 {v20.s}[1], [x8], x2
|
||||
urhadd v16.8b, v16.8b, v20.8b
|
||||
ld1 {v20.S}[0], [x8], x2
|
||||
ld1 {v20.S}[1], [x8], x2
|
||||
urhadd v16.8B, v16.8B, v20.8B
|
||||
.endif
|
||||
prfm pldl1strm, [x1]
|
||||
st1 {v16.s}[0], [x0], x2
|
||||
st1 {v16.s}[1], [x0], x2
|
||||
st1 {v16.S}[0], [x0], x2
|
||||
st1 {v16.S}[1], [x0], x2
|
||||
b.gt 5b
|
||||
ret
|
||||
endfunc
|
||||
@@ -370,51 +370,51 @@ function ff_\type\()_h264_chroma_mc2_neon, export=1
|
||||
sub w4, w7, w13
|
||||
sub w4, w4, w14
|
||||
add w4, w4, #64
|
||||
dup v0.8b, w4
|
||||
dup v2.8b, w12
|
||||
dup v1.8b, w6
|
||||
dup v3.8b, w7
|
||||
trn1 v0.4h, v0.4h, v2.4h
|
||||
trn1 v1.4h, v1.4h, v3.4h
|
||||
dup v0.8B, w4
|
||||
dup v2.8B, w12
|
||||
dup v1.8B, w6
|
||||
dup v3.8B, w7
|
||||
trn1 v0.4H, v0.4H, v2.4H
|
||||
trn1 v1.4H, v1.4H, v3.4H
|
||||
1:
|
||||
ld1 {v4.s}[0], [x1], x2
|
||||
ld1 {v4.s}[1], [x1], x2
|
||||
rev64 v5.2s, v4.2s
|
||||
ld1 {v5.s}[1], [x1]
|
||||
ext v6.8b, v4.8b, v5.8b, #1
|
||||
ext v7.8b, v5.8b, v4.8b, #1
|
||||
trn1 v4.4h, v4.4h, v6.4h
|
||||
trn1 v5.4h, v5.4h, v7.4h
|
||||
umull v16.8h, v4.8b, v0.8b
|
||||
umlal v16.8h, v5.8b, v1.8b
|
||||
ld1 {v4.S}[0], [x1], x2
|
||||
ld1 {v4.S}[1], [x1], x2
|
||||
rev64 v5.2S, v4.2S
|
||||
ld1 {v5.S}[1], [x1]
|
||||
ext v6.8B, v4.8B, v5.8B, #1
|
||||
ext v7.8B, v5.8B, v4.8B, #1
|
||||
trn1 v4.4H, v4.4H, v6.4H
|
||||
trn1 v5.4H, v5.4H, v7.4H
|
||||
umull v16.8H, v4.8B, v0.8B
|
||||
umlal v16.8H, v5.8B, v1.8B
|
||||
.ifc \type,avg
|
||||
ld1 {v18.h}[0], [x0], x2
|
||||
ld1 {v18.h}[2], [x0]
|
||||
ld1 {v18.H}[0], [x0], x2
|
||||
ld1 {v18.H}[2], [x0]
|
||||
sub x0, x0, x2
|
||||
.endif
|
||||
rev64 v17.4s, v16.4s
|
||||
add v16.8h, v16.8h, v17.8h
|
||||
rshrn v16.8b, v16.8h, #6
|
||||
rev64 v17.4S, v16.4S
|
||||
add v16.8H, v16.8H, v17.8H
|
||||
rshrn v16.8B, v16.8H, #6
|
||||
.ifc \type,avg
|
||||
urhadd v16.8b, v16.8b, v18.8b
|
||||
urhadd v16.8B, v16.8B, v18.8B
|
||||
.endif
|
||||
st1 {v16.h}[0], [x0], x2
|
||||
st1 {v16.h}[2], [x0], x2
|
||||
st1 {v16.H}[0], [x0], x2
|
||||
st1 {v16.H}[2], [x0], x2
|
||||
subs w3, w3, #2
|
||||
b.gt 1b
|
||||
ret
|
||||
|
||||
2:
|
||||
ld1 {v16.h}[0], [x1], x2
|
||||
ld1 {v16.h}[1], [x1], x2
|
||||
ld1 {v16.H}[0], [x1], x2
|
||||
ld1 {v16.H}[1], [x1], x2
|
||||
.ifc \type,avg
|
||||
ld1 {v18.h}[0], [x0], x2
|
||||
ld1 {v18.h}[1], [x0]
|
||||
ld1 {v18.H}[0], [x0], x2
|
||||
ld1 {v18.H}[1], [x0]
|
||||
sub x0, x0, x2
|
||||
urhadd v16.8b, v16.8b, v18.8b
|
||||
urhadd v16.8B, v16.8B, v18.8B
|
||||
.endif
|
||||
st1 {v16.h}[0], [x0], x2
|
||||
st1 {v16.h}[1], [x0], x2
|
||||
st1 {v16.H}[0], [x0], x2
|
||||
st1 {v16.H}[1], [x0], x2
|
||||
subs w3, w3, #2
|
||||
b.gt 2b
|
||||
ret
|
||||
|
||||
+531
-531
File diff suppressed because it is too large
Load Diff
+272
-272
@@ -27,114 +27,114 @@
|
||||
.macro lowpass_const r
|
||||
movz \r, #20, lsl #16
|
||||
movk \r, #5
|
||||
mov v6.s[0], \r
|
||||
mov v6.S[0], \r
|
||||
.endm
|
||||
|
||||
//trashes v0-v5
|
||||
.macro lowpass_8 r0, r1, r2, r3, d0, d1, narrow=1
|
||||
ext v2.8b, \r0\().8b, \r1\().8b, #2
|
||||
ext v3.8b, \r0\().8b, \r1\().8b, #3
|
||||
uaddl v2.8h, v2.8b, v3.8b
|
||||
ext v4.8b, \r0\().8b, \r1\().8b, #1
|
||||
ext v5.8b, \r0\().8b, \r1\().8b, #4
|
||||
uaddl v4.8h, v4.8b, v5.8b
|
||||
ext v1.8b, \r0\().8b, \r1\().8b, #5
|
||||
uaddl \d0\().8h, \r0\().8b, v1.8b
|
||||
ext v0.8b, \r2\().8b, \r3\().8b, #2
|
||||
mla \d0\().8h, v2.8h, v6.h[1]
|
||||
ext v1.8b, \r2\().8b, \r3\().8b, #3
|
||||
uaddl v0.8h, v0.8b, v1.8b
|
||||
ext v1.8b, \r2\().8b, \r3\().8b, #1
|
||||
mls \d0\().8h, v4.8h, v6.h[0]
|
||||
ext v3.8b, \r2\().8b, \r3\().8b, #4
|
||||
uaddl v1.8h, v1.8b, v3.8b
|
||||
ext v2.8b, \r2\().8b, \r3\().8b, #5
|
||||
uaddl \d1\().8h, \r2\().8b, v2.8b
|
||||
mla \d1\().8h, v0.8h, v6.h[1]
|
||||
mls \d1\().8h, v1.8h, v6.h[0]
|
||||
ext v2.8B, \r0\().8B, \r1\().8B, #2
|
||||
ext v3.8B, \r0\().8B, \r1\().8B, #3
|
||||
uaddl v2.8H, v2.8B, v3.8B
|
||||
ext v4.8B, \r0\().8B, \r1\().8B, #1
|
||||
ext v5.8B, \r0\().8B, \r1\().8B, #4
|
||||
uaddl v4.8H, v4.8B, v5.8B
|
||||
ext v1.8B, \r0\().8B, \r1\().8B, #5
|
||||
uaddl \d0\().8H, \r0\().8B, v1.8B
|
||||
ext v0.8B, \r2\().8B, \r3\().8B, #2
|
||||
mla \d0\().8H, v2.8H, v6.H[1]
|
||||
ext v1.8B, \r2\().8B, \r3\().8B, #3
|
||||
uaddl v0.8H, v0.8B, v1.8B
|
||||
ext v1.8B, \r2\().8B, \r3\().8B, #1
|
||||
mls \d0\().8H, v4.8H, v6.H[0]
|
||||
ext v3.8B, \r2\().8B, \r3\().8B, #4
|
||||
uaddl v1.8H, v1.8B, v3.8B
|
||||
ext v2.8B, \r2\().8B, \r3\().8B, #5
|
||||
uaddl \d1\().8H, \r2\().8B, v2.8B
|
||||
mla \d1\().8H, v0.8H, v6.H[1]
|
||||
mls \d1\().8H, v1.8H, v6.H[0]
|
||||
.if \narrow
|
||||
sqrshrun \d0\().8b, \d0\().8h, #5
|
||||
sqrshrun \d1\().8b, \d1\().8h, #5
|
||||
sqrshrun \d0\().8B, \d0\().8H, #5
|
||||
sqrshrun \d1\().8B, \d1\().8H, #5
|
||||
.endif
|
||||
.endm
|
||||
|
||||
//trashes v0-v5, v7, v30-v31
|
||||
.macro lowpass_8H r0, r1
|
||||
ext v0.16b, \r0\().16b, \r0\().16b, #2
|
||||
ext v1.16b, \r0\().16b, \r0\().16b, #3
|
||||
uaddl v0.8h, v0.8b, v1.8b
|
||||
ext v2.16b, \r0\().16b, \r0\().16b, #1
|
||||
ext v3.16b, \r0\().16b, \r0\().16b, #4
|
||||
uaddl v2.8h, v2.8b, v3.8b
|
||||
ext v30.16b, \r0\().16b, \r0\().16b, #5
|
||||
uaddl \r0\().8h, \r0\().8b, v30.8b
|
||||
ext v4.16b, \r1\().16b, \r1\().16b, #2
|
||||
mla \r0\().8h, v0.8h, v6.h[1]
|
||||
ext v5.16b, \r1\().16b, \r1\().16b, #3
|
||||
uaddl v4.8h, v4.8b, v5.8b
|
||||
ext v7.16b, \r1\().16b, \r1\().16b, #1
|
||||
mls \r0\().8h, v2.8h, v6.h[0]
|
||||
ext v0.16b, \r1\().16b, \r1\().16b, #4
|
||||
uaddl v7.8h, v7.8b, v0.8b
|
||||
ext v31.16b, \r1\().16b, \r1\().16b, #5
|
||||
uaddl \r1\().8h, \r1\().8b, v31.8b
|
||||
mla \r1\().8h, v4.8h, v6.h[1]
|
||||
mls \r1\().8h, v7.8h, v6.h[0]
|
||||
ext v0.16B, \r0\().16B, \r0\().16B, #2
|
||||
ext v1.16B, \r0\().16B, \r0\().16B, #3
|
||||
uaddl v0.8H, v0.8B, v1.8B
|
||||
ext v2.16B, \r0\().16B, \r0\().16B, #1
|
||||
ext v3.16B, \r0\().16B, \r0\().16B, #4
|
||||
uaddl v2.8H, v2.8B, v3.8B
|
||||
ext v30.16B, \r0\().16B, \r0\().16B, #5
|
||||
uaddl \r0\().8H, \r0\().8B, v30.8B
|
||||
ext v4.16B, \r1\().16B, \r1\().16B, #2
|
||||
mla \r0\().8H, v0.8H, v6.H[1]
|
||||
ext v5.16B, \r1\().16B, \r1\().16B, #3
|
||||
uaddl v4.8H, v4.8B, v5.8B
|
||||
ext v7.16B, \r1\().16B, \r1\().16B, #1
|
||||
mls \r0\().8H, v2.8H, v6.H[0]
|
||||
ext v0.16B, \r1\().16B, \r1\().16B, #4
|
||||
uaddl v7.8H, v7.8B, v0.8B
|
||||
ext v31.16B, \r1\().16B, \r1\().16B, #5
|
||||
uaddl \r1\().8H, \r1\().8B, v31.8B
|
||||
mla \r1\().8H, v4.8H, v6.H[1]
|
||||
mls \r1\().8H, v7.8H, v6.H[0]
|
||||
.endm
|
||||
|
||||
// trashes v2-v5, v30
|
||||
.macro lowpass_8_1 r0, r1, d0, narrow=1
|
||||
ext v2.8b, \r0\().8b, \r1\().8b, #2
|
||||
ext v3.8b, \r0\().8b, \r1\().8b, #3
|
||||
uaddl v2.8h, v2.8b, v3.8b
|
||||
ext v4.8b, \r0\().8b, \r1\().8b, #1
|
||||
ext v5.8b, \r0\().8b, \r1\().8b, #4
|
||||
uaddl v4.8h, v4.8b, v5.8b
|
||||
ext v30.8b, \r0\().8b, \r1\().8b, #5
|
||||
uaddl \d0\().8h, \r0\().8b, v30.8b
|
||||
mla \d0\().8h, v2.8h, v6.h[1]
|
||||
mls \d0\().8h, v4.8h, v6.h[0]
|
||||
ext v2.8B, \r0\().8B, \r1\().8B, #2
|
||||
ext v3.8B, \r0\().8B, \r1\().8B, #3
|
||||
uaddl v2.8H, v2.8B, v3.8B
|
||||
ext v4.8B, \r0\().8B, \r1\().8B, #1
|
||||
ext v5.8B, \r0\().8B, \r1\().8B, #4
|
||||
uaddl v4.8H, v4.8B, v5.8B
|
||||
ext v30.8B, \r0\().8B, \r1\().8B, #5
|
||||
uaddl \d0\().8H, \r0\().8B, v30.8B
|
||||
mla \d0\().8H, v2.8H, v6.H[1]
|
||||
mls \d0\().8H, v4.8H, v6.H[0]
|
||||
.if \narrow
|
||||
sqrshrun \d0\().8b, \d0\().8h, #5
|
||||
sqrshrun \d0\().8B, \d0\().8H, #5
|
||||
.endif
|
||||
.endm
|
||||
|
||||
// trashed v0-v7
|
||||
.macro lowpass_8.16 r0, r1, r2
|
||||
ext v1.16b, \r0\().16b, \r1\().16b, #4
|
||||
ext v0.16b, \r0\().16b, \r1\().16b, #6
|
||||
saddl v5.4s, v1.4h, v0.4h
|
||||
ext v2.16b, \r0\().16b, \r1\().16b, #2
|
||||
saddl2 v1.4s, v1.8h, v0.8h
|
||||
ext v3.16b, \r0\().16b, \r1\().16b, #8
|
||||
saddl v6.4s, v2.4h, v3.4h
|
||||
ext \r1\().16b, \r0\().16b, \r1\().16b, #10
|
||||
saddl2 v2.4s, v2.8h, v3.8h
|
||||
saddl v0.4s, \r0\().4h, \r1\().4h
|
||||
saddl2 v4.4s, \r0\().8h, \r1\().8h
|
||||
ext v1.16B, \r0\().16B, \r1\().16B, #4
|
||||
ext v0.16B, \r0\().16B, \r1\().16B, #6
|
||||
saddl v5.4S, v1.4H, v0.4H
|
||||
ext v2.16B, \r0\().16B, \r1\().16B, #2
|
||||
saddl2 v1.4S, v1.8H, v0.8H
|
||||
ext v3.16B, \r0\().16B, \r1\().16B, #8
|
||||
saddl v6.4S, v2.4H, v3.4H
|
||||
ext \r1\().16B, \r0\().16B, \r1\().16B, #10
|
||||
saddl2 v2.4S, v2.8H, v3.8H
|
||||
saddl v0.4S, \r0\().4H, \r1\().4H
|
||||
saddl2 v4.4S, \r0\().8H, \r1\().8H
|
||||
|
||||
shl v3.4s, v5.4s, #4
|
||||
shl v5.4s, v5.4s, #2
|
||||
shl v7.4s, v6.4s, #2
|
||||
add v5.4s, v5.4s, v3.4s
|
||||
add v6.4s, v6.4s, v7.4s
|
||||
shl v3.4S, v5.4S, #4
|
||||
shl v5.4S, v5.4S, #2
|
||||
shl v7.4S, v6.4S, #2
|
||||
add v5.4S, v5.4S, v3.4S
|
||||
add v6.4S, v6.4S, v7.4S
|
||||
|
||||
shl v3.4s, v1.4s, #4
|
||||
shl v1.4s, v1.4s, #2
|
||||
shl v7.4s, v2.4s, #2
|
||||
add v1.4s, v1.4s, v3.4s
|
||||
add v2.4s, v2.4s, v7.4s
|
||||
shl v3.4S, v1.4S, #4
|
||||
shl v1.4S, v1.4S, #2
|
||||
shl v7.4S, v2.4S, #2
|
||||
add v1.4S, v1.4S, v3.4S
|
||||
add v2.4S, v2.4S, v7.4S
|
||||
|
||||
add v5.4s, v5.4s, v0.4s
|
||||
sub v5.4s, v5.4s, v6.4s
|
||||
add v5.4S, v5.4S, v0.4S
|
||||
sub v5.4S, v5.4S, v6.4S
|
||||
|
||||
add v1.4s, v1.4s, v4.4s
|
||||
sub v1.4s, v1.4s, v2.4s
|
||||
add v1.4S, v1.4S, v4.4S
|
||||
sub v1.4S, v1.4S, v2.4S
|
||||
|
||||
rshrn v5.4h, v5.4s, #10
|
||||
rshrn2 v5.8h, v1.4s, #10
|
||||
rshrn v5.4H, v5.4S, #10
|
||||
rshrn2 v5.8H, v1.4S, #10
|
||||
|
||||
sqxtun \r2\().8b, v5.8h
|
||||
sqxtun \r2\().8B, v5.8H
|
||||
.endm
|
||||
|
||||
function put_h264_qpel16_h_lowpass_neon_packed
|
||||
@@ -163,19 +163,19 @@ function \type\()_h264_qpel16_h_lowpass_neon
|
||||
endfunc
|
||||
|
||||
function \type\()_h264_qpel8_h_lowpass_neon
|
||||
1: ld1 {v28.8b, v29.8b}, [x1], x2
|
||||
ld1 {v16.8b, v17.8b}, [x1], x2
|
||||
1: ld1 {v28.8B, v29.8B}, [x1], x2
|
||||
ld1 {v16.8B, v17.8B}, [x1], x2
|
||||
subs x12, x12, #2
|
||||
lowpass_8 v28, v29, v16, v17, v28, v16
|
||||
.ifc \type,avg
|
||||
ld1 {v2.8b}, [x0], x3
|
||||
urhadd v28.8b, v28.8b, v2.8b
|
||||
ld1 {v3.8b}, [x0]
|
||||
urhadd v16.8b, v16.8b, v3.8b
|
||||
ld1 {v2.8B}, [x0], x3
|
||||
urhadd v28.8B, v28.8B, v2.8B
|
||||
ld1 {v3.8B}, [x0]
|
||||
urhadd v16.8B, v16.8B, v3.8B
|
||||
sub x0, x0, x3
|
||||
.endif
|
||||
st1 {v28.8b}, [x0], x3
|
||||
st1 {v16.8b}, [x0], x3
|
||||
st1 {v28.8B}, [x0], x3
|
||||
st1 {v16.8B}, [x0], x3
|
||||
b.ne 1b
|
||||
ret
|
||||
endfunc
|
||||
@@ -200,23 +200,23 @@ function \type\()_h264_qpel16_h_lowpass_l2_neon
|
||||
endfunc
|
||||
|
||||
function \type\()_h264_qpel8_h_lowpass_l2_neon
|
||||
1: ld1 {v26.8b, v27.8b}, [x1], x2
|
||||
ld1 {v16.8b, v17.8b}, [x1], x2
|
||||
ld1 {v28.8b}, [x3], x2
|
||||
ld1 {v29.8b}, [x3], x2
|
||||
1: ld1 {v26.8B, v27.8B}, [x1], x2
|
||||
ld1 {v16.8B, v17.8B}, [x1], x2
|
||||
ld1 {v28.8B}, [x3], x2
|
||||
ld1 {v29.8B}, [x3], x2
|
||||
subs x12, x12, #2
|
||||
lowpass_8 v26, v27, v16, v17, v26, v27
|
||||
urhadd v26.8b, v26.8b, v28.8b
|
||||
urhadd v27.8b, v27.8b, v29.8b
|
||||
urhadd v26.8B, v26.8B, v28.8B
|
||||
urhadd v27.8B, v27.8B, v29.8B
|
||||
.ifc \type,avg
|
||||
ld1 {v2.8b}, [x0], x2
|
||||
urhadd v26.8b, v26.8b, v2.8b
|
||||
ld1 {v3.8b}, [x0]
|
||||
urhadd v27.8b, v27.8b, v3.8b
|
||||
ld1 {v2.8B}, [x0], x2
|
||||
urhadd v26.8B, v26.8B, v2.8B
|
||||
ld1 {v3.8B}, [x0]
|
||||
urhadd v27.8B, v27.8B, v3.8B
|
||||
sub x0, x0, x2
|
||||
.endif
|
||||
st1 {v26.8b}, [x0], x2
|
||||
st1 {v27.8b}, [x0], x2
|
||||
st1 {v26.8B}, [x0], x2
|
||||
st1 {v27.8B}, [x0], x2
|
||||
b.ne 1b
|
||||
ret
|
||||
endfunc
|
||||
@@ -257,19 +257,19 @@ function \type\()_h264_qpel16_v_lowpass_neon
|
||||
endfunc
|
||||
|
||||
function \type\()_h264_qpel8_v_lowpass_neon
|
||||
ld1 {v16.8b}, [x1], x3
|
||||
ld1 {v18.8b}, [x1], x3
|
||||
ld1 {v20.8b}, [x1], x3
|
||||
ld1 {v22.8b}, [x1], x3
|
||||
ld1 {v24.8b}, [x1], x3
|
||||
ld1 {v26.8b}, [x1], x3
|
||||
ld1 {v28.8b}, [x1], x3
|
||||
ld1 {v30.8b}, [x1], x3
|
||||
ld1 {v17.8b}, [x1], x3
|
||||
ld1 {v19.8b}, [x1], x3
|
||||
ld1 {v21.8b}, [x1], x3
|
||||
ld1 {v23.8b}, [x1], x3
|
||||
ld1 {v25.8b}, [x1]
|
||||
ld1 {v16.8B}, [x1], x3
|
||||
ld1 {v18.8B}, [x1], x3
|
||||
ld1 {v20.8B}, [x1], x3
|
||||
ld1 {v22.8B}, [x1], x3
|
||||
ld1 {v24.8B}, [x1], x3
|
||||
ld1 {v26.8B}, [x1], x3
|
||||
ld1 {v28.8B}, [x1], x3
|
||||
ld1 {v30.8B}, [x1], x3
|
||||
ld1 {v17.8B}, [x1], x3
|
||||
ld1 {v19.8B}, [x1], x3
|
||||
ld1 {v21.8B}, [x1], x3
|
||||
ld1 {v23.8B}, [x1], x3
|
||||
ld1 {v25.8B}, [x1]
|
||||
|
||||
transpose_8x8B v16, v18, v20, v22, v24, v26, v28, v30, v0, v1
|
||||
transpose_8x8B v17, v19, v21, v23, v25, v27, v29, v31, v0, v1
|
||||
@@ -280,33 +280,33 @@ function \type\()_h264_qpel8_v_lowpass_neon
|
||||
transpose_8x8B v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
|
||||
|
||||
.ifc \type,avg
|
||||
ld1 {v24.8b}, [x0], x2
|
||||
urhadd v16.8b, v16.8b, v24.8b
|
||||
ld1 {v25.8b}, [x0], x2
|
||||
urhadd v17.8b, v17.8b, v25.8b
|
||||
ld1 {v26.8b}, [x0], x2
|
||||
urhadd v18.8b, v18.8b, v26.8b
|
||||
ld1 {v27.8b}, [x0], x2
|
||||
urhadd v19.8b, v19.8b, v27.8b
|
||||
ld1 {v28.8b}, [x0], x2
|
||||
urhadd v20.8b, v20.8b, v28.8b
|
||||
ld1 {v29.8b}, [x0], x2
|
||||
urhadd v21.8b, v21.8b, v29.8b
|
||||
ld1 {v30.8b}, [x0], x2
|
||||
urhadd v22.8b, v22.8b, v30.8b
|
||||
ld1 {v31.8b}, [x0], x2
|
||||
urhadd v23.8b, v23.8b, v31.8b
|
||||
ld1 {v24.8B}, [x0], x2
|
||||
urhadd v16.8B, v16.8B, v24.8B
|
||||
ld1 {v25.8B}, [x0], x2
|
||||
urhadd v17.8B, v17.8B, v25.8B
|
||||
ld1 {v26.8B}, [x0], x2
|
||||
urhadd v18.8B, v18.8B, v26.8B
|
||||
ld1 {v27.8B}, [x0], x2
|
||||
urhadd v19.8B, v19.8B, v27.8B
|
||||
ld1 {v28.8B}, [x0], x2
|
||||
urhadd v20.8B, v20.8B, v28.8B
|
||||
ld1 {v29.8B}, [x0], x2
|
||||
urhadd v21.8B, v21.8B, v29.8B
|
||||
ld1 {v30.8B}, [x0], x2
|
||||
urhadd v22.8B, v22.8B, v30.8B
|
||||
ld1 {v31.8B}, [x0], x2
|
||||
urhadd v23.8B, v23.8B, v31.8B
|
||||
sub x0, x0, x2, lsl #3
|
||||
.endif
|
||||
|
||||
st1 {v16.8b}, [x0], x2
|
||||
st1 {v17.8b}, [x0], x2
|
||||
st1 {v18.8b}, [x0], x2
|
||||
st1 {v19.8b}, [x0], x2
|
||||
st1 {v20.8b}, [x0], x2
|
||||
st1 {v21.8b}, [x0], x2
|
||||
st1 {v22.8b}, [x0], x2
|
||||
st1 {v23.8b}, [x0], x2
|
||||
st1 {v16.8B}, [x0], x2
|
||||
st1 {v17.8B}, [x0], x2
|
||||
st1 {v18.8B}, [x0], x2
|
||||
st1 {v19.8B}, [x0], x2
|
||||
st1 {v20.8B}, [x0], x2
|
||||
st1 {v21.8B}, [x0], x2
|
||||
st1 {v22.8B}, [x0], x2
|
||||
st1 {v23.8B}, [x0], x2
|
||||
|
||||
ret
|
||||
endfunc
|
||||
@@ -334,19 +334,19 @@ function \type\()_h264_qpel16_v_lowpass_l2_neon
|
||||
endfunc
|
||||
|
||||
function \type\()_h264_qpel8_v_lowpass_l2_neon
|
||||
ld1 {v16.8b}, [x1], x3
|
||||
ld1 {v18.8b}, [x1], x3
|
||||
ld1 {v20.8b}, [x1], x3
|
||||
ld1 {v22.8b}, [x1], x3
|
||||
ld1 {v24.8b}, [x1], x3
|
||||
ld1 {v26.8b}, [x1], x3
|
||||
ld1 {v28.8b}, [x1], x3
|
||||
ld1 {v30.8b}, [x1], x3
|
||||
ld1 {v17.8b}, [x1], x3
|
||||
ld1 {v19.8b}, [x1], x3
|
||||
ld1 {v21.8b}, [x1], x3
|
||||
ld1 {v23.8b}, [x1], x3
|
||||
ld1 {v25.8b}, [x1]
|
||||
ld1 {v16.8B}, [x1], x3
|
||||
ld1 {v18.8B}, [x1], x3
|
||||
ld1 {v20.8B}, [x1], x3
|
||||
ld1 {v22.8B}, [x1], x3
|
||||
ld1 {v24.8B}, [x1], x3
|
||||
ld1 {v26.8B}, [x1], x3
|
||||
ld1 {v28.8B}, [x1], x3
|
||||
ld1 {v30.8B}, [x1], x3
|
||||
ld1 {v17.8B}, [x1], x3
|
||||
ld1 {v19.8B}, [x1], x3
|
||||
ld1 {v21.8B}, [x1], x3
|
||||
ld1 {v23.8B}, [x1], x3
|
||||
ld1 {v25.8B}, [x1]
|
||||
|
||||
transpose_8x8B v16, v18, v20, v22, v24, v26, v28, v30, v0, v1
|
||||
transpose_8x8B v17, v19, v21, v23, v25, v27, v29, v31, v0, v1
|
||||
@@ -356,51 +356,51 @@ function \type\()_h264_qpel8_v_lowpass_l2_neon
|
||||
lowpass_8 v28, v29, v30, v31, v22, v23
|
||||
transpose_8x8B v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
|
||||
|
||||
ld1 {v24.8b}, [x12], x2
|
||||
ld1 {v25.8b}, [x12], x2
|
||||
ld1 {v26.8b}, [x12], x2
|
||||
ld1 {v27.8b}, [x12], x2
|
||||
ld1 {v28.8b}, [x12], x2
|
||||
urhadd v16.8b, v24.8b, v16.8b
|
||||
urhadd v17.8b, v25.8b, v17.8b
|
||||
ld1 {v29.8b}, [x12], x2
|
||||
urhadd v18.8b, v26.8b, v18.8b
|
||||
urhadd v19.8b, v27.8b, v19.8b
|
||||
ld1 {v30.8b}, [x12], x2
|
||||
urhadd v20.8b, v28.8b, v20.8b
|
||||
urhadd v21.8b, v29.8b, v21.8b
|
||||
ld1 {v31.8b}, [x12], x2
|
||||
urhadd v22.8b, v30.8b, v22.8b
|
||||
urhadd v23.8b, v31.8b, v23.8b
|
||||
ld1 {v24.8B}, [x12], x2
|
||||
ld1 {v25.8B}, [x12], x2
|
||||
ld1 {v26.8B}, [x12], x2
|
||||
ld1 {v27.8B}, [x12], x2
|
||||
ld1 {v28.8B}, [x12], x2
|
||||
urhadd v16.8B, v24.8B, v16.8B
|
||||
urhadd v17.8B, v25.8B, v17.8B
|
||||
ld1 {v29.8B}, [x12], x2
|
||||
urhadd v18.8B, v26.8B, v18.8B
|
||||
urhadd v19.8B, v27.8B, v19.8B
|
||||
ld1 {v30.8B}, [x12], x2
|
||||
urhadd v20.8B, v28.8B, v20.8B
|
||||
urhadd v21.8B, v29.8B, v21.8B
|
||||
ld1 {v31.8B}, [x12], x2
|
||||
urhadd v22.8B, v30.8B, v22.8B
|
||||
urhadd v23.8B, v31.8B, v23.8B
|
||||
|
||||
.ifc \type,avg
|
||||
ld1 {v24.8b}, [x0], x3
|
||||
urhadd v16.8b, v16.8b, v24.8b
|
||||
ld1 {v25.8b}, [x0], x3
|
||||
urhadd v17.8b, v17.8b, v25.8b
|
||||
ld1 {v26.8b}, [x0], x3
|
||||
urhadd v18.8b, v18.8b, v26.8b
|
||||
ld1 {v27.8b}, [x0], x3
|
||||
urhadd v19.8b, v19.8b, v27.8b
|
||||
ld1 {v28.8b}, [x0], x3
|
||||
urhadd v20.8b, v20.8b, v28.8b
|
||||
ld1 {v29.8b}, [x0], x3
|
||||
urhadd v21.8b, v21.8b, v29.8b
|
||||
ld1 {v30.8b}, [x0], x3
|
||||
urhadd v22.8b, v22.8b, v30.8b
|
||||
ld1 {v31.8b}, [x0], x3
|
||||
urhadd v23.8b, v23.8b, v31.8b
|
||||
ld1 {v24.8B}, [x0], x3
|
||||
urhadd v16.8B, v16.8B, v24.8B
|
||||
ld1 {v25.8B}, [x0], x3
|
||||
urhadd v17.8B, v17.8B, v25.8B
|
||||
ld1 {v26.8B}, [x0], x3
|
||||
urhadd v18.8B, v18.8B, v26.8B
|
||||
ld1 {v27.8B}, [x0], x3
|
||||
urhadd v19.8B, v19.8B, v27.8B
|
||||
ld1 {v28.8B}, [x0], x3
|
||||
urhadd v20.8B, v20.8B, v28.8B
|
||||
ld1 {v29.8B}, [x0], x3
|
||||
urhadd v21.8B, v21.8B, v29.8B
|
||||
ld1 {v30.8B}, [x0], x3
|
||||
urhadd v22.8B, v22.8B, v30.8B
|
||||
ld1 {v31.8B}, [x0], x3
|
||||
urhadd v23.8B, v23.8B, v31.8B
|
||||
sub x0, x0, x3, lsl #3
|
||||
.endif
|
||||
|
||||
st1 {v16.8b}, [x0], x3
|
||||
st1 {v17.8b}, [x0], x3
|
||||
st1 {v18.8b}, [x0], x3
|
||||
st1 {v19.8b}, [x0], x3
|
||||
st1 {v20.8b}, [x0], x3
|
||||
st1 {v21.8b}, [x0], x3
|
||||
st1 {v22.8b}, [x0], x3
|
||||
st1 {v23.8b}, [x0], x3
|
||||
st1 {v16.8B}, [x0], x3
|
||||
st1 {v17.8B}, [x0], x3
|
||||
st1 {v18.8B}, [x0], x3
|
||||
st1 {v19.8B}, [x0], x3
|
||||
st1 {v20.8B}, [x0], x3
|
||||
st1 {v21.8B}, [x0], x3
|
||||
st1 {v22.8B}, [x0], x3
|
||||
st1 {v23.8B}, [x0], x3
|
||||
|
||||
ret
|
||||
endfunc
|
||||
@@ -411,19 +411,19 @@ endfunc
|
||||
|
||||
function put_h264_qpel8_hv_lowpass_neon_top
|
||||
lowpass_const w12
|
||||
ld1 {v16.8h}, [x1], x3
|
||||
ld1 {v17.8h}, [x1], x3
|
||||
ld1 {v18.8h}, [x1], x3
|
||||
ld1 {v19.8h}, [x1], x3
|
||||
ld1 {v20.8h}, [x1], x3
|
||||
ld1 {v21.8h}, [x1], x3
|
||||
ld1 {v22.8h}, [x1], x3
|
||||
ld1 {v23.8h}, [x1], x3
|
||||
ld1 {v24.8h}, [x1], x3
|
||||
ld1 {v25.8h}, [x1], x3
|
||||
ld1 {v26.8h}, [x1], x3
|
||||
ld1 {v27.8h}, [x1], x3
|
||||
ld1 {v28.8h}, [x1]
|
||||
ld1 {v16.8H}, [x1], x3
|
||||
ld1 {v17.8H}, [x1], x3
|
||||
ld1 {v18.8H}, [x1], x3
|
||||
ld1 {v19.8H}, [x1], x3
|
||||
ld1 {v20.8H}, [x1], x3
|
||||
ld1 {v21.8H}, [x1], x3
|
||||
ld1 {v22.8H}, [x1], x3
|
||||
ld1 {v23.8H}, [x1], x3
|
||||
ld1 {v24.8H}, [x1], x3
|
||||
ld1 {v25.8H}, [x1], x3
|
||||
ld1 {v26.8H}, [x1], x3
|
||||
ld1 {v27.8H}, [x1], x3
|
||||
ld1 {v28.8H}, [x1]
|
||||
lowpass_8H v16, v17
|
||||
lowpass_8H v18, v19
|
||||
lowpass_8H v20, v21
|
||||
@@ -447,7 +447,7 @@ function put_h264_qpel8_hv_lowpass_neon_top
|
||||
lowpass_8.16 v22, v30, v22
|
||||
lowpass_8.16 v23, v31, v23
|
||||
|
||||
transpose_8x8B v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
|
||||
transpose_8x8B v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
|
||||
|
||||
ret
|
||||
endfunc
|
||||
@@ -457,33 +457,33 @@ function \type\()_h264_qpel8_hv_lowpass_neon
|
||||
mov x10, x30
|
||||
bl put_h264_qpel8_hv_lowpass_neon_top
|
||||
.ifc \type,avg
|
||||
ld1 {v0.8b}, [x0], x2
|
||||
urhadd v16.8b, v16.8b, v0.8b
|
||||
ld1 {v1.8b}, [x0], x2
|
||||
urhadd v17.8b, v17.8b, v1.8b
|
||||
ld1 {v2.8b}, [x0], x2
|
||||
urhadd v18.8b, v18.8b, v2.8b
|
||||
ld1 {v3.8b}, [x0], x2
|
||||
urhadd v19.8b, v19.8b, v3.8b
|
||||
ld1 {v4.8b}, [x0], x2
|
||||
urhadd v20.8b, v20.8b, v4.8b
|
||||
ld1 {v5.8b}, [x0], x2
|
||||
urhadd v21.8b, v21.8b, v5.8b
|
||||
ld1 {v6.8b}, [x0], x2
|
||||
urhadd v22.8b, v22.8b, v6.8b
|
||||
ld1 {v7.8b}, [x0], x2
|
||||
urhadd v23.8b, v23.8b, v7.8b
|
||||
ld1 {v0.8B}, [x0], x2
|
||||
urhadd v16.8B, v16.8B, v0.8B
|
||||
ld1 {v1.8B}, [x0], x2
|
||||
urhadd v17.8B, v17.8B, v1.8B
|
||||
ld1 {v2.8B}, [x0], x2
|
||||
urhadd v18.8B, v18.8B, v2.8B
|
||||
ld1 {v3.8B}, [x0], x2
|
||||
urhadd v19.8B, v19.8B, v3.8B
|
||||
ld1 {v4.8B}, [x0], x2
|
||||
urhadd v20.8B, v20.8B, v4.8B
|
||||
ld1 {v5.8B}, [x0], x2
|
||||
urhadd v21.8B, v21.8B, v5.8B
|
||||
ld1 {v6.8B}, [x0], x2
|
||||
urhadd v22.8B, v22.8B, v6.8B
|
||||
ld1 {v7.8B}, [x0], x2
|
||||
urhadd v23.8B, v23.8B, v7.8B
|
||||
sub x0, x0, x2, lsl #3
|
||||
.endif
|
||||
|
||||
st1 {v16.8b}, [x0], x2
|
||||
st1 {v17.8b}, [x0], x2
|
||||
st1 {v18.8b}, [x0], x2
|
||||
st1 {v19.8b}, [x0], x2
|
||||
st1 {v20.8b}, [x0], x2
|
||||
st1 {v21.8b}, [x0], x2
|
||||
st1 {v22.8b}, [x0], x2
|
||||
st1 {v23.8b}, [x0], x2
|
||||
st1 {v16.8B}, [x0], x2
|
||||
st1 {v17.8B}, [x0], x2
|
||||
st1 {v18.8B}, [x0], x2
|
||||
st1 {v19.8B}, [x0], x2
|
||||
st1 {v20.8B}, [x0], x2
|
||||
st1 {v21.8B}, [x0], x2
|
||||
st1 {v22.8B}, [x0], x2
|
||||
st1 {v23.8B}, [x0], x2
|
||||
|
||||
ret x10
|
||||
endfunc
|
||||
@@ -497,45 +497,45 @@ function \type\()_h264_qpel8_hv_lowpass_l2_neon
|
||||
mov x10, x30
|
||||
bl put_h264_qpel8_hv_lowpass_neon_top
|
||||
|
||||
ld1 {v0.8b, v1.8b}, [x2], #16
|
||||
ld1 {v2.8b, v3.8b}, [x2], #16
|
||||
urhadd v0.8b, v0.8b, v16.8b
|
||||
urhadd v1.8b, v1.8b, v17.8b
|
||||
ld1 {v4.8b, v5.8b}, [x2], #16
|
||||
urhadd v2.8b, v2.8b, v18.8b
|
||||
urhadd v3.8b, v3.8b, v19.8b
|
||||
ld1 {v6.8b, v7.8b}, [x2], #16
|
||||
urhadd v4.8b, v4.8b, v20.8b
|
||||
urhadd v5.8b, v5.8b, v21.8b
|
||||
urhadd v6.8b, v6.8b, v22.8b
|
||||
urhadd v7.8b, v7.8b, v23.8b
|
||||
ld1 {v0.8B, v1.8B}, [x2], #16
|
||||
ld1 {v2.8B, v3.8B}, [x2], #16
|
||||
urhadd v0.8B, v0.8B, v16.8B
|
||||
urhadd v1.8B, v1.8B, v17.8B
|
||||
ld1 {v4.8B, v5.8B}, [x2], #16
|
||||
urhadd v2.8B, v2.8B, v18.8B
|
||||
urhadd v3.8B, v3.8B, v19.8B
|
||||
ld1 {v6.8B, v7.8B}, [x2], #16
|
||||
urhadd v4.8B, v4.8B, v20.8B
|
||||
urhadd v5.8B, v5.8B, v21.8B
|
||||
urhadd v6.8B, v6.8B, v22.8B
|
||||
urhadd v7.8B, v7.8B, v23.8B
|
||||
.ifc \type,avg
|
||||
ld1 {v16.8b}, [x0], x3
|
||||
urhadd v0.8b, v0.8b, v16.8b
|
||||
ld1 {v17.8b}, [x0], x3
|
||||
urhadd v1.8b, v1.8b, v17.8b
|
||||
ld1 {v18.8b}, [x0], x3
|
||||
urhadd v2.8b, v2.8b, v18.8b
|
||||
ld1 {v19.8b}, [x0], x3
|
||||
urhadd v3.8b, v3.8b, v19.8b
|
||||
ld1 {v20.8b}, [x0], x3
|
||||
urhadd v4.8b, v4.8b, v20.8b
|
||||
ld1 {v21.8b}, [x0], x3
|
||||
urhadd v5.8b, v5.8b, v21.8b
|
||||
ld1 {v22.8b}, [x0], x3
|
||||
urhadd v6.8b, v6.8b, v22.8b
|
||||
ld1 {v23.8b}, [x0], x3
|
||||
urhadd v7.8b, v7.8b, v23.8b
|
||||
ld1 {v16.8B}, [x0], x3
|
||||
urhadd v0.8B, v0.8B, v16.8B
|
||||
ld1 {v17.8B}, [x0], x3
|
||||
urhadd v1.8B, v1.8B, v17.8B
|
||||
ld1 {v18.8B}, [x0], x3
|
||||
urhadd v2.8B, v2.8B, v18.8B
|
||||
ld1 {v19.8B}, [x0], x3
|
||||
urhadd v3.8B, v3.8B, v19.8B
|
||||
ld1 {v20.8B}, [x0], x3
|
||||
urhadd v4.8B, v4.8B, v20.8B
|
||||
ld1 {v21.8B}, [x0], x3
|
||||
urhadd v5.8B, v5.8B, v21.8B
|
||||
ld1 {v22.8B}, [x0], x3
|
||||
urhadd v6.8B, v6.8B, v22.8B
|
||||
ld1 {v23.8B}, [x0], x3
|
||||
urhadd v7.8B, v7.8B, v23.8B
|
||||
sub x0, x0, x3, lsl #3
|
||||
.endif
|
||||
st1 {v0.8b}, [x0], x3
|
||||
st1 {v1.8b}, [x0], x3
|
||||
st1 {v2.8b}, [x0], x3
|
||||
st1 {v3.8b}, [x0], x3
|
||||
st1 {v4.8b}, [x0], x3
|
||||
st1 {v5.8b}, [x0], x3
|
||||
st1 {v6.8b}, [x0], x3
|
||||
st1 {v7.8b}, [x0], x3
|
||||
st1 {v0.8B}, [x0], x3
|
||||
st1 {v1.8B}, [x0], x3
|
||||
st1 {v2.8B}, [x0], x3
|
||||
st1 {v3.8B}, [x0], x3
|
||||
st1 {v4.8B}, [x0], x3
|
||||
st1 {v5.8B}, [x0], x3
|
||||
st1 {v6.8B}, [x0], x3
|
||||
st1 {v7.8B}, [x0], x3
|
||||
|
||||
ret x10
|
||||
endfunc
|
||||
@@ -579,8 +579,8 @@ function \type\()_h264_qpel16_hv_lowpass_l2_neon
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
h264_qpel16_hv put
|
||||
h264_qpel16_hv avg
|
||||
h264_qpel16_hv put
|
||||
h264_qpel16_hv avg
|
||||
|
||||
.macro h264_qpel8 type
|
||||
function ff_\type\()_h264_qpel8_mc10_neon, export=1
|
||||
@@ -758,8 +758,8 @@ function ff_\type\()_h264_qpel8_mc33_neon, export=1
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
h264_qpel8 put
|
||||
h264_qpel8 avg
|
||||
h264_qpel8 put
|
||||
h264_qpel8 avg
|
||||
|
||||
.macro h264_qpel16 type
|
||||
function ff_\type\()_h264_qpel16_mc10_neon, export=1
|
||||
@@ -930,5 +930,5 @@ function ff_\type\()_h264_qpel16_mc33_neon, export=1
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
h264_qpel16 put
|
||||
h264_qpel16 avg
|
||||
h264_qpel16 put
|
||||
h264_qpel16 avg
|
||||
|
||||
+181
-181
@@ -26,295 +26,295 @@
|
||||
.if \avg
|
||||
mov x12, x0
|
||||
.endif
|
||||
1: ld1 {v0.16b}, [x1], x2
|
||||
ld1 {v1.16b}, [x1], x2
|
||||
ld1 {v2.16b}, [x1], x2
|
||||
ld1 {v3.16b}, [x1], x2
|
||||
1: ld1 {v0.16B}, [x1], x2
|
||||
ld1 {v1.16B}, [x1], x2
|
||||
ld1 {v2.16B}, [x1], x2
|
||||
ld1 {v3.16B}, [x1], x2
|
||||
.if \avg
|
||||
ld1 {v4.16b}, [x12], x2
|
||||
urhadd v0.16b, v0.16b, v4.16b
|
||||
ld1 {v5.16b}, [x12], x2
|
||||
urhadd v1.16b, v1.16b, v5.16b
|
||||
ld1 {v6.16b}, [x12], x2
|
||||
urhadd v2.16b, v2.16b, v6.16b
|
||||
ld1 {v7.16b}, [x12], x2
|
||||
urhadd v3.16b, v3.16b, v7.16b
|
||||
ld1 {v4.16B}, [x12], x2
|
||||
urhadd v0.16B, v0.16B, v4.16B
|
||||
ld1 {v5.16B}, [x12], x2
|
||||
urhadd v1.16B, v1.16B, v5.16B
|
||||
ld1 {v6.16B}, [x12], x2
|
||||
urhadd v2.16B, v2.16B, v6.16B
|
||||
ld1 {v7.16B}, [x12], x2
|
||||
urhadd v3.16B, v3.16B, v7.16B
|
||||
.endif
|
||||
subs w3, w3, #4
|
||||
st1 {v0.16b}, [x0], x2
|
||||
st1 {v1.16b}, [x0], x2
|
||||
st1 {v2.16b}, [x0], x2
|
||||
st1 {v3.16b}, [x0], x2
|
||||
st1 {v0.16B}, [x0], x2
|
||||
st1 {v1.16B}, [x0], x2
|
||||
st1 {v2.16B}, [x0], x2
|
||||
st1 {v3.16B}, [x0], x2
|
||||
b.ne 1b
|
||||
ret
|
||||
.endm
|
||||
|
||||
.macro pixels16_x2 rnd=1, avg=0
|
||||
1: ld1 {v0.16b, v1.16b}, [x1], x2
|
||||
ld1 {v2.16b, v3.16b}, [x1], x2
|
||||
1: ld1 {v0.16B, v1.16B}, [x1], x2
|
||||
ld1 {v2.16B, v3.16B}, [x1], x2
|
||||
subs w3, w3, #2
|
||||
ext v1.16b, v0.16b, v1.16b, #1
|
||||
avg v0.16b, v0.16b, v1.16b
|
||||
ext v3.16b, v2.16b, v3.16b, #1
|
||||
avg v2.16b, v2.16b, v3.16b
|
||||
ext v1.16B, v0.16B, v1.16B, #1
|
||||
avg v0.16B, v0.16B, v1.16B
|
||||
ext v3.16B, v2.16B, v3.16B, #1
|
||||
avg v2.16B, v2.16B, v3.16B
|
||||
.if \avg
|
||||
ld1 {v1.16b}, [x0], x2
|
||||
ld1 {v3.16b}, [x0]
|
||||
urhadd v0.16b, v0.16b, v1.16b
|
||||
urhadd v2.16b, v2.16b, v3.16b
|
||||
ld1 {v1.16B}, [x0], x2
|
||||
ld1 {v3.16B}, [x0]
|
||||
urhadd v0.16B, v0.16B, v1.16B
|
||||
urhadd v2.16B, v2.16B, v3.16B
|
||||
sub x0, x0, x2
|
||||
.endif
|
||||
st1 {v0.16b}, [x0], x2
|
||||
st1 {v2.16b}, [x0], x2
|
||||
st1 {v0.16B}, [x0], x2
|
||||
st1 {v2.16B}, [x0], x2
|
||||
b.ne 1b
|
||||
ret
|
||||
.endm
|
||||
|
||||
.macro pixels16_y2 rnd=1, avg=0
|
||||
sub w3, w3, #2
|
||||
ld1 {v0.16b}, [x1], x2
|
||||
ld1 {v1.16b}, [x1], x2
|
||||
ld1 {v0.16B}, [x1], x2
|
||||
ld1 {v1.16B}, [x1], x2
|
||||
1: subs w3, w3, #2
|
||||
avg v2.16b, v0.16b, v1.16b
|
||||
ld1 {v0.16b}, [x1], x2
|
||||
avg v3.16b, v0.16b, v1.16b
|
||||
ld1 {v1.16b}, [x1], x2
|
||||
avg v2.16B, v0.16B, v1.16B
|
||||
ld1 {v0.16B}, [x1], x2
|
||||
avg v3.16B, v0.16B, v1.16B
|
||||
ld1 {v1.16B}, [x1], x2
|
||||
.if \avg
|
||||
ld1 {v4.16b}, [x0], x2
|
||||
ld1 {v5.16b}, [x0]
|
||||
urhadd v2.16b, v2.16b, v4.16b
|
||||
urhadd v3.16b, v3.16b, v5.16b
|
||||
ld1 {v4.16B}, [x0], x2
|
||||
ld1 {v5.16B}, [x0]
|
||||
urhadd v2.16B, v2.16B, v4.16B
|
||||
urhadd v3.16B, v3.16B, v5.16B
|
||||
sub x0, x0, x2
|
||||
.endif
|
||||
st1 {v2.16b}, [x0], x2
|
||||
st1 {v3.16b}, [x0], x2
|
||||
st1 {v2.16B}, [x0], x2
|
||||
st1 {v3.16B}, [x0], x2
|
||||
b.ne 1b
|
||||
|
||||
avg v2.16b, v0.16b, v1.16b
|
||||
ld1 {v0.16b}, [x1], x2
|
||||
avg v3.16b, v0.16b, v1.16b
|
||||
avg v2.16B, v0.16B, v1.16B
|
||||
ld1 {v0.16B}, [x1], x2
|
||||
avg v3.16B, v0.16B, v1.16B
|
||||
.if \avg
|
||||
ld1 {v4.16b}, [x0], x2
|
||||
ld1 {v5.16b}, [x0]
|
||||
urhadd v2.16b, v2.16b, v4.16b
|
||||
urhadd v3.16b, v3.16b, v5.16b
|
||||
ld1 {v4.16B}, [x0], x2
|
||||
ld1 {v5.16B}, [x0]
|
||||
urhadd v2.16B, v2.16B, v4.16B
|
||||
urhadd v3.16B, v3.16B, v5.16B
|
||||
sub x0, x0, x2
|
||||
.endif
|
||||
st1 {v2.16b}, [x0], x2
|
||||
st1 {v3.16b}, [x0], x2
|
||||
st1 {v2.16B}, [x0], x2
|
||||
st1 {v3.16B}, [x0], x2
|
||||
|
||||
ret
|
||||
.endm
|
||||
|
||||
.macro pixels16_xy2 rnd=1, avg=0
|
||||
sub w3, w3, #2
|
||||
ld1 {v0.16b, v1.16b}, [x1], x2
|
||||
ld1 {v4.16b, v5.16b}, [x1], x2
|
||||
ld1 {v0.16B, v1.16B}, [x1], x2
|
||||
ld1 {v4.16B, v5.16B}, [x1], x2
|
||||
NRND movi v26.8H, #1
|
||||
ext v1.16b, v0.16b, v1.16b, #1
|
||||
ext v5.16b, v4.16b, v5.16b, #1
|
||||
uaddl v16.8h, v0.8b, v1.8b
|
||||
uaddl2 v20.8h, v0.16b, v1.16b
|
||||
uaddl v18.8h, v4.8b, v5.8b
|
||||
uaddl2 v22.8h, v4.16b, v5.16b
|
||||
ext v1.16B, v0.16B, v1.16B, #1
|
||||
ext v5.16B, v4.16B, v5.16B, #1
|
||||
uaddl v16.8H, v0.8B, v1.8B
|
||||
uaddl2 v20.8H, v0.16B, v1.16B
|
||||
uaddl v18.8H, v4.8B, v5.8B
|
||||
uaddl2 v22.8H, v4.16B, v5.16B
|
||||
1: subs w3, w3, #2
|
||||
ld1 {v0.16b, v1.16b}, [x1], x2
|
||||
add v24.8h, v16.8h, v18.8h
|
||||
ld1 {v0.16B, v1.16B}, [x1], x2
|
||||
add v24.8H, v16.8H, v18.8H
|
||||
NRND add v24.8H, v24.8H, v26.8H
|
||||
ext v30.16b, v0.16b, v1.16b, #1
|
||||
add v1.8h, v20.8h, v22.8h
|
||||
mshrn v28.8b, v24.8h, #2
|
||||
ext v30.16B, v0.16B, v1.16B, #1
|
||||
add v1.8H, v20.8H, v22.8H
|
||||
mshrn v28.8B, v24.8H, #2
|
||||
NRND add v1.8H, v1.8H, v26.8H
|
||||
mshrn2 v28.16b, v1.8h, #2
|
||||
mshrn2 v28.16B, v1.8H, #2
|
||||
.if \avg
|
||||
ld1 {v16.16b}, [x0]
|
||||
urhadd v28.16b, v28.16b, v16.16b
|
||||
ld1 {v16.16B}, [x0]
|
||||
urhadd v28.16B, v28.16B, v16.16B
|
||||
.endif
|
||||
uaddl v16.8h, v0.8b, v30.8b
|
||||
ld1 {v2.16b, v3.16b}, [x1], x2
|
||||
uaddl2 v20.8h, v0.16b, v30.16b
|
||||
st1 {v28.16b}, [x0], x2
|
||||
add v24.8h, v16.8h, v18.8h
|
||||
uaddl v16.8H, v0.8B, v30.8B
|
||||
ld1 {v2.16B, v3.16B}, [x1], x2
|
||||
uaddl2 v20.8H, v0.16B, v30.16B
|
||||
st1 {v28.16B}, [x0], x2
|
||||
add v24.8H, v16.8H, v18.8H
|
||||
NRND add v24.8H, v24.8H, v26.8H
|
||||
ext v3.16b, v2.16b, v3.16b, #1
|
||||
add v0.8h, v20.8h, v22.8h
|
||||
mshrn v30.8b, v24.8h, #2
|
||||
ext v3.16B, v2.16B, v3.16B, #1
|
||||
add v0.8H, v20.8H, v22.8H
|
||||
mshrn v30.8B, v24.8H, #2
|
||||
NRND add v0.8H, v0.8H, v26.8H
|
||||
mshrn2 v30.16b, v0.8h, #2
|
||||
mshrn2 v30.16B, v0.8H, #2
|
||||
.if \avg
|
||||
ld1 {v18.16b}, [x0]
|
||||
urhadd v30.16b, v30.16b, v18.16b
|
||||
ld1 {v18.16B}, [x0]
|
||||
urhadd v30.16B, v30.16B, v18.16B
|
||||
.endif
|
||||
uaddl v18.8h, v2.8b, v3.8b
|
||||
uaddl2 v22.8h, v2.16b, v3.16b
|
||||
st1 {v30.16b}, [x0], x2
|
||||
uaddl v18.8H, v2.8B, v3.8B
|
||||
uaddl2 v22.8H, v2.16B, v3.16B
|
||||
st1 {v30.16B}, [x0], x2
|
||||
b.gt 1b
|
||||
|
||||
ld1 {v0.16b, v1.16b}, [x1], x2
|
||||
add v24.8h, v16.8h, v18.8h
|
||||
ld1 {v0.16B, v1.16B}, [x1], x2
|
||||
add v24.8H, v16.8H, v18.8H
|
||||
NRND add v24.8H, v24.8H, v26.8H
|
||||
ext v30.16b, v0.16b, v1.16b, #1
|
||||
add v1.8h, v20.8h, v22.8h
|
||||
mshrn v28.8b, v24.8h, #2
|
||||
ext v30.16B, v0.16B, v1.16B, #1
|
||||
add v1.8H, v20.8H, v22.8H
|
||||
mshrn v28.8B, v24.8H, #2
|
||||
NRND add v1.8H, v1.8H, v26.8H
|
||||
mshrn2 v28.16b, v1.8h, #2
|
||||
mshrn2 v28.16B, v1.8H, #2
|
||||
.if \avg
|
||||
ld1 {v16.16b}, [x0]
|
||||
urhadd v28.16b, v28.16b, v16.16b
|
||||
ld1 {v16.16B}, [x0]
|
||||
urhadd v28.16B, v28.16B, v16.16B
|
||||
.endif
|
||||
uaddl v16.8h, v0.8b, v30.8b
|
||||
uaddl2 v20.8h, v0.16b, v30.16b
|
||||
st1 {v28.16b}, [x0], x2
|
||||
add v24.8h, v16.8h, v18.8h
|
||||
uaddl v16.8H, v0.8B, v30.8B
|
||||
uaddl2 v20.8H, v0.16B, v30.16B
|
||||
st1 {v28.16B}, [x0], x2
|
||||
add v24.8H, v16.8H, v18.8H
|
||||
NRND add v24.8H, v24.8H, v26.8H
|
||||
add v0.8h, v20.8h, v22.8h
|
||||
mshrn v30.8b, v24.8h, #2
|
||||
add v0.8H, v20.8H, v22.8H
|
||||
mshrn v30.8B, v24.8H, #2
|
||||
NRND add v0.8H, v0.8H, v26.8H
|
||||
mshrn2 v30.16b, v0.8h, #2
|
||||
mshrn2 v30.16B, v0.8H, #2
|
||||
.if \avg
|
||||
ld1 {v18.16b}, [x0]
|
||||
urhadd v30.16b, v30.16b, v18.16b
|
||||
ld1 {v18.16B}, [x0]
|
||||
urhadd v30.16B, v30.16B, v18.16B
|
||||
.endif
|
||||
st1 {v30.16b}, [x0], x2
|
||||
st1 {v30.16B}, [x0], x2
|
||||
|
||||
ret
|
||||
.endm
|
||||
|
||||
.macro pixels8 rnd=1, avg=0
|
||||
1: ld1 {v0.8b}, [x1], x2
|
||||
ld1 {v1.8b}, [x1], x2
|
||||
ld1 {v2.8b}, [x1], x2
|
||||
ld1 {v3.8b}, [x1], x2
|
||||
1: ld1 {v0.8B}, [x1], x2
|
||||
ld1 {v1.8B}, [x1], x2
|
||||
ld1 {v2.8B}, [x1], x2
|
||||
ld1 {v3.8B}, [x1], x2
|
||||
.if \avg
|
||||
ld1 {v4.8b}, [x0], x2
|
||||
urhadd v0.8b, v0.8b, v4.8b
|
||||
ld1 {v5.8b}, [x0], x2
|
||||
urhadd v1.8b, v1.8b, v5.8b
|
||||
ld1 {v6.8b}, [x0], x2
|
||||
urhadd v2.8b, v2.8b, v6.8b
|
||||
ld1 {v7.8b}, [x0], x2
|
||||
urhadd v3.8b, v3.8b, v7.8b
|
||||
ld1 {v4.8B}, [x0], x2
|
||||
urhadd v0.8B, v0.8B, v4.8B
|
||||
ld1 {v5.8B}, [x0], x2
|
||||
urhadd v1.8B, v1.8B, v5.8B
|
||||
ld1 {v6.8B}, [x0], x2
|
||||
urhadd v2.8B, v2.8B, v6.8B
|
||||
ld1 {v7.8B}, [x0], x2
|
||||
urhadd v3.8B, v3.8B, v7.8B
|
||||
sub x0, x0, x2, lsl #2
|
||||
.endif
|
||||
subs w3, w3, #4
|
||||
st1 {v0.8b}, [x0], x2
|
||||
st1 {v1.8b}, [x0], x2
|
||||
st1 {v2.8b}, [x0], x2
|
||||
st1 {v3.8b}, [x0], x2
|
||||
st1 {v0.8B}, [x0], x2
|
||||
st1 {v1.8B}, [x0], x2
|
||||
st1 {v2.8B}, [x0], x2
|
||||
st1 {v3.8B}, [x0], x2
|
||||
b.ne 1b
|
||||
ret
|
||||
.endm
|
||||
|
||||
.macro pixels8_x2 rnd=1, avg=0
|
||||
1: ld1 {v0.8b, v1.8b}, [x1], x2
|
||||
ext v1.8b, v0.8b, v1.8b, #1
|
||||
ld1 {v2.8b, v3.8b}, [x1], x2
|
||||
ext v3.8b, v2.8b, v3.8b, #1
|
||||
1: ld1 {v0.8B, v1.8B}, [x1], x2
|
||||
ext v1.8B, v0.8B, v1.8B, #1
|
||||
ld1 {v2.8B, v3.8B}, [x1], x2
|
||||
ext v3.8B, v2.8B, v3.8B, #1
|
||||
subs w3, w3, #2
|
||||
avg v0.8b, v0.8b, v1.8b
|
||||
avg v2.8b, v2.8b, v3.8b
|
||||
avg v0.8B, v0.8B, v1.8B
|
||||
avg v2.8B, v2.8B, v3.8B
|
||||
.if \avg
|
||||
ld1 {v4.8b}, [x0], x2
|
||||
ld1 {v5.8b}, [x0]
|
||||
urhadd v0.8b, v0.8b, v4.8b
|
||||
urhadd v2.8b, v2.8b, v5.8b
|
||||
ld1 {v4.8B}, [x0], x2
|
||||
ld1 {v5.8B}, [x0]
|
||||
urhadd v0.8B, v0.8B, v4.8B
|
||||
urhadd v2.8B, v2.8B, v5.8B
|
||||
sub x0, x0, x2
|
||||
.endif
|
||||
st1 {v0.8b}, [x0], x2
|
||||
st1 {v2.8b}, [x0], x2
|
||||
st1 {v0.8B}, [x0], x2
|
||||
st1 {v2.8B}, [x0], x2
|
||||
b.ne 1b
|
||||
ret
|
||||
.endm
|
||||
|
||||
.macro pixels8_y2 rnd=1, avg=0
|
||||
sub w3, w3, #2
|
||||
ld1 {v0.8b}, [x1], x2
|
||||
ld1 {v1.8b}, [x1], x2
|
||||
ld1 {v0.8B}, [x1], x2
|
||||
ld1 {v1.8B}, [x1], x2
|
||||
1: subs w3, w3, #2
|
||||
avg v4.8b, v0.8b, v1.8b
|
||||
ld1 {v0.8b}, [x1], x2
|
||||
avg v5.8b, v0.8b, v1.8b
|
||||
ld1 {v1.8b}, [x1], x2
|
||||
avg v4.8B, v0.8B, v1.8B
|
||||
ld1 {v0.8B}, [x1], x2
|
||||
avg v5.8B, v0.8B, v1.8B
|
||||
ld1 {v1.8B}, [x1], x2
|
||||
.if \avg
|
||||
ld1 {v2.8b}, [x0], x2
|
||||
ld1 {v3.8b}, [x0]
|
||||
urhadd v4.8b, v4.8b, v2.8b
|
||||
urhadd v5.8b, v5.8b, v3.8b
|
||||
ld1 {v2.8B}, [x0], x2
|
||||
ld1 {v3.8B}, [x0]
|
||||
urhadd v4.8B, v4.8B, v2.8B
|
||||
urhadd v5.8B, v5.8B, v3.8B
|
||||
sub x0, x0, x2
|
||||
.endif
|
||||
st1 {v4.8b}, [x0], x2
|
||||
st1 {v5.8b}, [x0], x2
|
||||
st1 {v4.8B}, [x0], x2
|
||||
st1 {v5.8B}, [x0], x2
|
||||
b.ne 1b
|
||||
|
||||
avg v4.8b, v0.8b, v1.8b
|
||||
ld1 {v0.8b}, [x1], x2
|
||||
avg v5.8b, v0.8b, v1.8b
|
||||
avg v4.8B, v0.8B, v1.8B
|
||||
ld1 {v0.8B}, [x1], x2
|
||||
avg v5.8B, v0.8B, v1.8B
|
||||
.if \avg
|
||||
ld1 {v2.8b}, [x0], x2
|
||||
ld1 {v3.8b}, [x0]
|
||||
urhadd v4.8b, v4.8b, v2.8b
|
||||
urhadd v5.8b, v5.8b, v3.8b
|
||||
ld1 {v2.8B}, [x0], x2
|
||||
ld1 {v3.8B}, [x0]
|
||||
urhadd v4.8B, v4.8B, v2.8B
|
||||
urhadd v5.8B, v5.8B, v3.8B
|
||||
sub x0, x0, x2
|
||||
.endif
|
||||
st1 {v4.8b}, [x0], x2
|
||||
st1 {v5.8b}, [x0], x2
|
||||
st1 {v4.8B}, [x0], x2
|
||||
st1 {v5.8B}, [x0], x2
|
||||
|
||||
ret
|
||||
.endm
|
||||
|
||||
.macro pixels8_xy2 rnd=1, avg=0
|
||||
sub w3, w3, #2
|
||||
ld1 {v0.16b}, [x1], x2
|
||||
ld1 {v1.16b}, [x1], x2
|
||||
ld1 {v0.16B}, [x1], x2
|
||||
ld1 {v1.16B}, [x1], x2
|
||||
NRND movi v19.8H, #1
|
||||
ext v4.16b, v0.16b, v4.16b, #1
|
||||
ext v6.16b, v1.16b, v6.16b, #1
|
||||
uaddl v16.8h, v0.8b, v4.8b
|
||||
uaddl v17.8h, v1.8b, v6.8b
|
||||
ext v4.16B, v0.16B, v4.16B, #1
|
||||
ext v6.16B, v1.16B, v6.16B, #1
|
||||
uaddl v16.8H, v0.8B, v4.8B
|
||||
uaddl v17.8H, v1.8B, v6.8B
|
||||
1: subs w3, w3, #2
|
||||
ld1 {v0.16b}, [x1], x2
|
||||
add v18.8h, v16.8h, v17.8h
|
||||
ext v4.16b, v0.16b, v4.16b, #1
|
||||
ld1 {v0.16B}, [x1], x2
|
||||
add v18.8H, v16.8H, v17.8H
|
||||
ext v4.16B, v0.16B, v4.16B, #1
|
||||
NRND add v18.8H, v18.8H, v19.8H
|
||||
uaddl v16.8h, v0.8b, v4.8b
|
||||
mshrn v5.8b, v18.8h, #2
|
||||
ld1 {v1.16b}, [x1], x2
|
||||
add v18.8h, v16.8h, v17.8h
|
||||
uaddl v16.8H, v0.8B, v4.8B
|
||||
mshrn v5.8B, v18.8H, #2
|
||||
ld1 {v1.16B}, [x1], x2
|
||||
add v18.8H, v16.8H, v17.8H
|
||||
.if \avg
|
||||
ld1 {v7.8b}, [x0]
|
||||
urhadd v5.8b, v5.8b, v7.8b
|
||||
ld1 {v7.8B}, [x0]
|
||||
urhadd v5.8B, v5.8B, v7.8B
|
||||
.endif
|
||||
NRND add v18.8H, v18.8H, v19.8H
|
||||
st1 {v5.8b}, [x0], x2
|
||||
mshrn v7.8b, v18.8h, #2
|
||||
st1 {v5.8B}, [x0], x2
|
||||
mshrn v7.8B, v18.8H, #2
|
||||
.if \avg
|
||||
ld1 {v5.8b}, [x0]
|
||||
urhadd v7.8b, v7.8b, v5.8b
|
||||
ld1 {v5.8B}, [x0]
|
||||
urhadd v7.8B, v7.8B, v5.8B
|
||||
.endif
|
||||
ext v6.16b, v1.16b, v6.16b, #1
|
||||
uaddl v17.8h, v1.8b, v6.8b
|
||||
st1 {v7.8b}, [x0], x2
|
||||
ext v6.16B, v1.16B, v6.16B, #1
|
||||
uaddl v17.8H, v1.8B, v6.8B
|
||||
st1 {v7.8B}, [x0], x2
|
||||
b.gt 1b
|
||||
|
||||
ld1 {v0.16b}, [x1], x2
|
||||
add v18.8h, v16.8h, v17.8h
|
||||
ext v4.16b, v0.16b, v4.16b, #1
|
||||
ld1 {v0.16B}, [x1], x2
|
||||
add v18.8H, v16.8H, v17.8H
|
||||
ext v4.16B, v0.16B, v4.16B, #1
|
||||
NRND add v18.8H, v18.8H, v19.8H
|
||||
uaddl v16.8h, v0.8b, v4.8b
|
||||
mshrn v5.8b, v18.8h, #2
|
||||
add v18.8h, v16.8h, v17.8h
|
||||
uaddl v16.8H, v0.8B, v4.8B
|
||||
mshrn v5.8B, v18.8H, #2
|
||||
add v18.8H, v16.8H, v17.8H
|
||||
.if \avg
|
||||
ld1 {v7.8b}, [x0]
|
||||
urhadd v5.8b, v5.8b, v7.8b
|
||||
ld1 {v7.8B}, [x0]
|
||||
urhadd v5.8B, v5.8B, v7.8B
|
||||
.endif
|
||||
NRND add v18.8H, v18.8H, v19.8H
|
||||
st1 {v5.8b}, [x0], x2
|
||||
mshrn v7.8b, v18.8h, #2
|
||||
st1 {v5.8B}, [x0], x2
|
||||
mshrn v7.8B, v18.8H, #2
|
||||
.if \avg
|
||||
ld1 {v5.8b}, [x0]
|
||||
urhadd v7.8b, v7.8b, v5.8b
|
||||
ld1 {v5.8B}, [x0]
|
||||
urhadd v7.8B, v7.8B, v5.8B
|
||||
.endif
|
||||
st1 {v7.8b}, [x0], x2
|
||||
st1 {v7.8B}, [x0], x2
|
||||
|
||||
ret
|
||||
.endm
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
#ifndef AVCODEC_AARCH64_IDCT_H
|
||||
#define AVCODEC_AARCH64_IDCT_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
void ff_simple_idct_neon(int16_t *data);
|
||||
|
||||
+96
-96
@@ -17,133 +17,133 @@
|
||||
*/
|
||||
|
||||
.macro transpose_8x8B r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
|
||||
trn1 \r8\().8b, \r0\().8b, \r1\().8b
|
||||
trn2 \r9\().8b, \r0\().8b, \r1\().8b
|
||||
trn1 \r1\().8b, \r2\().8b, \r3\().8b
|
||||
trn2 \r3\().8b, \r2\().8b, \r3\().8b
|
||||
trn1 \r0\().8b, \r4\().8b, \r5\().8b
|
||||
trn2 \r5\().8b, \r4\().8b, \r5\().8b
|
||||
trn1 \r2\().8b, \r6\().8b, \r7\().8b
|
||||
trn2 \r7\().8b, \r6\().8b, \r7\().8b
|
||||
trn1 \r8\().8B, \r0\().8B, \r1\().8B
|
||||
trn2 \r9\().8B, \r0\().8B, \r1\().8B
|
||||
trn1 \r1\().8B, \r2\().8B, \r3\().8B
|
||||
trn2 \r3\().8B, \r2\().8B, \r3\().8B
|
||||
trn1 \r0\().8B, \r4\().8B, \r5\().8B
|
||||
trn2 \r5\().8B, \r4\().8B, \r5\().8B
|
||||
trn1 \r2\().8B, \r6\().8B, \r7\().8B
|
||||
trn2 \r7\().8B, \r6\().8B, \r7\().8B
|
||||
|
||||
trn1 \r4\().4h, \r0\().4h, \r2\().4h
|
||||
trn2 \r2\().4h, \r0\().4h, \r2\().4h
|
||||
trn1 \r6\().4h, \r5\().4h, \r7\().4h
|
||||
trn2 \r7\().4h, \r5\().4h, \r7\().4h
|
||||
trn1 \r5\().4h, \r9\().4h, \r3\().4h
|
||||
trn2 \r9\().4h, \r9\().4h, \r3\().4h
|
||||
trn1 \r3\().4h, \r8\().4h, \r1\().4h
|
||||
trn2 \r8\().4h, \r8\().4h, \r1\().4h
|
||||
trn1 \r4\().4H, \r0\().4H, \r2\().4H
|
||||
trn2 \r2\().4H, \r0\().4H, \r2\().4H
|
||||
trn1 \r6\().4H, \r5\().4H, \r7\().4H
|
||||
trn2 \r7\().4H, \r5\().4H, \r7\().4H
|
||||
trn1 \r5\().4H, \r9\().4H, \r3\().4H
|
||||
trn2 \r9\().4H, \r9\().4H, \r3\().4H
|
||||
trn1 \r3\().4H, \r8\().4H, \r1\().4H
|
||||
trn2 \r8\().4H, \r8\().4H, \r1\().4H
|
||||
|
||||
trn1 \r0\().2s, \r3\().2s, \r4\().2s
|
||||
trn2 \r4\().2s, \r3\().2s, \r4\().2s
|
||||
trn1 \r0\().2S, \r3\().2S, \r4\().2S
|
||||
trn2 \r4\().2S, \r3\().2S, \r4\().2S
|
||||
|
||||
trn1 \r1\().2s, \r5\().2s, \r6\().2s
|
||||
trn2 \r5\().2s, \r5\().2s, \r6\().2s
|
||||
trn1 \r1\().2S, \r5\().2S, \r6\().2S
|
||||
trn2 \r5\().2S, \r5\().2S, \r6\().2S
|
||||
|
||||
trn2 \r6\().2s, \r8\().2s, \r2\().2s
|
||||
trn1 \r2\().2s, \r8\().2s, \r2\().2s
|
||||
trn2 \r6\().2S, \r8\().2S, \r2\().2S
|
||||
trn1 \r2\().2S, \r8\().2S, \r2\().2S
|
||||
|
||||
trn1 \r3\().2s, \r9\().2s, \r7\().2s
|
||||
trn2 \r7\().2s, \r9\().2s, \r7\().2s
|
||||
trn1 \r3\().2S, \r9\().2S, \r7\().2S
|
||||
trn2 \r7\().2S, \r9\().2S, \r7\().2S
|
||||
.endm
|
||||
|
||||
.macro transpose_8x16B r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
|
||||
trn1 \t0\().16b, \r0\().16b, \r1\().16b
|
||||
trn2 \t1\().16b, \r0\().16b, \r1\().16b
|
||||
trn1 \r1\().16b, \r2\().16b, \r3\().16b
|
||||
trn2 \r3\().16b, \r2\().16b, \r3\().16b
|
||||
trn1 \r0\().16b, \r4\().16b, \r5\().16b
|
||||
trn2 \r5\().16b, \r4\().16b, \r5\().16b
|
||||
trn1 \r2\().16b, \r6\().16b, \r7\().16b
|
||||
trn2 \r7\().16b, \r6\().16b, \r7\().16b
|
||||
trn1 \t0\().16B, \r0\().16B, \r1\().16B
|
||||
trn2 \t1\().16B, \r0\().16B, \r1\().16B
|
||||
trn1 \r1\().16B, \r2\().16B, \r3\().16B
|
||||
trn2 \r3\().16B, \r2\().16B, \r3\().16B
|
||||
trn1 \r0\().16B, \r4\().16B, \r5\().16B
|
||||
trn2 \r5\().16B, \r4\().16B, \r5\().16B
|
||||
trn1 \r2\().16B, \r6\().16B, \r7\().16B
|
||||
trn2 \r7\().16B, \r6\().16B, \r7\().16B
|
||||
|
||||
trn1 \r4\().8h, \r0\().8h, \r2\().8h
|
||||
trn2 \r2\().8h, \r0\().8h, \r2\().8h
|
||||
trn1 \r6\().8h, \r5\().8h, \r7\().8h
|
||||
trn2 \r7\().8h, \r5\().8h, \r7\().8h
|
||||
trn1 \r5\().8h, \t1\().8h, \r3\().8h
|
||||
trn2 \t1\().8h, \t1\().8h, \r3\().8h
|
||||
trn1 \r3\().8h, \t0\().8h, \r1\().8h
|
||||
trn2 \t0\().8h, \t0\().8h, \r1\().8h
|
||||
trn1 \r4\().8H, \r0\().8H, \r2\().8H
|
||||
trn2 \r2\().8H, \r0\().8H, \r2\().8H
|
||||
trn1 \r6\().8H, \r5\().8H, \r7\().8H
|
||||
trn2 \r7\().8H, \r5\().8H, \r7\().8H
|
||||
trn1 \r5\().8H, \t1\().8H, \r3\().8H
|
||||
trn2 \t1\().8H, \t1\().8H, \r3\().8H
|
||||
trn1 \r3\().8H, \t0\().8H, \r1\().8H
|
||||
trn2 \t0\().8H, \t0\().8H, \r1\().8H
|
||||
|
||||
trn1 \r0\().4s, \r3\().4s, \r4\().4s
|
||||
trn2 \r4\().4s, \r3\().4s, \r4\().4s
|
||||
trn1 \r0\().4S, \r3\().4S, \r4\().4S
|
||||
trn2 \r4\().4S, \r3\().4S, \r4\().4S
|
||||
|
||||
trn1 \r1\().4s, \r5\().4s, \r6\().4s
|
||||
trn2 \r5\().4s, \r5\().4s, \r6\().4s
|
||||
trn1 \r1\().4S, \r5\().4S, \r6\().4S
|
||||
trn2 \r5\().4S, \r5\().4S, \r6\().4S
|
||||
|
||||
trn2 \r6\().4s, \t0\().4s, \r2\().4s
|
||||
trn1 \r2\().4s, \t0\().4s, \r2\().4s
|
||||
trn2 \r6\().4S, \t0\().4S, \r2\().4S
|
||||
trn1 \r2\().4S, \t0\().4S, \r2\().4S
|
||||
|
||||
trn1 \r3\().4s, \t1\().4s, \r7\().4s
|
||||
trn2 \r7\().4s, \t1\().4s, \r7\().4s
|
||||
trn1 \r3\().4S, \t1\().4S, \r7\().4S
|
||||
trn2 \r7\().4S, \t1\().4S, \r7\().4S
|
||||
.endm
|
||||
|
||||
.macro transpose_4x16B r0, r1, r2, r3, t4, t5, t6, t7
|
||||
trn1 \t4\().16b, \r0\().16b, \r1\().16b
|
||||
trn2 \t5\().16b, \r0\().16b, \r1\().16b
|
||||
trn1 \t6\().16b, \r2\().16b, \r3\().16b
|
||||
trn2 \t7\().16b, \r2\().16b, \r3\().16b
|
||||
trn1 \t4\().16B, \r0\().16B, \r1\().16B
|
||||
trn2 \t5\().16B, \r0\().16B, \r1\().16B
|
||||
trn1 \t6\().16B, \r2\().16B, \r3\().16B
|
||||
trn2 \t7\().16B, \r2\().16B, \r3\().16B
|
||||
|
||||
trn1 \r0\().8h, \t4\().8h, \t6\().8h
|
||||
trn2 \r2\().8h, \t4\().8h, \t6\().8h
|
||||
trn1 \r1\().8h, \t5\().8h, \t7\().8h
|
||||
trn2 \r3\().8h, \t5\().8h, \t7\().8h
|
||||
trn1 \r0\().8H, \t4\().8H, \t6\().8H
|
||||
trn2 \r2\().8H, \t4\().8H, \t6\().8H
|
||||
trn1 \r1\().8H, \t5\().8H, \t7\().8H
|
||||
trn2 \r3\().8H, \t5\().8H, \t7\().8H
|
||||
.endm
|
||||
|
||||
.macro transpose_4x8B r0, r1, r2, r3, t4, t5, t6, t7
|
||||
trn1 \t4\().8b, \r0\().8b, \r1\().8b
|
||||
trn2 \t5\().8b, \r0\().8b, \r1\().8b
|
||||
trn1 \t6\().8b, \r2\().8b, \r3\().8b
|
||||
trn2 \t7\().8b, \r2\().8b, \r3\().8b
|
||||
trn1 \t4\().8B, \r0\().8B, \r1\().8B
|
||||
trn2 \t5\().8B, \r0\().8B, \r1\().8B
|
||||
trn1 \t6\().8B, \r2\().8B, \r3\().8B
|
||||
trn2 \t7\().8B, \r2\().8B, \r3\().8B
|
||||
|
||||
trn1 \r0\().4h, \t4\().4h, \t6\().4h
|
||||
trn2 \r2\().4h, \t4\().4h, \t6\().4h
|
||||
trn1 \r1\().4h, \t5\().4h, \t7\().4h
|
||||
trn2 \r3\().4h, \t5\().4h, \t7\().4h
|
||||
trn1 \r0\().4H, \t4\().4H, \t6\().4H
|
||||
trn2 \r2\().4H, \t4\().4H, \t6\().4H
|
||||
trn1 \r1\().4H, \t5\().4H, \t7\().4H
|
||||
trn2 \r3\().4H, \t5\().4H, \t7\().4H
|
||||
.endm
|
||||
|
||||
.macro transpose_4x4H r0, r1, r2, r3, r4, r5, r6, r7
|
||||
trn1 \r4\().4h, \r0\().4h, \r1\().4h
|
||||
trn2 \r5\().4h, \r0\().4h, \r1\().4h
|
||||
trn1 \r6\().4h, \r2\().4h, \r3\().4h
|
||||
trn2 \r7\().4h, \r2\().4h, \r3\().4h
|
||||
trn1 \r0\().2s, \r4\().2s, \r6\().2s
|
||||
trn2 \r2\().2s, \r4\().2s, \r6\().2s
|
||||
trn1 \r1\().2s, \r5\().2s, \r7\().2s
|
||||
trn2 \r3\().2s, \r5\().2s, \r7\().2s
|
||||
trn1 \r4\().4H, \r0\().4H, \r1\().4H
|
||||
trn2 \r5\().4H, \r0\().4H, \r1\().4H
|
||||
trn1 \r6\().4H, \r2\().4H, \r3\().4H
|
||||
trn2 \r7\().4H, \r2\().4H, \r3\().4H
|
||||
trn1 \r0\().2S, \r4\().2S, \r6\().2S
|
||||
trn2 \r2\().2S, \r4\().2S, \r6\().2S
|
||||
trn1 \r1\().2S, \r5\().2S, \r7\().2S
|
||||
trn2 \r3\().2S, \r5\().2S, \r7\().2S
|
||||
.endm
|
||||
|
||||
.macro transpose_8x8H r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
|
||||
trn1 \r8\().8h, \r0\().8h, \r1\().8h
|
||||
trn2 \r9\().8h, \r0\().8h, \r1\().8h
|
||||
trn1 \r1\().8h, \r2\().8h, \r3\().8h
|
||||
trn2 \r3\().8h, \r2\().8h, \r3\().8h
|
||||
trn1 \r0\().8h, \r4\().8h, \r5\().8h
|
||||
trn2 \r5\().8h, \r4\().8h, \r5\().8h
|
||||
trn1 \r2\().8h, \r6\().8h, \r7\().8h
|
||||
trn2 \r7\().8h, \r6\().8h, \r7\().8h
|
||||
trn1 \r8\().8H, \r0\().8H, \r1\().8H
|
||||
trn2 \r9\().8H, \r0\().8H, \r1\().8H
|
||||
trn1 \r1\().8H, \r2\().8H, \r3\().8H
|
||||
trn2 \r3\().8H, \r2\().8H, \r3\().8H
|
||||
trn1 \r0\().8H, \r4\().8H, \r5\().8H
|
||||
trn2 \r5\().8H, \r4\().8H, \r5\().8H
|
||||
trn1 \r2\().8H, \r6\().8H, \r7\().8H
|
||||
trn2 \r7\().8H, \r6\().8H, \r7\().8H
|
||||
|
||||
trn1 \r4\().4s, \r0\().4s, \r2\().4s
|
||||
trn2 \r2\().4s, \r0\().4s, \r2\().4s
|
||||
trn1 \r6\().4s, \r5\().4s, \r7\().4s
|
||||
trn2 \r7\().4s, \r5\().4s, \r7\().4s
|
||||
trn1 \r5\().4s, \r9\().4s, \r3\().4s
|
||||
trn2 \r9\().4s, \r9\().4s, \r3\().4s
|
||||
trn1 \r3\().4s, \r8\().4s, \r1\().4s
|
||||
trn2 \r8\().4s, \r8\().4s, \r1\().4s
|
||||
trn1 \r4\().4S, \r0\().4S, \r2\().4S
|
||||
trn2 \r2\().4S, \r0\().4S, \r2\().4S
|
||||
trn1 \r6\().4S, \r5\().4S, \r7\().4S
|
||||
trn2 \r7\().4S, \r5\().4S, \r7\().4S
|
||||
trn1 \r5\().4S, \r9\().4S, \r3\().4S
|
||||
trn2 \r9\().4S, \r9\().4S, \r3\().4S
|
||||
trn1 \r3\().4S, \r8\().4S, \r1\().4S
|
||||
trn2 \r8\().4S, \r8\().4S, \r1\().4S
|
||||
|
||||
trn1 \r0\().2d, \r3\().2d, \r4\().2d
|
||||
trn2 \r4\().2d, \r3\().2d, \r4\().2d
|
||||
trn1 \r0\().2D, \r3\().2D, \r4\().2D
|
||||
trn2 \r4\().2D, \r3\().2D, \r4\().2D
|
||||
|
||||
trn1 \r1\().2d, \r5\().2d, \r6\().2d
|
||||
trn2 \r5\().2d, \r5\().2d, \r6\().2d
|
||||
trn1 \r1\().2D, \r5\().2D, \r6\().2D
|
||||
trn2 \r5\().2D, \r5\().2D, \r6\().2D
|
||||
|
||||
trn2 \r6\().2d, \r8\().2d, \r2\().2d
|
||||
trn1 \r2\().2d, \r8\().2d, \r2\().2d
|
||||
trn2 \r6\().2D, \r8\().2D, \r2\().2D
|
||||
trn1 \r2\().2D, \r8\().2D, \r2\().2D
|
||||
|
||||
trn1 \r3\().2d, \r9\().2d, \r7\().2d
|
||||
trn2 \r7\().2d, \r9\().2d, \r7\().2d
|
||||
trn1 \r3\().2D, \r9\().2D, \r7\().2D
|
||||
trn2 \r7\().2D, \r9\().2D, \r7\().2D
|
||||
|
||||
.endm
|
||||
|
||||
@@ -33,81 +33,81 @@ const tab_x2, align=4
|
||||
endconst
|
||||
|
||||
function ff_opus_deemphasis_neon, export=1
|
||||
movrel x4, tab_st
|
||||
ld1 {v4.4s}, [x4]
|
||||
movrel x4, tab_x0
|
||||
ld1 {v5.4s}, [x4]
|
||||
movrel x4, tab_x1
|
||||
ld1 {v6.4s}, [x4]
|
||||
movrel x4, tab_x2
|
||||
ld1 {v7.4s}, [x4]
|
||||
movrel x4, tab_st
|
||||
ld1 {v4.4s}, [x4]
|
||||
movrel x4, tab_x0
|
||||
ld1 {v5.4s}, [x4]
|
||||
movrel x4, tab_x1
|
||||
ld1 {v6.4s}, [x4]
|
||||
movrel x4, tab_x2
|
||||
ld1 {v7.4s}, [x4]
|
||||
|
||||
fmul v0.4s, v4.4s, v0.s[0]
|
||||
fmul v0.4s, v4.4s, v0.s[0]
|
||||
|
||||
1: ld1 {v1.4s, v2.4s}, [x1], #32
|
||||
1: ld1 {v1.4s, v2.4s}, [x1], #32
|
||||
|
||||
fmla v0.4s, v5.4s, v1.s[0]
|
||||
fmul v3.4s, v7.4s, v2.s[2]
|
||||
fmla v0.4s, v5.4s, v1.s[0]
|
||||
fmul v3.4s, v7.4s, v2.s[2]
|
||||
|
||||
fmla v0.4s, v6.4s, v1.s[1]
|
||||
fmla v3.4s, v6.4s, v2.s[1]
|
||||
fmla v0.4s, v6.4s, v1.s[1]
|
||||
fmla v3.4s, v6.4s, v2.s[1]
|
||||
|
||||
fmla v0.4s, v7.4s, v1.s[2]
|
||||
fmla v3.4s, v5.4s, v2.s[0]
|
||||
fmla v0.4s, v7.4s, v1.s[2]
|
||||
fmla v3.4s, v5.4s, v2.s[0]
|
||||
|
||||
fadd v1.4s, v1.4s, v0.4s
|
||||
fadd v2.4s, v2.4s, v3.4s
|
||||
fadd v1.4s, v1.4s, v0.4s
|
||||
fadd v2.4s, v2.4s, v3.4s
|
||||
|
||||
fmla v2.4s, v4.4s, v1.s[3]
|
||||
fmla v2.4s, v4.4s, v1.s[3]
|
||||
|
||||
st1 {v1.4s, v2.4s}, [x0], #32
|
||||
fmul v0.4s, v4.4s, v2.s[3]
|
||||
st1 {v1.4s, v2.4s}, [x0], #32
|
||||
fmul v0.4s, v4.4s, v2.s[3]
|
||||
|
||||
subs w2, w2, #8
|
||||
b.gt 1b
|
||||
subs w2, w2, #8
|
||||
b.gt 1b
|
||||
|
||||
mov s0, v2.s[3]
|
||||
mov s0, v2.s[3]
|
||||
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_opus_postfilter_neon, export=1
|
||||
ld1 {v0.4s}, [x2]
|
||||
dup v1.4s, v0.s[1]
|
||||
dup v2.4s, v0.s[2]
|
||||
dup v0.4s, v0.s[0]
|
||||
ld1 {v0.4s}, [x2]
|
||||
dup v1.4s, v0.s[1]
|
||||
dup v2.4s, v0.s[2]
|
||||
dup v0.4s, v0.s[0]
|
||||
|
||||
add w1, w1, #2
|
||||
sub x1, x0, x1, lsl #2
|
||||
add w1, w1, #2
|
||||
sub x1, x0, x1, lsl #2
|
||||
|
||||
ld1 {v3.4s}, [x1]
|
||||
fmul v3.4s, v3.4s, v2.4s
|
||||
ld1 {v3.4s}, [x1]
|
||||
fmul v3.4s, v3.4s, v2.4s
|
||||
|
||||
1: add x1, x1, #4
|
||||
ld1 {v4.4s}, [x1]
|
||||
add x1, x1, #4
|
||||
ld1 {v5.4s}, [x1]
|
||||
add x1, x1, #4
|
||||
ld1 {v6.4s}, [x1]
|
||||
add x1, x1, #4
|
||||
ld1 {v7.4s}, [x1]
|
||||
1: add x1, x1, #4
|
||||
ld1 {v4.4s}, [x1]
|
||||
add x1, x1, #4
|
||||
ld1 {v5.4s}, [x1]
|
||||
add x1, x1, #4
|
||||
ld1 {v6.4s}, [x1]
|
||||
add x1, x1, #4
|
||||
ld1 {v7.4s}, [x1]
|
||||
|
||||
fmla v3.4s, v7.4s, v2.4s
|
||||
fadd v6.4s, v6.4s, v4.4s
|
||||
fmla v3.4s, v7.4s, v2.4s
|
||||
fadd v6.4s, v6.4s, v4.4s
|
||||
|
||||
ld1 {v4.4s}, [x0]
|
||||
fmla v4.4s, v5.4s, v0.4s
|
||||
ld1 {v4.4s}, [x0]
|
||||
fmla v4.4s, v5.4s, v0.4s
|
||||
|
||||
fmul v6.4s, v6.4s, v1.4s
|
||||
fadd v6.4s, v6.4s, v3.4s
|
||||
fmul v6.4s, v6.4s, v1.4s
|
||||
fadd v6.4s, v6.4s, v3.4s
|
||||
|
||||
fadd v4.4s, v4.4s, v6.4s
|
||||
fmul v3.4s, v7.4s, v2.4s
|
||||
fadd v4.4s, v4.4s, v6.4s
|
||||
fmul v3.4s, v7.4s, v2.4s
|
||||
|
||||
st1 {v4.4s}, [x0], #16
|
||||
st1 {v4.4s}, [x0], #16
|
||||
|
||||
subs w3, w3, #4
|
||||
b.gt 1b
|
||||
subs w3, w3, #4
|
||||
b.gt 1b
|
||||
|
||||
ret
|
||||
endfunc
|
||||
|
||||
+147
-147
@@ -46,49 +46,49 @@ function ff_sbr_sum64x5_neon, export=1
|
||||
add x3, x0, #192*4
|
||||
add x4, x0, #256*4
|
||||
mov x5, #64
|
||||
1: ld1 {v0.4s}, [x0]
|
||||
ld1 {v1.4s}, [x1], #16
|
||||
fadd v0.4s, v0.4s, v1.4s
|
||||
ld1 {v2.4s}, [x2], #16
|
||||
fadd v0.4s, v0.4s, v2.4s
|
||||
ld1 {v3.4s}, [x3], #16
|
||||
fadd v0.4s, v0.4s, v3.4s
|
||||
ld1 {v4.4s}, [x4], #16
|
||||
fadd v0.4s, v0.4s, v4.4s
|
||||
st1 {v0.4s}, [x0], #16
|
||||
1: ld1 {v0.4S}, [x0]
|
||||
ld1 {v1.4S}, [x1], #16
|
||||
fadd v0.4S, v0.4S, v1.4S
|
||||
ld1 {v2.4S}, [x2], #16
|
||||
fadd v0.4S, v0.4S, v2.4S
|
||||
ld1 {v3.4S}, [x3], #16
|
||||
fadd v0.4S, v0.4S, v3.4S
|
||||
ld1 {v4.4S}, [x4], #16
|
||||
fadd v0.4S, v0.4S, v4.4S
|
||||
st1 {v0.4S}, [x0], #16
|
||||
subs x5, x5, #4
|
||||
b.gt 1b
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_sbr_sum_square_neon, export=1
|
||||
movi v0.4s, #0
|
||||
1: ld1 {v1.4s}, [x0], #16
|
||||
fmla v0.4s, v1.4s, v1.4s
|
||||
movi v0.4S, #0
|
||||
1: ld1 {v1.4S}, [x0], #16
|
||||
fmla v0.4S, v1.4S, v1.4S
|
||||
subs w1, w1, #2
|
||||
b.gt 1b
|
||||
faddp v0.4s, v0.4s, v0.4s
|
||||
faddp v0.4s, v0.4s, v0.4s
|
||||
faddp v0.4S, v0.4S, v0.4S
|
||||
faddp v0.4S, v0.4S, v0.4S
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_sbr_neg_odd_64_neon, export=1
|
||||
mov x1, x0
|
||||
movi v5.4s, #1<<7, lsl #24
|
||||
ld2 {v0.4s, v1.4s}, [x0], #32
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
ld2 {v2.4s, v3.4s}, [x0], #32
|
||||
movi v5.4S, #1<<7, lsl #24
|
||||
ld2 {v0.4S, v1.4S}, [x0], #32
|
||||
eor v1.16B, v1.16B, v5.16B
|
||||
ld2 {v2.4S, v3.4S}, [x0], #32
|
||||
.rept 3
|
||||
st2 {v0.4s, v1.4s}, [x1], #32
|
||||
eor v3.16b, v3.16b, v5.16b
|
||||
ld2 {v0.4s, v1.4s}, [x0], #32
|
||||
st2 {v2.4s, v3.4s}, [x1], #32
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
ld2 {v2.4s, v3.4s}, [x0], #32
|
||||
st2 {v0.4S, v1.4S}, [x1], #32
|
||||
eor v3.16B, v3.16B, v5.16B
|
||||
ld2 {v0.4S, v1.4S}, [x0], #32
|
||||
st2 {v2.4S, v3.4S}, [x1], #32
|
||||
eor v1.16B, v1.16B, v5.16B
|
||||
ld2 {v2.4S, v3.4S}, [x0], #32
|
||||
.endr
|
||||
eor v3.16b, v3.16b, v5.16b
|
||||
st2 {v0.4s, v1.4s}, [x1], #32
|
||||
st2 {v2.4s, v3.4s}, [x1], #32
|
||||
eor v3.16B, v3.16B, v5.16B
|
||||
st2 {v0.4S, v1.4S}, [x1], #32
|
||||
st2 {v2.4S, v3.4S}, [x1], #32
|
||||
ret
|
||||
endfunc
|
||||
|
||||
@@ -97,26 +97,26 @@ function ff_sbr_qmf_pre_shuffle_neon, export=1
|
||||
add x2, x0, #64*4
|
||||
mov x3, #-16
|
||||
mov x4, #-4
|
||||
movi v6.4s, #1<<7, lsl #24
|
||||
ld1 {v0.2s}, [x0], #8
|
||||
st1 {v0.2s}, [x2], #8
|
||||
movi v6.4S, #1<<7, lsl #24
|
||||
ld1 {v0.2S}, [x0], #8
|
||||
st1 {v0.2S}, [x2], #8
|
||||
.rept 7
|
||||
ld1 {v1.4s}, [x1], x3
|
||||
ld1 {v2.4s}, [x0], #16
|
||||
eor v1.16b, v1.16b, v6.16b
|
||||
rev64 v1.4s, v1.4s
|
||||
ext v1.16b, v1.16b, v1.16b, #8
|
||||
st2 {v1.4s, v2.4s}, [x2], #32
|
||||
ld1 {v1.4S}, [x1], x3
|
||||
ld1 {v2.4S}, [x0], #16
|
||||
eor v1.16B, v1.16B, v6.16B
|
||||
rev64 v1.4S, v1.4S
|
||||
ext v1.16B, v1.16B, v1.16B, #8
|
||||
st2 {v1.4S, v2.4S}, [x2], #32
|
||||
.endr
|
||||
add x1, x1, #8
|
||||
ld1 {v1.2s}, [x1], x4
|
||||
ld1 {v2.2s}, [x0], #8
|
||||
ld1 {v1.s}[3], [x1]
|
||||
ld1 {v2.s}[2], [x0]
|
||||
eor v1.16b, v1.16b, v6.16b
|
||||
rev64 v1.4s, v1.4s
|
||||
st2 {v1.2s, v2.2s}, [x2], #16
|
||||
st2 {v1.s, v2.s}[2], [x2]
|
||||
ld1 {v1.2S}, [x1], x4
|
||||
ld1 {v2.2S}, [x0], #8
|
||||
ld1 {v1.S}[3], [x1]
|
||||
ld1 {v2.S}[2], [x0]
|
||||
eor v1.16B, v1.16B, v6.16B
|
||||
rev64 v1.4S, v1.4S
|
||||
st2 {v1.2S, v2.2S}, [x2], #16
|
||||
st2 {v1.S, v2.S}[2], [x2]
|
||||
ret
|
||||
endfunc
|
||||
|
||||
@@ -124,13 +124,13 @@ function ff_sbr_qmf_post_shuffle_neon, export=1
|
||||
add x2, x1, #60*4
|
||||
mov x3, #-16
|
||||
mov x4, #32
|
||||
movi v6.4s, #1<<7, lsl #24
|
||||
1: ld1 {v0.4s}, [x2], x3
|
||||
ld1 {v1.4s}, [x1], #16
|
||||
eor v0.16b, v0.16b, v6.16b
|
||||
rev64 v0.4s, v0.4s
|
||||
ext v0.16b, v0.16b, v0.16b, #8
|
||||
st2 {v0.4s, v1.4s}, [x0], #32
|
||||
movi v6.4S, #1<<7, lsl #24
|
||||
1: ld1 {v0.4S}, [x2], x3
|
||||
ld1 {v1.4S}, [x1], #16
|
||||
eor v0.16B, v0.16B, v6.16B
|
||||
rev64 v0.4S, v0.4S
|
||||
ext v0.16B, v0.16B, v0.16B, #8
|
||||
st2 {v0.4S, v1.4S}, [x0], #32
|
||||
subs x4, x4, #4
|
||||
b.gt 1b
|
||||
ret
|
||||
@@ -141,13 +141,13 @@ function ff_sbr_qmf_deint_neg_neon, export=1
|
||||
add x2, x0, #60*4
|
||||
mov x3, #-32
|
||||
mov x4, #32
|
||||
movi v2.4s, #1<<7, lsl #24
|
||||
1: ld2 {v0.4s, v1.4s}, [x1], x3
|
||||
eor v0.16b, v0.16b, v2.16b
|
||||
rev64 v1.4s, v1.4s
|
||||
ext v1.16b, v1.16b, v1.16b, #8
|
||||
st1 {v0.4s}, [x2]
|
||||
st1 {v1.4s}, [x0], #16
|
||||
movi v2.4S, #1<<7, lsl #24
|
||||
1: ld2 {v0.4S, v1.4S}, [x1], x3
|
||||
eor v0.16B, v0.16B, v2.16B
|
||||
rev64 v1.4S, v1.4S
|
||||
ext v1.16B, v1.16B, v1.16B, #8
|
||||
st1 {v0.4S}, [x2]
|
||||
st1 {v1.4S}, [x0], #16
|
||||
sub x2, x2, #16
|
||||
subs x4, x4, #4
|
||||
b.gt 1b
|
||||
@@ -159,16 +159,16 @@ function ff_sbr_qmf_deint_bfly_neon, export=1
|
||||
add x3, x0, #124*4
|
||||
mov x4, #64
|
||||
mov x5, #-16
|
||||
1: ld1 {v0.4s}, [x1], #16
|
||||
ld1 {v1.4s}, [x2], x5
|
||||
rev64 v2.4s, v0.4s
|
||||
ext v2.16b, v2.16b, v2.16b, #8
|
||||
rev64 v3.4s, v1.4s
|
||||
ext v3.16b, v3.16b, v3.16b, #8
|
||||
fadd v1.4s, v1.4s, v2.4s
|
||||
fsub v0.4s, v0.4s, v3.4s
|
||||
st1 {v0.4s}, [x0], #16
|
||||
st1 {v1.4s}, [x3], x5
|
||||
1: ld1 {v0.4S}, [x1], #16
|
||||
ld1 {v1.4S}, [x2], x5
|
||||
rev64 v2.4S, v0.4S
|
||||
ext v2.16B, v2.16B, v2.16B, #8
|
||||
rev64 v3.4S, v1.4S
|
||||
ext v3.16B, v3.16B, v3.16B, #8
|
||||
fadd v1.4S, v1.4S, v2.4S
|
||||
fsub v0.4S, v0.4S, v3.4S
|
||||
st1 {v0.4S}, [x0], #16
|
||||
st1 {v1.4S}, [x3], x5
|
||||
subs x4, x4, #4
|
||||
b.gt 1b
|
||||
ret
|
||||
@@ -178,32 +178,32 @@ function ff_sbr_hf_gen_neon, export=1
|
||||
sxtw x4, w4
|
||||
sxtw x5, w5
|
||||
movrel x6, factors
|
||||
ld1 {v7.4s}, [x6]
|
||||
dup v1.4s, v0.s[0]
|
||||
mov v2.8b, v1.8b
|
||||
mov v2.s[2], v7.s[0]
|
||||
mov v2.s[3], v7.s[0]
|
||||
fmul v1.4s, v1.4s, v2.4s
|
||||
ld1 {v0.d}[0], [x3]
|
||||
ld1 {v0.d}[1], [x2]
|
||||
fmul v0.4s, v0.4s, v1.4s
|
||||
fmul v1.4s, v0.4s, v7.4s
|
||||
rev64 v0.4s, v0.4s
|
||||
ld1 {v7.4S}, [x6]
|
||||
dup v1.4S, v0.S[0]
|
||||
mov v2.8B, v1.8B
|
||||
mov v2.S[2], v7.S[0]
|
||||
mov v2.S[3], v7.S[0]
|
||||
fmul v1.4S, v1.4S, v2.4S
|
||||
ld1 {v0.D}[0], [x3]
|
||||
ld1 {v0.D}[1], [x2]
|
||||
fmul v0.4S, v0.4S, v1.4S
|
||||
fmul v1.4S, v0.4S, v7.4S
|
||||
rev64 v0.4S, v0.4S
|
||||
sub x7, x5, x4
|
||||
add x0, x0, x4, lsl #3
|
||||
add x1, x1, x4, lsl #3
|
||||
sub x1, x1, #16
|
||||
1: ld1 {v2.4s}, [x1], #16
|
||||
ld1 {v3.2s}, [x1]
|
||||
fmul v4.4s, v2.4s, v1.4s
|
||||
fmul v5.4s, v2.4s, v0.4s
|
||||
faddp v4.4s, v4.4s, v4.4s
|
||||
faddp v5.4s, v5.4s, v5.4s
|
||||
faddp v4.4s, v4.4s, v4.4s
|
||||
faddp v5.4s, v5.4s, v5.4s
|
||||
mov v4.s[1], v5.s[0]
|
||||
fadd v4.2s, v4.2s, v3.2s
|
||||
st1 {v4.2s}, [x0], #8
|
||||
1: ld1 {v2.4S}, [x1], #16
|
||||
ld1 {v3.2S}, [x1]
|
||||
fmul v4.4S, v2.4S, v1.4S
|
||||
fmul v5.4S, v2.4S, v0.4S
|
||||
faddp v4.4S, v4.4S, v4.4S
|
||||
faddp v5.4S, v5.4S, v5.4S
|
||||
faddp v4.4S, v4.4S, v4.4S
|
||||
faddp v5.4S, v5.4S, v5.4S
|
||||
mov v4.S[1], v5.S[0]
|
||||
fadd v4.2S, v4.2S, v3.2S
|
||||
st1 {v4.2S}, [x0], #8
|
||||
sub x1, x1, #8
|
||||
subs x7, x7, #1
|
||||
b.gt 1b
|
||||
@@ -215,10 +215,10 @@ function ff_sbr_hf_g_filt_neon, export=1
|
||||
sxtw x4, w4
|
||||
mov x5, #40*2*4
|
||||
add x1, x1, x4, lsl #3
|
||||
1: ld1 {v0.2s}, [x1], x5
|
||||
ld1 {v1.s}[0], [x2], #4
|
||||
fmul v2.4s, v0.4s, v1.s[0]
|
||||
st1 {v2.2s}, [x0], #8
|
||||
1: ld1 {v0.2S}, [x1], x5
|
||||
ld1 {v1.S}[0], [x2], #4
|
||||
fmul v2.4S, v0.4S, v1.S[0]
|
||||
st1 {v2.2S}, [x0], #8
|
||||
subs x3, x3, #1
|
||||
b.gt 1b
|
||||
ret
|
||||
@@ -227,46 +227,46 @@ endfunc
|
||||
function ff_sbr_autocorrelate_neon, export=1
|
||||
mov x2, #38
|
||||
movrel x3, factors
|
||||
ld1 {v0.4s}, [x3]
|
||||
movi v1.4s, #0
|
||||
movi v2.4s, #0
|
||||
movi v3.4s, #0
|
||||
ld1 {v4.2s}, [x0], #8
|
||||
ld1 {v5.2s}, [x0], #8
|
||||
fmul v16.2s, v4.2s, v4.2s
|
||||
fmul v17.2s, v5.2s, v4.s[0]
|
||||
fmul v18.2s, v5.2s, v4.s[1]
|
||||
1: ld1 {v5.d}[1], [x0], #8
|
||||
fmla v1.2s, v4.2s, v4.2s
|
||||
fmla v2.4s, v5.4s, v4.s[0]
|
||||
fmla v3.4s, v5.4s, v4.s[1]
|
||||
mov v4.d[0], v5.d[0]
|
||||
mov v5.d[0], v5.d[1]
|
||||
ld1 {v0.4S}, [x3]
|
||||
movi v1.4S, #0
|
||||
movi v2.4S, #0
|
||||
movi v3.4S, #0
|
||||
ld1 {v4.2S}, [x0], #8
|
||||
ld1 {v5.2S}, [x0], #8
|
||||
fmul v16.2S, v4.2S, v4.2S
|
||||
fmul v17.2S, v5.2S, v4.S[0]
|
||||
fmul v18.2S, v5.2S, v4.S[1]
|
||||
1: ld1 {v5.D}[1], [x0], #8
|
||||
fmla v1.2S, v4.2S, v4.2S
|
||||
fmla v2.4S, v5.4S, v4.S[0]
|
||||
fmla v3.4S, v5.4S, v4.S[1]
|
||||
mov v4.D[0], v5.D[0]
|
||||
mov v5.D[0], v5.D[1]
|
||||
subs x2, x2, #1
|
||||
b.gt 1b
|
||||
fmul v19.2s, v4.2s, v4.2s
|
||||
fmul v20.2s, v5.2s, v4.s[0]
|
||||
fmul v21.2s, v5.2s, v4.s[1]
|
||||
fadd v22.4s, v2.4s, v20.4s
|
||||
fsub v22.4s, v22.4s, v17.4s
|
||||
fadd v23.4s, v3.4s, v21.4s
|
||||
fsub v23.4s, v23.4s, v18.4s
|
||||
rev64 v23.4s, v23.4s
|
||||
fmul v23.4s, v23.4s, v0.4s
|
||||
fadd v22.4s, v22.4s, v23.4s
|
||||
st1 {v22.4s}, [x1], #16
|
||||
fadd v23.2s, v1.2s, v19.2s
|
||||
fsub v23.2s, v23.2s, v16.2s
|
||||
faddp v23.2s, v23.2s, v23.2s
|
||||
st1 {v23.s}[0], [x1]
|
||||
fmul v19.2S, v4.2S, v4.2S
|
||||
fmul v20.2S, v5.2S, v4.S[0]
|
||||
fmul v21.2S, v5.2S, v4.S[1]
|
||||
fadd v22.4S, v2.4S, v20.4S
|
||||
fsub v22.4S, v22.4S, v17.4S
|
||||
fadd v23.4S, v3.4S, v21.4S
|
||||
fsub v23.4S, v23.4S, v18.4S
|
||||
rev64 v23.4S, v23.4S
|
||||
fmul v23.4S, v23.4S, v0.4S
|
||||
fadd v22.4S, v22.4S, v23.4S
|
||||
st1 {v22.4S}, [x1], #16
|
||||
fadd v23.2S, v1.2S, v19.2S
|
||||
fsub v23.2S, v23.2S, v16.2S
|
||||
faddp v23.2S, v23.2S, v23.2S
|
||||
st1 {v23.S}[0], [x1]
|
||||
add x1, x1, #8
|
||||
rev64 v3.2s, v3.2s
|
||||
fmul v3.2s, v3.2s, v0.2s
|
||||
fadd v2.2s, v2.2s, v3.2s
|
||||
st1 {v2.2s}, [x1]
|
||||
rev64 v3.2S, v3.2S
|
||||
fmul v3.2S, v3.2S, v0.2S
|
||||
fadd v2.2S, v2.2S, v3.2S
|
||||
st1 {v2.2S}, [x1]
|
||||
add x1, x1, #16
|
||||
faddp v1.2s, v1.2s, v1.2s
|
||||
st1 {v1.s}[0], [x1]
|
||||
faddp v1.2S, v1.2S, v1.2S
|
||||
st1 {v1.S}[0], [x1]
|
||||
ret
|
||||
endfunc
|
||||
|
||||
@@ -278,25 +278,25 @@ endfunc
|
||||
1: and x3, x3, #0x1ff
|
||||
add x8, x7, x3, lsl #3
|
||||
add x3, x3, #2
|
||||
ld1 {v2.4s}, [x0]
|
||||
ld1 {v3.2s}, [x1], #8
|
||||
ld1 {v4.2s}, [x2], #8
|
||||
ld1 {v5.4s}, [x8]
|
||||
mov v6.16b, v2.16b
|
||||
zip1 v3.4s, v3.4s, v3.4s
|
||||
zip1 v4.4s, v4.4s, v4.4s
|
||||
fmla v6.4s, v1.4s, v3.4s
|
||||
fmla v2.4s, v5.4s, v4.4s
|
||||
fcmeq v7.4s, v3.4s, #0
|
||||
bif v2.16b, v6.16b, v7.16b
|
||||
st1 {v2.4s}, [x0], #16
|
||||
ld1 {v2.4S}, [x0]
|
||||
ld1 {v3.2S}, [x1], #8
|
||||
ld1 {v4.2S}, [x2], #8
|
||||
ld1 {v5.4S}, [x8]
|
||||
mov v6.16B, v2.16B
|
||||
zip1 v3.4S, v3.4S, v3.4S
|
||||
zip1 v4.4S, v4.4S, v4.4S
|
||||
fmla v6.4S, v1.4S, v3.4S
|
||||
fmla v2.4S, v5.4S, v4.4S
|
||||
fcmeq v7.4S, v3.4S, #0
|
||||
bif v2.16B, v6.16B, v7.16B
|
||||
st1 {v2.4S}, [x0], #16
|
||||
subs x5, x5, #2
|
||||
b.gt 1b
|
||||
.endm
|
||||
|
||||
function ff_sbr_hf_apply_noise_0_neon, export=1
|
||||
movrel x9, phi_noise_0
|
||||
ld1 {v1.4s}, [x9]
|
||||
ld1 {v1.4S}, [x9]
|
||||
apply_noise_common
|
||||
ret
|
||||
endfunc
|
||||
@@ -305,14 +305,14 @@ function ff_sbr_hf_apply_noise_1_neon, export=1
|
||||
movrel x9, phi_noise_1
|
||||
and x4, x4, #1
|
||||
add x9, x9, x4, lsl #4
|
||||
ld1 {v1.4s}, [x9]
|
||||
ld1 {v1.4S}, [x9]
|
||||
apply_noise_common
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_sbr_hf_apply_noise_2_neon, export=1
|
||||
movrel x9, phi_noise_2
|
||||
ld1 {v1.4s}, [x9]
|
||||
ld1 {v1.4S}, [x9]
|
||||
apply_noise_common
|
||||
ret
|
||||
endfunc
|
||||
@@ -321,7 +321,7 @@ function ff_sbr_hf_apply_noise_3_neon, export=1
|
||||
movrel x9, phi_noise_3
|
||||
and x4, x4, #1
|
||||
add x9, x9, x4, lsl #4
|
||||
ld1 {v1.4s}, [x9]
|
||||
ld1 {v1.4S}, [x9]
|
||||
apply_noise_common
|
||||
ret
|
||||
endfunc
|
||||
|
||||
@@ -54,7 +54,7 @@ endconst
|
||||
prfm pldl1keep, [\data]
|
||||
mov x10, x30
|
||||
movrel x3, idct_coeff_neon
|
||||
ld1 {v0.2d}, [x3]
|
||||
ld1 {v0.2D}, [x3]
|
||||
.endm
|
||||
|
||||
.macro idct_end
|
||||
@@ -74,146 +74,146 @@ endconst
|
||||
.endm
|
||||
|
||||
.macro idct_col4_top y1, y2, y3, y4, i, l
|
||||
smull\i v7.4s, \y3\l, z2
|
||||
smull\i v16.4s, \y3\l, z6
|
||||
smull\i v17.4s, \y2\l, z1
|
||||
add v19.4s, v23.4s, v7.4s
|
||||
smull\i v18.4s, \y2\l, z3
|
||||
add v20.4s, v23.4s, v16.4s
|
||||
smull\i v5.4s, \y2\l, z5
|
||||
sub v21.4s, v23.4s, v16.4s
|
||||
smull\i v6.4s, \y2\l, z7
|
||||
sub v22.4s, v23.4s, v7.4s
|
||||
smull\i v7.4S, \y3\l, z2
|
||||
smull\i v16.4S, \y3\l, z6
|
||||
smull\i v17.4S, \y2\l, z1
|
||||
add v19.4S, v23.4S, v7.4S
|
||||
smull\i v18.4S, \y2\l, z3
|
||||
add v20.4S, v23.4S, v16.4S
|
||||
smull\i v5.4S, \y2\l, z5
|
||||
sub v21.4S, v23.4S, v16.4S
|
||||
smull\i v6.4S, \y2\l, z7
|
||||
sub v22.4S, v23.4S, v7.4S
|
||||
|
||||
smlal\i v17.4s, \y4\l, z3
|
||||
smlsl\i v18.4s, \y4\l, z7
|
||||
smlsl\i v5.4s, \y4\l, z1
|
||||
smlsl\i v6.4s, \y4\l, z5
|
||||
smlal\i v17.4S, \y4\l, z3
|
||||
smlsl\i v18.4S, \y4\l, z7
|
||||
smlsl\i v5.4S, \y4\l, z1
|
||||
smlsl\i v6.4S, \y4\l, z5
|
||||
.endm
|
||||
|
||||
.macro idct_row4_neon y1, y2, y3, y4, pass
|
||||
ld1 {\y1\().2d,\y2\().2d}, [x2], #32
|
||||
movi v23.4s, #1<<2, lsl #8
|
||||
orr v5.16b, \y1\().16b, \y2\().16b
|
||||
ld1 {\y3\().2d,\y4\().2d}, [x2], #32
|
||||
orr v6.16b, \y3\().16b, \y4\().16b
|
||||
orr v5.16b, v5.16b, v6.16b
|
||||
mov x3, v5.d[1]
|
||||
smlal v23.4s, \y1\().4h, z4
|
||||
ld1 {\y1\().2D,\y2\().2D}, [x2], #32
|
||||
movi v23.4S, #1<<2, lsl #8
|
||||
orr v5.16B, \y1\().16B, \y2\().16B
|
||||
ld1 {\y3\().2D,\y4\().2D}, [x2], #32
|
||||
orr v6.16B, \y3\().16B, \y4\().16B
|
||||
orr v5.16B, v5.16B, v6.16B
|
||||
mov x3, v5.D[1]
|
||||
smlal v23.4S, \y1\().4H, z4
|
||||
|
||||
idct_col4_top \y1, \y2, \y3, \y4, 1, .4h
|
||||
idct_col4_top \y1, \y2, \y3, \y4, 1, .4H
|
||||
|
||||
cmp x3, #0
|
||||
b.eq \pass\()f
|
||||
|
||||
smull2 v7.4s, \y1\().8h, z4
|
||||
smlal2 v17.4s, \y2\().8h, z5
|
||||
smlsl2 v18.4s, \y2\().8h, z1
|
||||
smull2 v16.4s, \y3\().8h, z2
|
||||
smlal2 v5.4s, \y2\().8h, z7
|
||||
add v19.4s, v19.4s, v7.4s
|
||||
sub v20.4s, v20.4s, v7.4s
|
||||
sub v21.4s, v21.4s, v7.4s
|
||||
add v22.4s, v22.4s, v7.4s
|
||||
smlal2 v6.4s, \y2\().8h, z3
|
||||
smull2 v7.4s, \y3\().8h, z6
|
||||
smlal2 v17.4s, \y4\().8h, z7
|
||||
smlsl2 v18.4s, \y4\().8h, z5
|
||||
smlal2 v5.4s, \y4\().8h, z3
|
||||
smlsl2 v6.4s, \y4\().8h, z1
|
||||
add v19.4s, v19.4s, v7.4s
|
||||
sub v20.4s, v20.4s, v16.4s
|
||||
add v21.4s, v21.4s, v16.4s
|
||||
sub v22.4s, v22.4s, v7.4s
|
||||
smull2 v7.4S, \y1\().8H, z4
|
||||
smlal2 v17.4S, \y2\().8H, z5
|
||||
smlsl2 v18.4S, \y2\().8H, z1
|
||||
smull2 v16.4S, \y3\().8H, z2
|
||||
smlal2 v5.4S, \y2\().8H, z7
|
||||
add v19.4S, v19.4S, v7.4S
|
||||
sub v20.4S, v20.4S, v7.4S
|
||||
sub v21.4S, v21.4S, v7.4S
|
||||
add v22.4S, v22.4S, v7.4S
|
||||
smlal2 v6.4S, \y2\().8H, z3
|
||||
smull2 v7.4S, \y3\().8H, z6
|
||||
smlal2 v17.4S, \y4\().8H, z7
|
||||
smlsl2 v18.4S, \y4\().8H, z5
|
||||
smlal2 v5.4S, \y4\().8H, z3
|
||||
smlsl2 v6.4S, \y4\().8H, z1
|
||||
add v19.4S, v19.4S, v7.4S
|
||||
sub v20.4S, v20.4S, v16.4S
|
||||
add v21.4S, v21.4S, v16.4S
|
||||
sub v22.4S, v22.4S, v7.4S
|
||||
|
||||
\pass: add \y3\().4S, v19.4S, v17.4S
|
||||
add \y4\().4s, v20.4s, v18.4s
|
||||
shrn \y1\().4h, \y3\().4s, #ROW_SHIFT
|
||||
shrn \y2\().4h, \y4\().4s, #ROW_SHIFT
|
||||
add v7.4s, v21.4s, v5.4s
|
||||
add v16.4s, v22.4s, v6.4s
|
||||
shrn \y3\().4h, v7.4s, #ROW_SHIFT
|
||||
shrn \y4\().4h, v16.4s, #ROW_SHIFT
|
||||
sub v22.4s, v22.4s, v6.4s
|
||||
sub v19.4s, v19.4s, v17.4s
|
||||
sub v21.4s, v21.4s, v5.4s
|
||||
shrn2 \y1\().8h, v22.4s, #ROW_SHIFT
|
||||
sub v20.4s, v20.4s, v18.4s
|
||||
shrn2 \y2\().8h, v21.4s, #ROW_SHIFT
|
||||
shrn2 \y3\().8h, v20.4s, #ROW_SHIFT
|
||||
shrn2 \y4\().8h, v19.4s, #ROW_SHIFT
|
||||
add \y4\().4S, v20.4S, v18.4S
|
||||
shrn \y1\().4H, \y3\().4S, #ROW_SHIFT
|
||||
shrn \y2\().4H, \y4\().4S, #ROW_SHIFT
|
||||
add v7.4S, v21.4S, v5.4S
|
||||
add v16.4S, v22.4S, v6.4S
|
||||
shrn \y3\().4H, v7.4S, #ROW_SHIFT
|
||||
shrn \y4\().4H, v16.4S, #ROW_SHIFT
|
||||
sub v22.4S, v22.4S, v6.4S
|
||||
sub v19.4S, v19.4S, v17.4S
|
||||
sub v21.4S, v21.4S, v5.4S
|
||||
shrn2 \y1\().8H, v22.4S, #ROW_SHIFT
|
||||
sub v20.4S, v20.4S, v18.4S
|
||||
shrn2 \y2\().8H, v21.4S, #ROW_SHIFT
|
||||
shrn2 \y3\().8H, v20.4S, #ROW_SHIFT
|
||||
shrn2 \y4\().8H, v19.4S, #ROW_SHIFT
|
||||
|
||||
trn1 v16.8h, \y1\().8h, \y2\().8h
|
||||
trn2 v17.8h, \y1\().8h, \y2\().8h
|
||||
trn1 v18.8h, \y3\().8h, \y4\().8h
|
||||
trn2 v19.8h, \y3\().8h, \y4\().8h
|
||||
trn1 \y1\().4s, v16.4s, v18.4s
|
||||
trn1 \y2\().4s, v17.4s, v19.4s
|
||||
trn2 \y3\().4s, v16.4s, v18.4s
|
||||
trn2 \y4\().4s, v17.4s, v19.4s
|
||||
trn1 v16.8H, \y1\().8H, \y2\().8H
|
||||
trn2 v17.8H, \y1\().8H, \y2\().8H
|
||||
trn1 v18.8H, \y3\().8H, \y4\().8H
|
||||
trn2 v19.8H, \y3\().8H, \y4\().8H
|
||||
trn1 \y1\().4S, v16.4S, v18.4S
|
||||
trn1 \y2\().4S, v17.4S, v19.4S
|
||||
trn2 \y3\().4S, v16.4S, v18.4S
|
||||
trn2 \y4\().4S, v17.4S, v19.4S
|
||||
.endm
|
||||
|
||||
.macro declare_idct_col4_neon i, l
|
||||
function idct_col4_neon\i
|
||||
dup v23.4h, z4c
|
||||
dup v23.4H, z4c
|
||||
.if \i == 1
|
||||
add v23.4h, v23.4h, v24.4h
|
||||
add v23.4H, v23.4H, v24.4H
|
||||
.else
|
||||
mov v5.d[0], v24.d[1]
|
||||
add v23.4h, v23.4h, v5.4h
|
||||
mov v5.D[0], v24.D[1]
|
||||
add v23.4H, v23.4H, v5.4H
|
||||
.endif
|
||||
smull v23.4s, v23.4h, z4
|
||||
smull v23.4S, v23.4H, z4
|
||||
|
||||
idct_col4_top v24, v25, v26, v27, \i, \l
|
||||
|
||||
mov x4, v28.d[\i - 1]
|
||||
mov x5, v29.d[\i - 1]
|
||||
mov x4, v28.D[\i - 1]
|
||||
mov x5, v29.D[\i - 1]
|
||||
cmp x4, #0
|
||||
b.eq 1f
|
||||
|
||||
smull\i v7.4s, v28\l, z4
|
||||
add v19.4s, v19.4s, v7.4s
|
||||
sub v20.4s, v20.4s, v7.4s
|
||||
sub v21.4s, v21.4s, v7.4s
|
||||
add v22.4s, v22.4s, v7.4s
|
||||
smull\i v7.4S, v28\l, z4
|
||||
add v19.4S, v19.4S, v7.4S
|
||||
sub v20.4S, v20.4S, v7.4S
|
||||
sub v21.4S, v21.4S, v7.4S
|
||||
add v22.4S, v22.4S, v7.4S
|
||||
|
||||
1: mov x4, v30.d[\i - 1]
|
||||
1: mov x4, v30.D[\i - 1]
|
||||
cmp x5, #0
|
||||
b.eq 2f
|
||||
|
||||
smlal\i v17.4s, v29\l, z5
|
||||
smlsl\i v18.4s, v29\l, z1
|
||||
smlal\i v5.4s, v29\l, z7
|
||||
smlal\i v6.4s, v29\l, z3
|
||||
smlal\i v17.4S, v29\l, z5
|
||||
smlsl\i v18.4S, v29\l, z1
|
||||
smlal\i v5.4S, v29\l, z7
|
||||
smlal\i v6.4S, v29\l, z3
|
||||
|
||||
2: mov x5, v31.d[\i - 1]
|
||||
2: mov x5, v31.D[\i - 1]
|
||||
cmp x4, #0
|
||||
b.eq 3f
|
||||
|
||||
smull\i v7.4s, v30\l, z6
|
||||
smull\i v16.4s, v30\l, z2
|
||||
add v19.4s, v19.4s, v7.4s
|
||||
sub v22.4s, v22.4s, v7.4s
|
||||
sub v20.4s, v20.4s, v16.4s
|
||||
add v21.4s, v21.4s, v16.4s
|
||||
smull\i v7.4S, v30\l, z6
|
||||
smull\i v16.4S, v30\l, z2
|
||||
add v19.4S, v19.4S, v7.4S
|
||||
sub v22.4S, v22.4S, v7.4S
|
||||
sub v20.4S, v20.4S, v16.4S
|
||||
add v21.4S, v21.4S, v16.4S
|
||||
|
||||
3: cmp x5, #0
|
||||
b.eq 4f
|
||||
|
||||
smlal\i v17.4s, v31\l, z7
|
||||
smlsl\i v18.4s, v31\l, z5
|
||||
smlal\i v5.4s, v31\l, z3
|
||||
smlsl\i v6.4s, v31\l, z1
|
||||
smlal\i v17.4S, v31\l, z7
|
||||
smlsl\i v18.4S, v31\l, z5
|
||||
smlal\i v5.4S, v31\l, z3
|
||||
smlsl\i v6.4S, v31\l, z1
|
||||
|
||||
4: addhn v7.4h, v19.4s, v17.4s
|
||||
addhn2 v7.8h, v20.4s, v18.4s
|
||||
subhn v18.4h, v20.4s, v18.4s
|
||||
subhn2 v18.8h, v19.4s, v17.4s
|
||||
4: addhn v7.4H, v19.4S, v17.4S
|
||||
addhn2 v7.8H, v20.4S, v18.4S
|
||||
subhn v18.4H, v20.4S, v18.4S
|
||||
subhn2 v18.8H, v19.4S, v17.4S
|
||||
|
||||
addhn v16.4h, v21.4s, v5.4s
|
||||
addhn2 v16.8h, v22.4s, v6.4s
|
||||
subhn v17.4h, v22.4s, v6.4s
|
||||
subhn2 v17.8h, v21.4s, v5.4s
|
||||
addhn v16.4H, v21.4S, v5.4S
|
||||
addhn2 v16.8H, v22.4S, v6.4S
|
||||
subhn v17.4H, v22.4S, v6.4S
|
||||
subhn2 v17.8H, v21.4S, v5.4S
|
||||
|
||||
ret
|
||||
endfunc
|
||||
@@ -229,33 +229,33 @@ function ff_simple_idct_put_neon, export=1
|
||||
idct_row4_neon v28, v29, v30, v31, 2
|
||||
bl idct_col4_neon1
|
||||
|
||||
sqshrun v1.8b, v7.8h, #COL_SHIFT-16
|
||||
sqshrun2 v1.16b, v16.8h, #COL_SHIFT-16
|
||||
sqshrun v3.8b, v17.8h, #COL_SHIFT-16
|
||||
sqshrun2 v3.16b, v18.8h, #COL_SHIFT-16
|
||||
sqshrun v1.8B, v7.8H, #COL_SHIFT-16
|
||||
sqshrun2 v1.16B, v16.8H, #COL_SHIFT-16
|
||||
sqshrun v3.8B, v17.8H, #COL_SHIFT-16
|
||||
sqshrun2 v3.16B, v18.8H, #COL_SHIFT-16
|
||||
|
||||
bl idct_col4_neon2
|
||||
|
||||
sqshrun v2.8b, v7.8h, #COL_SHIFT-16
|
||||
sqshrun2 v2.16b, v16.8h, #COL_SHIFT-16
|
||||
sqshrun v4.8b, v17.8h, #COL_SHIFT-16
|
||||
sqshrun2 v4.16b, v18.8h, #COL_SHIFT-16
|
||||
sqshrun v2.8B, v7.8H, #COL_SHIFT-16
|
||||
sqshrun2 v2.16B, v16.8H, #COL_SHIFT-16
|
||||
sqshrun v4.8B, v17.8H, #COL_SHIFT-16
|
||||
sqshrun2 v4.16B, v18.8H, #COL_SHIFT-16
|
||||
|
||||
zip1 v16.4s, v1.4s, v2.4s
|
||||
zip2 v17.4s, v1.4s, v2.4s
|
||||
zip1 v16.4S, v1.4S, v2.4S
|
||||
zip2 v17.4S, v1.4S, v2.4S
|
||||
|
||||
st1 {v16.d}[0], [x0], x1
|
||||
st1 {v16.d}[1], [x0], x1
|
||||
st1 {v16.D}[0], [x0], x1
|
||||
st1 {v16.D}[1], [x0], x1
|
||||
|
||||
zip1 v18.4s, v3.4s, v4.4s
|
||||
zip2 v19.4s, v3.4s, v4.4s
|
||||
zip1 v18.4S, v3.4S, v4.4S
|
||||
zip2 v19.4S, v3.4S, v4.4S
|
||||
|
||||
st1 {v17.d}[0], [x0], x1
|
||||
st1 {v17.d}[1], [x0], x1
|
||||
st1 {v18.d}[0], [x0], x1
|
||||
st1 {v18.d}[1], [x0], x1
|
||||
st1 {v19.d}[0], [x0], x1
|
||||
st1 {v19.d}[1], [x0], x1
|
||||
st1 {v17.D}[0], [x0], x1
|
||||
st1 {v17.D}[1], [x0], x1
|
||||
st1 {v18.D}[0], [x0], x1
|
||||
st1 {v18.D}[1], [x0], x1
|
||||
st1 {v19.D}[0], [x0], x1
|
||||
st1 {v19.D}[1], [x0], x1
|
||||
|
||||
idct_end
|
||||
endfunc
|
||||
@@ -267,59 +267,59 @@ function ff_simple_idct_add_neon, export=1
|
||||
idct_row4_neon v28, v29, v30, v31, 2
|
||||
bl idct_col4_neon1
|
||||
|
||||
sshr v1.8h, v7.8h, #COL_SHIFT-16
|
||||
sshr v2.8h, v16.8h, #COL_SHIFT-16
|
||||
sshr v3.8h, v17.8h, #COL_SHIFT-16
|
||||
sshr v4.8h, v18.8h, #COL_SHIFT-16
|
||||
sshr v1.8H, v7.8H, #COL_SHIFT-16
|
||||
sshr v2.8H, v16.8H, #COL_SHIFT-16
|
||||
sshr v3.8H, v17.8H, #COL_SHIFT-16
|
||||
sshr v4.8H, v18.8H, #COL_SHIFT-16
|
||||
|
||||
bl idct_col4_neon2
|
||||
|
||||
sshr v7.8h, v7.8h, #COL_SHIFT-16
|
||||
sshr v16.8h, v16.8h, #COL_SHIFT-16
|
||||
sshr v17.8h, v17.8h, #COL_SHIFT-16
|
||||
sshr v18.8h, v18.8h, #COL_SHIFT-16
|
||||
sshr v7.8H, v7.8H, #COL_SHIFT-16
|
||||
sshr v16.8H, v16.8H, #COL_SHIFT-16
|
||||
sshr v17.8H, v17.8H, #COL_SHIFT-16
|
||||
sshr v18.8H, v18.8H, #COL_SHIFT-16
|
||||
|
||||
mov x9, x0
|
||||
ld1 {v19.d}[0], [x0], x1
|
||||
zip1 v23.2d, v1.2d, v7.2d
|
||||
zip2 v24.2d, v1.2d, v7.2d
|
||||
ld1 {v19.d}[1], [x0], x1
|
||||
zip1 v25.2d, v2.2d, v16.2d
|
||||
zip2 v26.2d, v2.2d, v16.2d
|
||||
ld1 {v20.d}[0], [x0], x1
|
||||
zip1 v27.2d, v3.2d, v17.2d
|
||||
zip2 v28.2d, v3.2d, v17.2d
|
||||
ld1 {v20.d}[1], [x0], x1
|
||||
zip1 v29.2d, v4.2d, v18.2d
|
||||
zip2 v30.2d, v4.2d, v18.2d
|
||||
ld1 {v21.d}[0], [x0], x1
|
||||
uaddw v23.8h, v23.8h, v19.8b
|
||||
uaddw2 v24.8h, v24.8h, v19.16b
|
||||
ld1 {v21.d}[1], [x0], x1
|
||||
sqxtun v23.8b, v23.8h
|
||||
sqxtun2 v23.16b, v24.8h
|
||||
ld1 {v22.d}[0], [x0], x1
|
||||
uaddw v24.8h, v25.8h, v20.8b
|
||||
uaddw2 v25.8h, v26.8h, v20.16b
|
||||
ld1 {v22.d}[1], [x0], x1
|
||||
sqxtun v24.8b, v24.8h
|
||||
sqxtun2 v24.16b, v25.8h
|
||||
st1 {v23.d}[0], [x9], x1
|
||||
uaddw v25.8h, v27.8h, v21.8b
|
||||
uaddw2 v26.8h, v28.8h, v21.16b
|
||||
st1 {v23.d}[1], [x9], x1
|
||||
sqxtun v25.8b, v25.8h
|
||||
sqxtun2 v25.16b, v26.8h
|
||||
st1 {v24.d}[0], [x9], x1
|
||||
uaddw v26.8h, v29.8h, v22.8b
|
||||
uaddw2 v27.8h, v30.8h, v22.16b
|
||||
st1 {v24.d}[1], [x9], x1
|
||||
sqxtun v26.8b, v26.8h
|
||||
sqxtun2 v26.16b, v27.8h
|
||||
st1 {v25.d}[0], [x9], x1
|
||||
st1 {v25.d}[1], [x9], x1
|
||||
st1 {v26.d}[0], [x9], x1
|
||||
st1 {v26.d}[1], [x9], x1
|
||||
ld1 {v19.D}[0], [x0], x1
|
||||
zip1 v23.2D, v1.2D, v7.2D
|
||||
zip2 v24.2D, v1.2D, v7.2D
|
||||
ld1 {v19.D}[1], [x0], x1
|
||||
zip1 v25.2D, v2.2D, v16.2D
|
||||
zip2 v26.2D, v2.2D, v16.2D
|
||||
ld1 {v20.D}[0], [x0], x1
|
||||
zip1 v27.2D, v3.2D, v17.2D
|
||||
zip2 v28.2D, v3.2D, v17.2D
|
||||
ld1 {v20.D}[1], [x0], x1
|
||||
zip1 v29.2D, v4.2D, v18.2D
|
||||
zip2 v30.2D, v4.2D, v18.2D
|
||||
ld1 {v21.D}[0], [x0], x1
|
||||
uaddw v23.8H, v23.8H, v19.8B
|
||||
uaddw2 v24.8H, v24.8H, v19.16B
|
||||
ld1 {v21.D}[1], [x0], x1
|
||||
sqxtun v23.8B, v23.8H
|
||||
sqxtun2 v23.16B, v24.8H
|
||||
ld1 {v22.D}[0], [x0], x1
|
||||
uaddw v24.8H, v25.8H, v20.8B
|
||||
uaddw2 v25.8H, v26.8H, v20.16B
|
||||
ld1 {v22.D}[1], [x0], x1
|
||||
sqxtun v24.8B, v24.8H
|
||||
sqxtun2 v24.16B, v25.8H
|
||||
st1 {v23.D}[0], [x9], x1
|
||||
uaddw v25.8H, v27.8H, v21.8B
|
||||
uaddw2 v26.8H, v28.8H, v21.16B
|
||||
st1 {v23.D}[1], [x9], x1
|
||||
sqxtun v25.8B, v25.8H
|
||||
sqxtun2 v25.16B, v26.8H
|
||||
st1 {v24.D}[0], [x9], x1
|
||||
uaddw v26.8H, v29.8H, v22.8B
|
||||
uaddw2 v27.8H, v30.8H, v22.16B
|
||||
st1 {v24.D}[1], [x9], x1
|
||||
sqxtun v26.8B, v26.8H
|
||||
sqxtun2 v26.16B, v27.8H
|
||||
st1 {v25.D}[0], [x9], x1
|
||||
st1 {v25.D}[1], [x9], x1
|
||||
st1 {v26.D}[0], [x9], x1
|
||||
st1 {v26.D}[1], [x9], x1
|
||||
|
||||
idct_end
|
||||
endfunc
|
||||
@@ -333,30 +333,30 @@ function ff_simple_idct_neon, export=1
|
||||
sub x2, x2, #128
|
||||
bl idct_col4_neon1
|
||||
|
||||
sshr v1.8h, v7.8h, #COL_SHIFT-16
|
||||
sshr v2.8h, v16.8h, #COL_SHIFT-16
|
||||
sshr v3.8h, v17.8h, #COL_SHIFT-16
|
||||
sshr v4.8h, v18.8h, #COL_SHIFT-16
|
||||
sshr v1.8H, v7.8H, #COL_SHIFT-16
|
||||
sshr v2.8H, v16.8H, #COL_SHIFT-16
|
||||
sshr v3.8H, v17.8H, #COL_SHIFT-16
|
||||
sshr v4.8H, v18.8H, #COL_SHIFT-16
|
||||
|
||||
bl idct_col4_neon2
|
||||
|
||||
sshr v7.8h, v7.8h, #COL_SHIFT-16
|
||||
sshr v16.8h, v16.8h, #COL_SHIFT-16
|
||||
sshr v17.8h, v17.8h, #COL_SHIFT-16
|
||||
sshr v18.8h, v18.8h, #COL_SHIFT-16
|
||||
sshr v7.8H, v7.8H, #COL_SHIFT-16
|
||||
sshr v16.8H, v16.8H, #COL_SHIFT-16
|
||||
sshr v17.8H, v17.8H, #COL_SHIFT-16
|
||||
sshr v18.8H, v18.8H, #COL_SHIFT-16
|
||||
|
||||
zip1 v23.2d, v1.2d, v7.2d
|
||||
zip2 v24.2d, v1.2d, v7.2d
|
||||
st1 {v23.2d,v24.2d}, [x2], #32
|
||||
zip1 v25.2d, v2.2d, v16.2d
|
||||
zip2 v26.2d, v2.2d, v16.2d
|
||||
st1 {v25.2d,v26.2d}, [x2], #32
|
||||
zip1 v27.2d, v3.2d, v17.2d
|
||||
zip2 v28.2d, v3.2d, v17.2d
|
||||
st1 {v27.2d,v28.2d}, [x2], #32
|
||||
zip1 v29.2d, v4.2d, v18.2d
|
||||
zip2 v30.2d, v4.2d, v18.2d
|
||||
st1 {v29.2d,v30.2d}, [x2], #32
|
||||
zip1 v23.2D, v1.2D, v7.2D
|
||||
zip2 v24.2D, v1.2D, v7.2D
|
||||
st1 {v23.2D,v24.2D}, [x2], #32
|
||||
zip1 v25.2D, v2.2D, v16.2D
|
||||
zip2 v26.2D, v2.2D, v16.2D
|
||||
st1 {v25.2D,v26.2D}, [x2], #32
|
||||
zip1 v27.2D, v3.2D, v17.2D
|
||||
zip2 v28.2D, v3.2D, v17.2D
|
||||
st1 {v27.2D,v28.2D}, [x2], #32
|
||||
zip1 v29.2D, v4.2D, v18.2D
|
||||
zip2 v30.2D, v4.2D, v18.2D
|
||||
st1 {v29.2D,v30.2D}, [x2], #32
|
||||
|
||||
idct_end
|
||||
endfunc
|
||||
|
||||
+151
-151
@@ -330,32 +330,32 @@ endfunc
|
||||
// v17: hev
|
||||
|
||||
// convert to signed value:
|
||||
eor v3.16b, v3.16b, v21.16b // PS0 = P0 ^ 0x80
|
||||
eor v4.16b, v4.16b, v21.16b // QS0 = Q0 ^ 0x80
|
||||
eor v3.16b, v3.16b, v21.16b // PS0 = P0 ^ 0x80
|
||||
eor v4.16b, v4.16b, v21.16b // QS0 = Q0 ^ 0x80
|
||||
|
||||
movi v20.8h, #3
|
||||
ssubl v18.8h, v4.8b, v3.8b // QS0 - PS0
|
||||
ssubl2 v19.8h, v4.16b, v3.16b // (widened to 16bit)
|
||||
eor v2.16b, v2.16b, v21.16b // PS1 = P1 ^ 0x80
|
||||
eor v5.16b, v5.16b, v21.16b // QS1 = Q1 ^ 0x80
|
||||
mul v18.8h, v18.8h, v20.8h // w = 3 * (QS0 - PS0)
|
||||
mul v19.8h, v19.8h, v20.8h
|
||||
movi v20.8h, #3
|
||||
ssubl v18.8h, v4.8b, v3.8b // QS0 - PS0
|
||||
ssubl2 v19.8h, v4.16b, v3.16b // (widened to 16bit)
|
||||
eor v2.16b, v2.16b, v21.16b // PS1 = P1 ^ 0x80
|
||||
eor v5.16b, v5.16b, v21.16b // QS1 = Q1 ^ 0x80
|
||||
mul v18.8h, v18.8h, v20.8h // w = 3 * (QS0 - PS0)
|
||||
mul v19.8h, v19.8h, v20.8h
|
||||
|
||||
sqsub v20.16b, v2.16b, v5.16b // clamp(PS1-QS1)
|
||||
movi v22.16b, #4
|
||||
movi v23.16b, #3
|
||||
sqsub v20.16b, v2.16b, v5.16b // clamp(PS1-QS1)
|
||||
movi v22.16b, #4
|
||||
movi v23.16b, #3
|
||||
.if \inner
|
||||
and v20.16b, v20.16b, v17.16b // if(hev) w += clamp(PS1-QS1)
|
||||
and v20.16b, v20.16b, v17.16b // if(hev) w += clamp(PS1-QS1)
|
||||
.endif
|
||||
saddw v18.8h, v18.8h, v20.8b // w += clamp(PS1-QS1)
|
||||
saddw2 v19.8h, v19.8h, v20.16b
|
||||
sqxtn v18.8b, v18.8h // narrow result back into v18
|
||||
sqxtn2 v18.16b, v19.8h
|
||||
saddw v18.8h, v18.8h, v20.8b // w += clamp(PS1-QS1)
|
||||
saddw2 v19.8h, v19.8h, v20.16b
|
||||
sqxtn v18.8b, v18.8h // narrow result back into v18
|
||||
sqxtn2 v18.16b, v19.8h
|
||||
.if !\inner && !\simple
|
||||
eor v1.16b, v1.16b, v21.16b // PS2 = P2 ^ 0x80
|
||||
eor v6.16b, v6.16b, v21.16b // QS2 = Q2 ^ 0x80
|
||||
eor v1.16b, v1.16b, v21.16b // PS2 = P2 ^ 0x80
|
||||
eor v6.16b, v6.16b, v21.16b // QS2 = Q2 ^ 0x80
|
||||
.endif
|
||||
and v18.16b, v18.16b, v16.16b // w &= normal_limit
|
||||
and v18.16b, v18.16b, v16.16b // w &= normal_limit
|
||||
|
||||
// registers used at this point..
|
||||
// v0 -> P3 (don't corrupt)
|
||||
@@ -375,44 +375,44 @@ endfunc
|
||||
// P0 = s2u(PS0 + c2);
|
||||
|
||||
.if \simple
|
||||
sqadd v19.16b, v18.16b, v22.16b // c1 = clamp((w&hev)+4)
|
||||
sqadd v20.16b, v18.16b, v23.16b // c2 = clamp((w&hev)+3)
|
||||
sshr v19.16b, v19.16b, #3 // c1 >>= 3
|
||||
sshr v20.16b, v20.16b, #3 // c2 >>= 3
|
||||
sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1)
|
||||
sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2)
|
||||
eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80
|
||||
eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80
|
||||
eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80
|
||||
eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80
|
||||
sqadd v19.16b, v18.16b, v22.16b // c1 = clamp((w&hev)+4)
|
||||
sqadd v20.16b, v18.16b, v23.16b // c2 = clamp((w&hev)+3)
|
||||
sshr v19.16b, v19.16b, #3 // c1 >>= 3
|
||||
sshr v20.16b, v20.16b, #3 // c2 >>= 3
|
||||
sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1)
|
||||
sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2)
|
||||
eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80
|
||||
eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80
|
||||
eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80
|
||||
eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80
|
||||
.elseif \inner
|
||||
// the !is4tap case of filter_common, only used for inner blocks
|
||||
// c3 = ((c1&~hev) + 1) >> 1;
|
||||
// Q1 = s2u(QS1 - c3);
|
||||
// P1 = s2u(PS1 + c3);
|
||||
sqadd v19.16b, v18.16b, v22.16b // c1 = clamp((w&hev)+4)
|
||||
sqadd v20.16b, v18.16b, v23.16b // c2 = clamp((w&hev)+3)
|
||||
sshr v19.16b, v19.16b, #3 // c1 >>= 3
|
||||
sshr v20.16b, v20.16b, #3 // c2 >>= 3
|
||||
sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1)
|
||||
sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2)
|
||||
bic v19.16b, v19.16b, v17.16b // c1 & ~hev
|
||||
eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80
|
||||
srshr v19.16b, v19.16b, #1 // c3 >>= 1
|
||||
eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80
|
||||
sqsub v5.16b, v5.16b, v19.16b // QS1 = clamp(QS1-c3)
|
||||
sqadd v2.16b, v2.16b, v19.16b // PS1 = clamp(PS1+c3)
|
||||
eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80
|
||||
eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80
|
||||
sqadd v19.16b, v18.16b, v22.16b // c1 = clamp((w&hev)+4)
|
||||
sqadd v20.16b, v18.16b, v23.16b // c2 = clamp((w&hev)+3)
|
||||
sshr v19.16b, v19.16b, #3 // c1 >>= 3
|
||||
sshr v20.16b, v20.16b, #3 // c2 >>= 3
|
||||
sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1)
|
||||
sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2)
|
||||
bic v19.16b, v19.16b, v17.16b // c1 & ~hev
|
||||
eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80
|
||||
srshr v19.16b, v19.16b, #1 // c3 >>= 1
|
||||
eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80
|
||||
sqsub v5.16b, v5.16b, v19.16b // QS1 = clamp(QS1-c3)
|
||||
sqadd v2.16b, v2.16b, v19.16b // PS1 = clamp(PS1+c3)
|
||||
eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80
|
||||
eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80
|
||||
.else
|
||||
and v20.16b, v18.16b, v17.16b // w & hev
|
||||
sqadd v19.16b, v20.16b, v22.16b // c1 = clamp((w&hev)+4)
|
||||
sqadd v20.16b, v20.16b, v23.16b // c2 = clamp((w&hev)+3)
|
||||
sshr v19.16b, v19.16b, #3 // c1 >>= 3
|
||||
sshr v20.16b, v20.16b, #3 // c2 >>= 3
|
||||
bic v18.16b, v18.16b, v17.16b // w &= ~hev
|
||||
sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1)
|
||||
sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2)
|
||||
and v20.16b, v18.16b, v17.16b // w & hev
|
||||
sqadd v19.16b, v20.16b, v22.16b // c1 = clamp((w&hev)+4)
|
||||
sqadd v20.16b, v20.16b, v23.16b // c2 = clamp((w&hev)+3)
|
||||
sshr v19.16b, v19.16b, #3 // c1 >>= 3
|
||||
sshr v20.16b, v20.16b, #3 // c2 >>= 3
|
||||
bic v18.16b, v18.16b, v17.16b // w &= ~hev
|
||||
sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1)
|
||||
sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2)
|
||||
|
||||
// filter_mbedge:
|
||||
// a = clamp((27*w + 63) >> 7);
|
||||
@@ -424,35 +424,35 @@ endfunc
|
||||
// a = clamp((9*w + 63) >> 7);
|
||||
// Q2 = s2u(QS2 - a);
|
||||
// P2 = s2u(PS2 + a);
|
||||
movi v17.8h, #63
|
||||
sshll v22.8h, v18.8b, #3
|
||||
sshll2 v23.8h, v18.16b, #3
|
||||
saddw v22.8h, v22.8h, v18.8b
|
||||
saddw2 v23.8h, v23.8h, v18.16b
|
||||
add v16.8h, v17.8h, v22.8h
|
||||
add v17.8h, v17.8h, v23.8h // 9*w + 63
|
||||
add v19.8h, v16.8h, v22.8h
|
||||
add v20.8h, v17.8h, v23.8h // 18*w + 63
|
||||
add v22.8h, v19.8h, v22.8h
|
||||
add v23.8h, v20.8h, v23.8h // 27*w + 63
|
||||
sqshrn v16.8b, v16.8h, #7
|
||||
sqshrn2 v16.16b, v17.8h, #7 // clamp(( 9*w + 63)>>7)
|
||||
sqshrn v19.8b, v19.8h, #7
|
||||
sqshrn2 v19.16b, v20.8h, #7 // clamp((18*w + 63)>>7)
|
||||
sqshrn v22.8b, v22.8h, #7
|
||||
sqshrn2 v22.16b, v23.8h, #7 // clamp((27*w + 63)>>7)
|
||||
sqadd v1.16b, v1.16b, v16.16b // PS2 = clamp(PS2+a)
|
||||
sqsub v6.16b, v6.16b, v16.16b // QS2 = clamp(QS2-a)
|
||||
sqadd v2.16b, v2.16b, v19.16b // PS1 = clamp(PS1+a)
|
||||
sqsub v5.16b, v5.16b, v19.16b // QS1 = clamp(QS1-a)
|
||||
sqadd v3.16b, v3.16b, v22.16b // PS0 = clamp(PS0+a)
|
||||
sqsub v4.16b, v4.16b, v22.16b // QS0 = clamp(QS0-a)
|
||||
eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80
|
||||
eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80
|
||||
eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80
|
||||
eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80
|
||||
eor v1.16b, v1.16b, v21.16b // P2 = PS2 ^ 0x80
|
||||
eor v6.16b, v6.16b, v21.16b // Q2 = QS2 ^ 0x80
|
||||
movi v17.8h, #63
|
||||
sshll v22.8h, v18.8b, #3
|
||||
sshll2 v23.8h, v18.16b, #3
|
||||
saddw v22.8h, v22.8h, v18.8b
|
||||
saddw2 v23.8h, v23.8h, v18.16b
|
||||
add v16.8h, v17.8h, v22.8h
|
||||
add v17.8h, v17.8h, v23.8h // 9*w + 63
|
||||
add v19.8h, v16.8h, v22.8h
|
||||
add v20.8h, v17.8h, v23.8h // 18*w + 63
|
||||
add v22.8h, v19.8h, v22.8h
|
||||
add v23.8h, v20.8h, v23.8h // 27*w + 63
|
||||
sqshrn v16.8b, v16.8h, #7
|
||||
sqshrn2 v16.16b, v17.8h, #7 // clamp(( 9*w + 63)>>7)
|
||||
sqshrn v19.8b, v19.8h, #7
|
||||
sqshrn2 v19.16b, v20.8h, #7 // clamp((18*w + 63)>>7)
|
||||
sqshrn v22.8b, v22.8h, #7
|
||||
sqshrn2 v22.16b, v23.8h, #7 // clamp((27*w + 63)>>7)
|
||||
sqadd v1.16b, v1.16b, v16.16b // PS2 = clamp(PS2+a)
|
||||
sqsub v6.16b, v6.16b, v16.16b // QS2 = clamp(QS2-a)
|
||||
sqadd v2.16b, v2.16b, v19.16b // PS1 = clamp(PS1+a)
|
||||
sqsub v5.16b, v5.16b, v19.16b // QS1 = clamp(QS1-a)
|
||||
sqadd v3.16b, v3.16b, v22.16b // PS0 = clamp(PS0+a)
|
||||
sqsub v4.16b, v4.16b, v22.16b // QS0 = clamp(QS0-a)
|
||||
eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80
|
||||
eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80
|
||||
eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80
|
||||
eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80
|
||||
eor v1.16b, v1.16b, v21.16b // P2 = PS2 ^ 0x80
|
||||
eor v6.16b, v6.16b, v21.16b // Q2 = QS2 ^ 0x80
|
||||
.endif
|
||||
.endm
|
||||
|
||||
@@ -507,48 +507,48 @@ function ff_vp8_v_loop_filter8uv\name\()_neon, export=1
|
||||
sub x0, x0, x2, lsl #2
|
||||
sub x1, x1, x2, lsl #2
|
||||
// Load pixels:
|
||||
ld1 {v0.d}[0], [x0], x2 // P3
|
||||
ld1 {v0.d}[1], [x1], x2 // P3
|
||||
ld1 {v1.d}[0], [x0], x2 // P2
|
||||
ld1 {v1.d}[1], [x1], x2 // P2
|
||||
ld1 {v2.d}[0], [x0], x2 // P1
|
||||
ld1 {v2.d}[1], [x1], x2 // P1
|
||||
ld1 {v3.d}[0], [x0], x2 // P0
|
||||
ld1 {v3.d}[1], [x1], x2 // P0
|
||||
ld1 {v4.d}[0], [x0], x2 // Q0
|
||||
ld1 {v4.d}[1], [x1], x2 // Q0
|
||||
ld1 {v5.d}[0], [x0], x2 // Q1
|
||||
ld1 {v5.d}[1], [x1], x2 // Q1
|
||||
ld1 {v6.d}[0], [x0], x2 // Q2
|
||||
ld1 {v6.d}[1], [x1], x2 // Q2
|
||||
ld1 {v7.d}[0], [x0] // Q3
|
||||
ld1 {v7.d}[1], [x1] // Q3
|
||||
ld1 {v0.d}[0], [x0], x2 // P3
|
||||
ld1 {v0.d}[1], [x1], x2 // P3
|
||||
ld1 {v1.d}[0], [x0], x2 // P2
|
||||
ld1 {v1.d}[1], [x1], x2 // P2
|
||||
ld1 {v2.d}[0], [x0], x2 // P1
|
||||
ld1 {v2.d}[1], [x1], x2 // P1
|
||||
ld1 {v3.d}[0], [x0], x2 // P0
|
||||
ld1 {v3.d}[1], [x1], x2 // P0
|
||||
ld1 {v4.d}[0], [x0], x2 // Q0
|
||||
ld1 {v4.d}[1], [x1], x2 // Q0
|
||||
ld1 {v5.d}[0], [x0], x2 // Q1
|
||||
ld1 {v5.d}[1], [x1], x2 // Q1
|
||||
ld1 {v6.d}[0], [x0], x2 // Q2
|
||||
ld1 {v6.d}[1], [x1], x2 // Q2
|
||||
ld1 {v7.d}[0], [x0] // Q3
|
||||
ld1 {v7.d}[1], [x1] // Q3
|
||||
|
||||
dup v22.16b, w3 // flim_E
|
||||
dup v23.16b, w4 // flim_I
|
||||
dup v22.16b, w3 // flim_E
|
||||
dup v23.16b, w4 // flim_I
|
||||
|
||||
vp8_loop_filter inner=\inner, hev_thresh=w5
|
||||
|
||||
// back up to P2: u,v -= stride * 6
|
||||
sub x0, x0, x2, lsl #2
|
||||
sub x1, x1, x2, lsl #2
|
||||
sub x0, x0, x2, lsl #1
|
||||
sub x1, x1, x2, lsl #1
|
||||
sub x0, x0, x2, lsl #2
|
||||
sub x1, x1, x2, lsl #2
|
||||
sub x0, x0, x2, lsl #1
|
||||
sub x1, x1, x2, lsl #1
|
||||
|
||||
// Store pixels:
|
||||
|
||||
st1 {v1.d}[0], [x0], x2 // P2
|
||||
st1 {v1.d}[1], [x1], x2 // P2
|
||||
st1 {v2.d}[0], [x0], x2 // P1
|
||||
st1 {v2.d}[1], [x1], x2 // P1
|
||||
st1 {v3.d}[0], [x0], x2 // P0
|
||||
st1 {v3.d}[1], [x1], x2 // P0
|
||||
st1 {v4.d}[0], [x0], x2 // Q0
|
||||
st1 {v4.d}[1], [x1], x2 // Q0
|
||||
st1 {v5.d}[0], [x0], x2 // Q1
|
||||
st1 {v5.d}[1], [x1], x2 // Q1
|
||||
st1 {v6.d}[0], [x0] // Q2
|
||||
st1 {v6.d}[1], [x1] // Q2
|
||||
st1 {v1.d}[0], [x0], x2 // P2
|
||||
st1 {v1.d}[1], [x1], x2 // P2
|
||||
st1 {v2.d}[0], [x0], x2 // P1
|
||||
st1 {v2.d}[1], [x1], x2 // P1
|
||||
st1 {v3.d}[0], [x0], x2 // P0
|
||||
st1 {v3.d}[1], [x1], x2 // P0
|
||||
st1 {v4.d}[0], [x0], x2 // Q0
|
||||
st1 {v4.d}[1], [x1], x2 // Q0
|
||||
st1 {v5.d}[0], [x0], x2 // Q1
|
||||
st1 {v5.d}[1], [x1], x2 // Q1
|
||||
st1 {v6.d}[0], [x0] // Q2
|
||||
st1 {v6.d}[1], [x1] // Q2
|
||||
|
||||
ret
|
||||
endfunc
|
||||
@@ -579,7 +579,7 @@ function ff_vp8_h_loop_filter16\name\()_neon, export=1
|
||||
ld1 {v6.d}[1], [x0], x1
|
||||
ld1 {v7.d}[1], [x0], x1
|
||||
|
||||
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
|
||||
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
|
||||
|
||||
dup v22.16b, w2 // flim_E
|
||||
.if !\simple
|
||||
@@ -590,7 +590,7 @@ function ff_vp8_h_loop_filter16\name\()_neon, export=1
|
||||
|
||||
sub x0, x0, x1, lsl #4 // backup 16 rows
|
||||
|
||||
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
|
||||
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
|
||||
|
||||
// Store pixels:
|
||||
st1 {v0.d}[0], [x0], x1
|
||||
@@ -624,24 +624,24 @@ function ff_vp8_h_loop_filter8uv\name\()_neon, export=1
|
||||
sub x1, x1, #4
|
||||
|
||||
// Load pixels:
|
||||
ld1 {v0.d}[0], [x0], x2 // load u
|
||||
ld1 {v0.d}[1], [x1], x2 // load v
|
||||
ld1 {v1.d}[0], [x0], x2
|
||||
ld1 {v1.d}[1], [x1], x2
|
||||
ld1 {v2.d}[0], [x0], x2
|
||||
ld1 {v2.d}[1], [x1], x2
|
||||
ld1 {v3.d}[0], [x0], x2
|
||||
ld1 {v3.d}[1], [x1], x2
|
||||
ld1 {v4.d}[0], [x0], x2
|
||||
ld1 {v4.d}[1], [x1], x2
|
||||
ld1 {v5.d}[0], [x0], x2
|
||||
ld1 {v5.d}[1], [x1], x2
|
||||
ld1 {v6.d}[0], [x0], x2
|
||||
ld1 {v6.d}[1], [x1], x2
|
||||
ld1 {v7.d}[0], [x0], x2
|
||||
ld1 {v7.d}[1], [x1], x2
|
||||
ld1 {v0.d}[0], [x0], x2 // load u
|
||||
ld1 {v0.d}[1], [x1], x2 // load v
|
||||
ld1 {v1.d}[0], [x0], x2
|
||||
ld1 {v1.d}[1], [x1], x2
|
||||
ld1 {v2.d}[0], [x0], x2
|
||||
ld1 {v2.d}[1], [x1], x2
|
||||
ld1 {v3.d}[0], [x0], x2
|
||||
ld1 {v3.d}[1], [x1], x2
|
||||
ld1 {v4.d}[0], [x0], x2
|
||||
ld1 {v4.d}[1], [x1], x2
|
||||
ld1 {v5.d}[0], [x0], x2
|
||||
ld1 {v5.d}[1], [x1], x2
|
||||
ld1 {v6.d}[0], [x0], x2
|
||||
ld1 {v6.d}[1], [x1], x2
|
||||
ld1 {v7.d}[0], [x0], x2
|
||||
ld1 {v7.d}[1], [x1], x2
|
||||
|
||||
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
|
||||
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
|
||||
|
||||
dup v22.16b, w3 // flim_E
|
||||
dup v23.16b, w4 // flim_I
|
||||
@@ -651,25 +651,25 @@ function ff_vp8_h_loop_filter8uv\name\()_neon, export=1
|
||||
sub x0, x0, x2, lsl #3 // backup u 8 rows
|
||||
sub x1, x1, x2, lsl #3 // backup v 8 rows
|
||||
|
||||
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
|
||||
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
|
||||
|
||||
// Store pixels:
|
||||
st1 {v0.d}[0], [x0], x2 // load u
|
||||
st1 {v0.d}[1], [x1], x2 // load v
|
||||
st1 {v1.d}[0], [x0], x2
|
||||
st1 {v1.d}[1], [x1], x2
|
||||
st1 {v2.d}[0], [x0], x2
|
||||
st1 {v2.d}[1], [x1], x2
|
||||
st1 {v3.d}[0], [x0], x2
|
||||
st1 {v3.d}[1], [x1], x2
|
||||
st1 {v4.d}[0], [x0], x2
|
||||
st1 {v4.d}[1], [x1], x2
|
||||
st1 {v5.d}[0], [x0], x2
|
||||
st1 {v5.d}[1], [x1], x2
|
||||
st1 {v6.d}[0], [x0], x2
|
||||
st1 {v6.d}[1], [x1], x2
|
||||
st1 {v7.d}[0], [x0]
|
||||
st1 {v7.d}[1], [x1]
|
||||
st1 {v0.d}[0], [x0], x2 // load u
|
||||
st1 {v0.d}[1], [x1], x2 // load v
|
||||
st1 {v1.d}[0], [x0], x2
|
||||
st1 {v1.d}[1], [x1], x2
|
||||
st1 {v2.d}[0], [x0], x2
|
||||
st1 {v2.d}[1], [x1], x2
|
||||
st1 {v3.d}[0], [x0], x2
|
||||
st1 {v3.d}[1], [x1], x2
|
||||
st1 {v4.d}[0], [x0], x2
|
||||
st1 {v4.d}[1], [x1], x2
|
||||
st1 {v5.d}[0], [x0], x2
|
||||
st1 {v5.d}[1], [x1], x2
|
||||
st1 {v6.d}[0], [x0], x2
|
||||
st1 {v6.d}[1], [x1], x2
|
||||
st1 {v7.d}[0], [x0]
|
||||
st1 {v7.d}[1], [x1]
|
||||
|
||||
ret
|
||||
|
||||
|
||||
@@ -230,9 +230,6 @@ function \type\()_8tap_\size\()h_\idx1\idx2
|
||||
// reduced dst stride
|
||||
.if \size >= 16
|
||||
sub x1, x1, x5
|
||||
.elseif \size == 4
|
||||
add x12, x2, #8
|
||||
add x13, x7, #8
|
||||
.endif
|
||||
// size >= 16 loads two qwords and increments x2,
|
||||
// for size 4/8 it's enough with one qword and no
|
||||
@@ -251,14 +248,9 @@ function \type\()_8tap_\size\()h_\idx1\idx2
|
||||
.if \size >= 16
|
||||
ld1 {v4.8b, v5.8b, v6.8b}, [x2], #24
|
||||
ld1 {v16.8b, v17.8b, v18.8b}, [x7], #24
|
||||
.elseif \size == 8
|
||||
.else
|
||||
ld1 {v4.8b, v5.8b}, [x2]
|
||||
ld1 {v16.8b, v17.8b}, [x7]
|
||||
.else // \size == 4
|
||||
ld1 {v4.8b}, [x2]
|
||||
ld1 {v16.8b}, [x7]
|
||||
ld1 {v5.s}[0], [x12], x3
|
||||
ld1 {v17.s}[0], [x13], x3
|
||||
.endif
|
||||
uxtl v4.8h, v4.8b
|
||||
uxtl v5.8h, v5.8b
|
||||
|
||||
+17
-17
@@ -104,26 +104,26 @@ static int aasc_decode_frame(AVCodecContext *avctx,
|
||||
ff_msrle_decode(avctx, s->frame, 8, &s->gb);
|
||||
break;
|
||||
case MKTAG('A', 'A', 'S', 'C'):
|
||||
switch (compr) {
|
||||
case 0:
|
||||
stride = (avctx->width * psize + psize) & ~psize;
|
||||
if (buf_size < stride * avctx->height)
|
||||
return AVERROR_INVALIDDATA;
|
||||
for (i = avctx->height - 1; i >= 0; i--) {
|
||||
memcpy(s->frame->data[0] + i * s->frame->linesize[0], buf, avctx->width * psize);
|
||||
buf += stride;
|
||||
buf_size -= stride;
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
bytestream2_init(&s->gb, buf, buf_size);
|
||||
ff_msrle_decode(avctx, s->frame, 8, &s->gb);
|
||||
break;
|
||||
default:
|
||||
av_log(avctx, AV_LOG_ERROR, "Unknown compression type %d\n", compr);
|
||||
switch (compr) {
|
||||
case 0:
|
||||
stride = (avctx->width * psize + psize) & ~psize;
|
||||
if (buf_size < stride * avctx->height)
|
||||
return AVERROR_INVALIDDATA;
|
||||
for (i = avctx->height - 1; i >= 0; i--) {
|
||||
memcpy(s->frame->data[0] + i * s->frame->linesize[0], buf, avctx->width * psize);
|
||||
buf += stride;
|
||||
buf_size -= stride;
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
bytestream2_init(&s->gb, buf, buf_size);
|
||||
ff_msrle_decode(avctx, s->frame, 8, &s->gb);
|
||||
break;
|
||||
default:
|
||||
av_log(avctx, AV_LOG_ERROR, "Unknown compression type %d\n", compr);
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
av_log(avctx, AV_LOG_ERROR, "Unknown FourCC: %X\n", avctx->codec_tag);
|
||||
return -1;
|
||||
|
||||
@@ -75,7 +75,6 @@
|
||||
#define AC3_DYNAMIC_RANGE1 0
|
||||
|
||||
typedef int INTFLOAT;
|
||||
typedef unsigned int UINTFLOAT;
|
||||
typedef int16_t SHORTFLOAT;
|
||||
|
||||
#else /* USE_FIXED */
|
||||
@@ -95,7 +94,6 @@ typedef int16_t SHORTFLOAT;
|
||||
#define AC3_DYNAMIC_RANGE1 1.0f
|
||||
|
||||
typedef float INTFLOAT;
|
||||
typedef float UINTFLOAT;
|
||||
typedef float SHORTFLOAT;
|
||||
|
||||
#endif /* USE_FIXED */
|
||||
|
||||
@@ -179,9 +179,7 @@ int av_ac3_parse_header(const uint8_t *buf, size_t size,
|
||||
AC3HeaderInfo hdr;
|
||||
int err;
|
||||
|
||||
err = init_get_bits8(&gb, buf, size);
|
||||
if (err < 0)
|
||||
return AVERROR_INVALIDDATA;
|
||||
init_get_bits8(&gb, buf, size);
|
||||
err = ff_ac3_parse_header(&gb, &hdr);
|
||||
if (err < 0)
|
||||
return AVERROR_INVALIDDATA;
|
||||
|
||||
@@ -43,7 +43,7 @@ int AC3_NAME(allocate_sample_buffers)(AC3EncodeContext *s)
|
||||
|
||||
FF_ALLOC_OR_GOTO(s->avctx, s->windowed_samples, AC3_WINDOW_SIZE *
|
||||
sizeof(*s->windowed_samples), alloc_fail);
|
||||
FF_ALLOCZ_ARRAY_OR_GOTO(s->avctx, s->planar_samples, s->channels, sizeof(*s->planar_samples),
|
||||
FF_ALLOC_ARRAY_OR_GOTO(s->avctx, s->planar_samples, s->channels, sizeof(*s->planar_samples),
|
||||
alloc_fail);
|
||||
for (ch = 0; ch < s->channels; ch++) {
|
||||
FF_ALLOCZ_OR_GOTO(s->avctx, s->planar_samples[ch],
|
||||
|
||||
+2
-2
@@ -423,8 +423,8 @@ static int decode_inter_plane(AGMContext *s, GetBitContext *gb, int size,
|
||||
int map = s->map[x];
|
||||
|
||||
if (orig_mv_x >= -32) {
|
||||
if (y * 8 + mv_y < 0 || y * 8 + mv_y + 8 > h ||
|
||||
x * 8 + mv_x < 0 || x * 8 + mv_x + 8 > w)
|
||||
if (y * 8 + mv_y < 0 || y * 8 + mv_y + 8 >= h ||
|
||||
x * 8 + mv_x < 0 || x * 8 + mv_x + 8 >= w)
|
||||
return AVERROR_INVALIDDATA;
|
||||
|
||||
copy_block8(frame->data[plane] + (s->blocks_h - 1 - y) * 8 * frame->linesize[plane] + x * 8,
|
||||
|
||||
+2
-1
@@ -470,7 +470,8 @@ static av_cold int aic_decode_init(AVCodecContext *avctx)
|
||||
}
|
||||
}
|
||||
|
||||
ctx->slice_data = av_calloc(ctx->slice_width, AIC_BAND_COEFFS * sizeof(*ctx->slice_data));
|
||||
ctx->slice_data = av_malloc_array(ctx->slice_width, AIC_BAND_COEFFS
|
||||
* sizeof(*ctx->slice_data));
|
||||
if (!ctx->slice_data) {
|
||||
av_log(avctx, AV_LOG_ERROR, "Error allocating slice buffer\n");
|
||||
|
||||
|
||||
@@ -302,9 +302,6 @@ static int decode_element(AVCodecContext *avctx, AVFrame *frame, int ch_index,
|
||||
decorr_shift = get_bits(&alac->gb, 8);
|
||||
decorr_left_weight = get_bits(&alac->gb, 8);
|
||||
|
||||
if (channels == 2 && decorr_left_weight && decorr_shift > 31)
|
||||
return AVERROR_INVALIDDATA;
|
||||
|
||||
for (ch = 0; ch < channels; ch++) {
|
||||
prediction_type[ch] = get_bits(&alac->gb, 4);
|
||||
lpc_quant[ch] = get_bits(&alac->gb, 4);
|
||||
|
||||
@@ -29,12 +29,12 @@ static void decorrelate_stereo(int32_t *buffer[2], int nb_samples,
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nb_samples; i++) {
|
||||
uint32_t a, b;
|
||||
int32_t a, b;
|
||||
|
||||
a = buffer[0][i];
|
||||
b = buffer[1][i];
|
||||
|
||||
a -= (int)(b * decorr_left_weight) >> decorr_shift;
|
||||
a -= (b * decorr_left_weight) >> decorr_shift;
|
||||
b += a;
|
||||
|
||||
buffer[0][i] = b;
|
||||
|
||||
@@ -679,7 +679,9 @@ extern AVCodec ff_xsub_decoder;
|
||||
/* external libraries */
|
||||
extern AVCodec ff_aac_at_encoder;
|
||||
extern AVCodec ff_aac_at_decoder;
|
||||
extern AVCodec ff_aac_mf_encoder;
|
||||
extern AVCodec ff_ac3_at_decoder;
|
||||
extern AVCodec ff_ac3_mf_encoder;
|
||||
extern AVCodec ff_adpcm_ima_qt_at_decoder;
|
||||
extern AVCodec ff_alac_at_encoder;
|
||||
extern AVCodec ff_alac_at_decoder;
|
||||
@@ -691,6 +693,7 @@ extern AVCodec ff_ilbc_at_decoder;
|
||||
extern AVCodec ff_mp1_at_decoder;
|
||||
extern AVCodec ff_mp2_at_decoder;
|
||||
extern AVCodec ff_mp3_at_decoder;
|
||||
extern AVCodec ff_mp3_mf_encoder;
|
||||
extern AVCodec ff_pcm_alaw_at_encoder;
|
||||
extern AVCodec ff_pcm_alaw_at_decoder;
|
||||
extern AVCodec ff_pcm_mulaw_at_encoder;
|
||||
@@ -754,8 +757,6 @@ extern AVCodec ff_idf_decoder;
|
||||
|
||||
/* external libraries, that shouldn't be used by default if one of the
|
||||
* above is available */
|
||||
extern AVCodec ff_aac_mf_encoder;
|
||||
extern AVCodec ff_ac3_mf_encoder;
|
||||
extern AVCodec ff_h263_v4l2m2m_encoder;
|
||||
extern AVCodec ff_libaom_av1_decoder;
|
||||
extern AVCodec ff_libopenh264_encoder;
|
||||
@@ -788,7 +789,6 @@ extern AVCodec ff_mjpeg_cuvid_decoder;
|
||||
extern AVCodec ff_mjpeg_qsv_encoder;
|
||||
extern AVCodec ff_mjpeg_qsv_decoder;
|
||||
extern AVCodec ff_mjpeg_vaapi_encoder;
|
||||
extern AVCodec ff_mp3_mf_encoder;
|
||||
extern AVCodec ff_mpeg1_cuvid_decoder;
|
||||
extern AVCodec ff_mpeg2_cuvid_decoder;
|
||||
extern AVCodec ff_mpeg2_qsv_encoder;
|
||||
|
||||
+6
-17
@@ -762,7 +762,7 @@ static int read_var_block_data(ALSDecContext *ctx, ALSBlockData *bd)
|
||||
}
|
||||
|
||||
for (k = 2; k < opt_order; k++)
|
||||
quant_cof[k] = (quant_cof[k] * (1U << 14)) + (add_base << 13);
|
||||
quant_cof[k] = (quant_cof[k] * (1 << 14)) + (add_base << 13);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1016,10 +1016,6 @@ static int read_block(ALSDecContext *ctx, ALSBlockData *bd)
|
||||
ALSSpecificConfig *sconf = &ctx->sconf;
|
||||
|
||||
*bd->shift_lsbs = 0;
|
||||
|
||||
if (get_bits_left(gb) < 7)
|
||||
return AVERROR_INVALIDDATA;
|
||||
|
||||
// read block type flag and read the samples accordingly
|
||||
if (get_bits1(gb)) {
|
||||
ret = read_var_block_data(ctx, bd);
|
||||
@@ -1632,7 +1628,7 @@ static int read_frame_data(ALSDecContext *ctx, unsigned int ra_frame)
|
||||
AVCodecContext *avctx = ctx->avctx;
|
||||
GetBitContext *gb = &ctx->gb;
|
||||
unsigned int div_blocks[32]; ///< block sizes.
|
||||
int c;
|
||||
unsigned int c;
|
||||
unsigned int js_blocks[2];
|
||||
uint32_t bs_info = 0;
|
||||
int ret;
|
||||
@@ -1810,17 +1806,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame_ptr,
|
||||
else
|
||||
ctx->cur_frame_length = sconf->frame_length;
|
||||
|
||||
ctx->highest_decoded_channel = -1;
|
||||
ctx->highest_decoded_channel = 0;
|
||||
// decode the frame data
|
||||
if ((invalid_frame = read_frame_data(ctx, ra_frame)) < 0)
|
||||
av_log(ctx->avctx, AV_LOG_WARNING,
|
||||
"Reading frame data failed. Skipping RA unit.\n");
|
||||
|
||||
if (ctx->highest_decoded_channel == -1) {
|
||||
av_log(ctx->avctx, AV_LOG_WARNING,
|
||||
"No channel data decoded.\n");
|
||||
if (ctx->highest_decoded_channel == 0)
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
|
||||
ctx->frame_id++;
|
||||
|
||||
@@ -2116,8 +2109,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
|
||||
ctx->nbits = av_malloc_array(ctx->cur_frame_length, sizeof(*ctx->nbits));
|
||||
ctx->mlz = av_mallocz(sizeof(*ctx->mlz));
|
||||
|
||||
if (!ctx->larray || !ctx->nbits || !ctx->mlz || !ctx->acf || !ctx->shift_value
|
||||
|| !ctx->last_shift_value || !ctx->last_acf_mantissa || !ctx->raw_mantissa) {
|
||||
if (!ctx->mlz || !ctx->acf || !ctx->shift_value || !ctx->last_shift_value
|
||||
|| !ctx->last_acf_mantissa || !ctx->raw_mantissa) {
|
||||
av_log(avctx, AV_LOG_ERROR, "Allocating buffer memory failed.\n");
|
||||
ret = AVERROR(ENOMEM);
|
||||
goto fail;
|
||||
@@ -2128,10 +2121,6 @@ static av_cold int decode_init(AVCodecContext *avctx)
|
||||
|
||||
for (c = 0; c < avctx->channels; ++c) {
|
||||
ctx->raw_mantissa[c] = av_mallocz_array(ctx->cur_frame_length, sizeof(**ctx->raw_mantissa));
|
||||
if (!ctx->raw_mantissa[c]) {
|
||||
av_log(avctx, AV_LOG_ERROR, "Allocating buffer memory failed.\n");
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
+1
-8
@@ -431,8 +431,7 @@ static int decode_frame(AVCodecContext *avctx,
|
||||
s->args[s->nb_args] = FFMAX(s->args[s->nb_args], 0) * 10 + buf[0] - '0';
|
||||
break;
|
||||
case ';':
|
||||
if (s->nb_args < MAX_NB_ARGS)
|
||||
s->nb_args++;
|
||||
s->nb_args++;
|
||||
if (s->nb_args < MAX_NB_ARGS)
|
||||
s->args[s->nb_args] = 0;
|
||||
break;
|
||||
@@ -475,11 +474,6 @@ static av_cold int decode_close(AVCodecContext *avctx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const AVCodecDefault ansi_defaults[] = {
|
||||
{ "max_pixels", "640*480" },
|
||||
{ NULL },
|
||||
};
|
||||
|
||||
AVCodec ff_ansi_decoder = {
|
||||
.name = "ansi",
|
||||
.long_name = NULL_IF_CONFIG_SMALL("ASCII/ANSI art"),
|
||||
@@ -491,5 +485,4 @@ AVCodec ff_ansi_decoder = {
|
||||
.decode = decode_frame,
|
||||
.capabilities = AV_CODEC_CAP_DR1,
|
||||
.caps_internal = FF_CODEC_CAP_INIT_THREADSAFE,
|
||||
.defaults = ansi_defaults,
|
||||
};
|
||||
|
||||
+21
-32
@@ -102,7 +102,7 @@ typedef struct APEFilter {
|
||||
int16_t *historybuffer; ///< filter memory
|
||||
int16_t *delay; ///< filtered values
|
||||
|
||||
uint32_t avg;
|
||||
int avg;
|
||||
} APEFilter;
|
||||
|
||||
typedef struct APERice {
|
||||
@@ -852,7 +852,7 @@ static av_always_inline int filter_fast_3320(APEPredictor *p,
|
||||
}
|
||||
|
||||
predictionA = p->buf[delayA] * 2U - p->buf[delayA - 1];
|
||||
p->lastA[filter] = decoded + (unsigned)((int32_t)(predictionA * p->coeffsA[filter][0]) >> 9);
|
||||
p->lastA[filter] = decoded + ((int32_t)(predictionA * p->coeffsA[filter][0]) >> 9);
|
||||
|
||||
if ((decoded ^ predictionA) > 0)
|
||||
p->coeffsA[filter][0]++;
|
||||
@@ -882,8 +882,8 @@ static av_always_inline int filter_3800(APEPredictor *p,
|
||||
return predictionA;
|
||||
}
|
||||
d2 = p->buf[delayA];
|
||||
d1 = (p->buf[delayA] - (unsigned)p->buf[delayA - 1]) * 2;
|
||||
d0 = p->buf[delayA] + ((p->buf[delayA - 2] - (unsigned)p->buf[delayA - 1]) * 8);
|
||||
d1 = (p->buf[delayA] - p->buf[delayA - 1]) * 2U;
|
||||
d0 = p->buf[delayA] + ((p->buf[delayA - 2] - p->buf[delayA - 1]) * 8U);
|
||||
d3 = p->buf[delayB] * 2U - p->buf[delayB - 1];
|
||||
d4 = p->buf[delayB];
|
||||
|
||||
@@ -903,7 +903,7 @@ static av_always_inline int filter_3800(APEPredictor *p,
|
||||
p->coeffsB[filter][0] += (((d3 >> 29) & 4) - 2) * sign;
|
||||
p->coeffsB[filter][1] -= (((d4 >> 30) & 2) - 1) * sign;
|
||||
|
||||
p->filterB[filter] = p->lastA[filter] + (unsigned)(predictionB >> shift);
|
||||
p->filterB[filter] = p->lastA[filter] + (predictionB >> shift);
|
||||
p->filterA[filter] = p->filterB[filter] + (unsigned)((int)(p->filterA[filter] * 31U) >> 5);
|
||||
|
||||
return p->filterA[filter];
|
||||
@@ -928,7 +928,7 @@ static void long_filter_high_3800(int32_t *buffer, int order, int shift, int len
|
||||
dotprod += delay[j] * (unsigned)coeffs[j];
|
||||
coeffs[j] += ((delay[j] >> 31) | 1) * sign;
|
||||
}
|
||||
buffer[i] -= (unsigned)(dotprod >> shift);
|
||||
buffer[i] -= dotprod >> shift;
|
||||
for (j = 0; j < order - 1; j++)
|
||||
delay[j] = delay[j + 1];
|
||||
delay[order - 1] = buffer[i];
|
||||
@@ -952,7 +952,7 @@ static void long_filter_ehigh_3830(int32_t *buffer, int length)
|
||||
for (j = 7; j > 0; j--)
|
||||
delay[j] = delay[j - 1];
|
||||
delay[0] = buffer[i];
|
||||
buffer[i] -= (unsigned)(dotprod >> 9);
|
||||
buffer[i] -= dotprod >> 9;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1061,13 +1061,13 @@ static av_always_inline int predictor_update_3930(APEPredictor *p,
|
||||
const int delayA)
|
||||
{
|
||||
int32_t predictionA, sign;
|
||||
uint32_t d0, d1, d2, d3;
|
||||
int32_t d0, d1, d2, d3;
|
||||
|
||||
p->buf[delayA] = p->lastA[filter];
|
||||
d0 = p->buf[delayA ];
|
||||
d1 = p->buf[delayA ] - (unsigned)p->buf[delayA - 1];
|
||||
d2 = p->buf[delayA - 1] - (unsigned)p->buf[delayA - 2];
|
||||
d3 = p->buf[delayA - 2] - (unsigned)p->buf[delayA - 3];
|
||||
d1 = p->buf[delayA ] - p->buf[delayA - 1];
|
||||
d2 = p->buf[delayA - 1] - p->buf[delayA - 2];
|
||||
d3 = p->buf[delayA - 2] - p->buf[delayA - 3];
|
||||
|
||||
predictionA = d0 * p->coeffsA[filter][0] +
|
||||
d1 * p->coeffsA[filter][1] +
|
||||
@@ -1078,10 +1078,10 @@ static av_always_inline int predictor_update_3930(APEPredictor *p,
|
||||
p->filterA[filter] = p->lastA[filter] + ((int)(p->filterA[filter] * 31U) >> 5);
|
||||
|
||||
sign = APESIGN(decoded);
|
||||
p->coeffsA[filter][0] += (((int32_t)d0 < 0) * 2 - 1) * sign;
|
||||
p->coeffsA[filter][1] += (((int32_t)d1 < 0) * 2 - 1) * sign;
|
||||
p->coeffsA[filter][2] += (((int32_t)d2 < 0) * 2 - 1) * sign;
|
||||
p->coeffsA[filter][3] += (((int32_t)d3 < 0) * 2 - 1) * sign;
|
||||
p->coeffsA[filter][0] += ((d0 < 0) * 2 - 1) * sign;
|
||||
p->coeffsA[filter][1] += ((d1 < 0) * 2 - 1) * sign;
|
||||
p->coeffsA[filter][2] += ((d2 < 0) * 2 - 1) * sign;
|
||||
p->coeffsA[filter][3] += ((d3 < 0) * 2 - 1) * sign;
|
||||
|
||||
return p->filterA[filter];
|
||||
}
|
||||
@@ -1309,7 +1309,7 @@ static void do_apply_filter(APEContext *ctx, int version, APEFilter *f,
|
||||
absres = res < 0 ? -(unsigned)res : res;
|
||||
if (absres)
|
||||
*f->adaptcoeffs = APESIGN(res) *
|
||||
(8 << ((absres > f->avg * 3LL) + (absres > (f->avg + f->avg / 3))));
|
||||
(8 << ((absres > f->avg * 3) + (absres > f->avg * 4 / 3)));
|
||||
/* equivalent to the following code
|
||||
if (absres <= f->avg * 4 / 3)
|
||||
*f->adaptcoeffs = APESIGN(res) * 8;
|
||||
@@ -1559,7 +1559,7 @@ static int ape_decode_frame(AVCodecContext *avctx, void *data,
|
||||
for (ch = 0; ch < s->channels; ch++) {
|
||||
sample8 = (uint8_t *)frame->data[ch];
|
||||
for (i = 0; i < blockstodecode; i++)
|
||||
*sample8++ = (s->decoded[ch][i] + 0x80U) & 0xff;
|
||||
*sample8++ = (s->decoded[ch][i] + 0x80) & 0xff;
|
||||
}
|
||||
break;
|
||||
case 16:
|
||||
@@ -1573,7 +1573,7 @@ static int ape_decode_frame(AVCodecContext *avctx, void *data,
|
||||
for (ch = 0; ch < s->channels; ch++) {
|
||||
sample24 = (int32_t *)frame->data[ch];
|
||||
for (i = 0; i < blockstodecode; i++)
|
||||
*sample24++ = s->decoded[ch][i] * 256U;
|
||||
*sample24++ = s->decoded[ch][i] * 256;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -1581,24 +1581,13 @@ static int ape_decode_frame(AVCodecContext *avctx, void *data,
|
||||
s->samples -= blockstodecode;
|
||||
|
||||
if (avctx->err_recognition & AV_EF_CRCCHECK &&
|
||||
s->fileversion >= 3900) {
|
||||
s->fileversion >= 3900 && s->bps < 24) {
|
||||
uint32_t crc = s->CRC_state;
|
||||
const AVCRC *crc_tab = av_crc_get_table(AV_CRC_32_IEEE_LE);
|
||||
int stride = s->bps == 24 ? 4 : (s->bps>>3);
|
||||
int offset = s->bps == 24;
|
||||
int bytes = s->bps >> 3;
|
||||
|
||||
for (i = 0; i < blockstodecode; i++) {
|
||||
for (ch = 0; ch < s->channels; ch++) {
|
||||
#if HAVE_BIGENDIAN
|
||||
uint8_t *smp_native = frame->data[ch] + i*stride;
|
||||
uint8_t smp[4];
|
||||
for(int j = 0; j<stride; j++)
|
||||
smp[j] = smp_native[stride-j-1];
|
||||
#else
|
||||
uint8_t *smp = frame->data[ch] + i*stride;
|
||||
#endif
|
||||
crc = av_crc(crc_tab, crc, smp+offset, bytes);
|
||||
uint8_t *smp = frame->data[ch] + (i*(s->bps >> 3));
|
||||
crc = av_crc(crc_tab, crc, smp, s->bps >> 3);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -48,3 +48,4 @@ function ff_scalarproduct_int16_neon, export=1
|
||||
vmov.32 r0, d3[0]
|
||||
bx lr
|
||||
endfunc
|
||||
|
||||
|
||||
@@ -229,7 +229,7 @@ A .endif
|
||||
.endif
|
||||
|
||||
// Begin loop
|
||||
1:
|
||||
01:
|
||||
.if TOTAL_TAPS == 0
|
||||
// Things simplify a lot in this case
|
||||
// In fact this could be pipelined further if it's worth it...
|
||||
@@ -241,7 +241,7 @@ A .endif
|
||||
str ST0, [PST, #-4]!
|
||||
str ST0, [PST, #4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)]
|
||||
str ST0, [PSAMP], #4 * MAX_CHANNELS
|
||||
bne 1b
|
||||
bne 01b
|
||||
.else
|
||||
.if \fir_taps & 1
|
||||
.set LOAD_REG, 1
|
||||
@@ -333,7 +333,7 @@ T orr AC0, AC0, AC1
|
||||
str ST3, [PST, #-4]!
|
||||
str ST2, [PST, #4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)]
|
||||
str ST3, [PSAMP], #4 * MAX_CHANNELS
|
||||
bne 1b
|
||||
bne 01b
|
||||
.endif
|
||||
b 99f
|
||||
|
||||
|
||||
@@ -279,13 +279,11 @@ function \type\()_8tap_\size\()h_\idx1\idx2
|
||||
sub r1, r1, r5
|
||||
.endif
|
||||
@ size >= 16 loads two qwords and increments r2,
|
||||
@ size 4 loads 1 d word, increments r2 and loads 1 32-bit lane
|
||||
@ for size 8 it's enough with one qword and no postincrement
|
||||
@ for size 4/8 it's enough with one qword and no
|
||||
@ postincrement
|
||||
.if \size >= 16
|
||||
sub r3, r3, r5
|
||||
sub r3, r3, #8
|
||||
.elseif \size == 4
|
||||
sub r3, r3, #8
|
||||
.endif
|
||||
@ Load the filter vector
|
||||
vld1.16 {q0}, [r12,:128]
|
||||
@@ -297,14 +295,9 @@ function \type\()_8tap_\size\()h_\idx1\idx2
|
||||
.if \size >= 16
|
||||
vld1.8 {d18, d19, d20}, [r2]!
|
||||
vld1.8 {d24, d25, d26}, [r7]!
|
||||
.elseif \size == 8
|
||||
.else
|
||||
vld1.8 {q9}, [r2]
|
||||
vld1.8 {q12}, [r7]
|
||||
.else @ size == 4
|
||||
vld1.8 {d18}, [r2]!
|
||||
vld1.8 {d24}, [r7]!
|
||||
vld1.32 {d19[0]}, [r2]
|
||||
vld1.32 {d25[0]}, [r7]
|
||||
.endif
|
||||
vmovl.u8 q8, d18
|
||||
vmovl.u8 q9, d19
|
||||
|
||||
@@ -362,10 +362,6 @@ static av_cold int atrac1_decode_init(AVCodecContext *avctx)
|
||||
ff_atrac_generate_tables();
|
||||
|
||||
q->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
|
||||
if (!q->fdsp) {
|
||||
atrac1_decode_end(avctx);
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
q->bands[0] = q->low;
|
||||
q->bands[1] = q->mid;
|
||||
|
||||
+29
-44
@@ -45,10 +45,6 @@ static const enum AVPixelFormat pix_fmts_12bit[2][2] = {
|
||||
{ AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV420P12 },
|
||||
};
|
||||
|
||||
static const enum AVPixelFormat pix_fmts_rgb[3] = {
|
||||
AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12,
|
||||
};
|
||||
|
||||
static int av1_parser_parse(AVCodecParserContext *ctx,
|
||||
AVCodecContext *avctx,
|
||||
const uint8_t **out_data, int *out_size,
|
||||
@@ -57,8 +53,6 @@ static int av1_parser_parse(AVCodecParserContext *ctx,
|
||||
AV1ParseContext *s = ctx->priv_data;
|
||||
CodedBitstreamFragment *td = &s->temporal_unit;
|
||||
CodedBitstreamAV1Context *av1 = s->cbc->priv_data;
|
||||
AV1RawSequenceHeader *seq;
|
||||
AV1RawColorConfig *color;
|
||||
int ret;
|
||||
|
||||
*out_data = data;
|
||||
@@ -92,12 +86,11 @@ static int av1_parser_parse(AVCodecParserContext *ctx,
|
||||
goto end;
|
||||
}
|
||||
|
||||
seq = av1->sequence_header;
|
||||
color = &seq->color_config;
|
||||
|
||||
for (int i = 0; i < td->nb_units; i++) {
|
||||
CodedBitstreamUnit *unit = &td->units[i];
|
||||
AV1RawOBU *obu = unit->content;
|
||||
AV1RawSequenceHeader *seq = av1->sequence_header;
|
||||
AV1RawColorConfig *color = &seq->color_config;
|
||||
AV1RawFrameHeader *frame;
|
||||
int frame_type;
|
||||
|
||||
@@ -134,6 +127,9 @@ static int av1_parser_parse(AVCodecParserContext *ctx,
|
||||
ctx->key_frame = frame_type == AV1_FRAME_KEY;
|
||||
}
|
||||
|
||||
avctx->profile = seq->seq_profile;
|
||||
avctx->level = seq->seq_level_idx[0];
|
||||
|
||||
switch (frame_type) {
|
||||
case AV1_FRAME_KEY:
|
||||
case AV1_FRAME_INTRA_ONLY:
|
||||
@@ -147,44 +143,33 @@ static int av1_parser_parse(AVCodecParserContext *ctx,
|
||||
break;
|
||||
}
|
||||
ctx->picture_structure = AV_PICTURE_STRUCTURE_FRAME;
|
||||
}
|
||||
|
||||
switch (av1->bit_depth) {
|
||||
case 8:
|
||||
ctx->format = color->mono_chrome ? AV_PIX_FMT_GRAY8
|
||||
: pix_fmts_8bit [color->subsampling_x][color->subsampling_y];
|
||||
break;
|
||||
case 10:
|
||||
ctx->format = color->mono_chrome ? AV_PIX_FMT_GRAY10
|
||||
: pix_fmts_10bit[color->subsampling_x][color->subsampling_y];
|
||||
break;
|
||||
case 12:
|
||||
ctx->format = color->mono_chrome ? AV_PIX_FMT_GRAY12
|
||||
: pix_fmts_12bit[color->subsampling_x][color->subsampling_y];
|
||||
break;
|
||||
}
|
||||
av_assert2(ctx->format != AV_PIX_FMT_NONE);
|
||||
switch (av1->bit_depth) {
|
||||
case 8:
|
||||
ctx->format = color->mono_chrome ? AV_PIX_FMT_GRAY8
|
||||
: pix_fmts_8bit [color->subsampling_x][color->subsampling_y];
|
||||
break;
|
||||
case 10:
|
||||
ctx->format = color->mono_chrome ? AV_PIX_FMT_GRAY10
|
||||
: pix_fmts_10bit[color->subsampling_x][color->subsampling_y];
|
||||
break;
|
||||
case 12:
|
||||
ctx->format = color->mono_chrome ? AV_PIX_FMT_GRAY12
|
||||
: pix_fmts_12bit[color->subsampling_x][color->subsampling_y];
|
||||
break;
|
||||
}
|
||||
av_assert2(ctx->format != AV_PIX_FMT_NONE);
|
||||
|
||||
if (!color->subsampling_x && !color->subsampling_y &&
|
||||
color->matrix_coefficients == AVCOL_SPC_RGB &&
|
||||
color->color_primaries == AVCOL_PRI_BT709 &&
|
||||
color->transfer_characteristics == AVCOL_TRC_IEC61966_2_1)
|
||||
ctx->format = pix_fmts_rgb[color->high_bitdepth + color->twelve_bit];
|
||||
avctx->colorspace = (enum AVColorSpace) color->matrix_coefficients;
|
||||
avctx->color_primaries = (enum AVColorPrimaries) color->color_primaries;
|
||||
avctx->color_trc = (enum AVColorTransferCharacteristic) color->transfer_characteristics;
|
||||
avctx->color_range = color->color_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
|
||||
|
||||
avctx->pix_fmt = ctx->format;
|
||||
|
||||
avctx->profile = seq->seq_profile;
|
||||
avctx->level = seq->seq_level_idx[0];
|
||||
|
||||
avctx->colorspace = (enum AVColorSpace) color->matrix_coefficients;
|
||||
avctx->color_primaries = (enum AVColorPrimaries) color->color_primaries;
|
||||
avctx->color_trc = (enum AVColorTransferCharacteristic) color->transfer_characteristics;
|
||||
avctx->color_range = color->color_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
|
||||
|
||||
if (ctx->width != avctx->width || ctx->height != avctx->height) {
|
||||
ret = ff_set_dimensions(avctx, ctx->width, ctx->height);
|
||||
if (ret < 0)
|
||||
goto end;
|
||||
if (ctx->width != avctx->width || ctx->height != avctx->height) {
|
||||
ret = ff_set_dimensions(avctx, ctx->width, ctx->height);
|
||||
if (ret < 0)
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
|
||||
if (avctx->framerate.num)
|
||||
|
||||
@@ -1294,10 +1294,6 @@ typedef struct AVCodecContext {
|
||||
* this callback and filled with the extra buffers if there are more
|
||||
* buffers than buf[] can hold. extended_buf will be freed in
|
||||
* av_frame_unref().
|
||||
* Decoders will generally initialize the whole buffer before it is output
|
||||
* but it can in rare error conditions happen that uninitialized data is passed
|
||||
* through. \important The buffers returned by get_buffer* should thus not contain sensitive
|
||||
* data.
|
||||
*
|
||||
* If AV_CODEC_CAP_DR1 is not set then get_buffer2() must call
|
||||
* avcodec_default_get_buffer2() instead of providing buffers allocated by
|
||||
|
||||
@@ -54,8 +54,6 @@ static av_cold int init(AVCodecContext *avctx)
|
||||
}
|
||||
|
||||
a->mjpeg_avctx = avcodec_alloc_context3(codec);
|
||||
if (!a->mjpeg_avctx)
|
||||
return AVERROR(ENOMEM);
|
||||
|
||||
av_dict_set(&thread_opt, "threads", "1", 0); // Is this needed ?
|
||||
a->mjpeg_avctx->refcounted_frames = 1;
|
||||
@@ -171,5 +169,5 @@ AVCodec ff_avrn_decoder = {
|
||||
.close = end,
|
||||
.decode = decode_frame,
|
||||
.max_lowres = 3,
|
||||
.caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
|
||||
.caps_internal = FF_CODEC_CAP_INIT_THREADSAFE,
|
||||
};
|
||||
|
||||
+8
-7
@@ -867,7 +867,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
|
||||
|
||||
binkb_init_bundles(c);
|
||||
ref_start = frame->data[plane_idx];
|
||||
ref_end = frame->data[plane_idx] + ((bh - 1) * frame->linesize[plane_idx] + bw - 1) * 8;
|
||||
ref_end = frame->data[plane_idx] + (bh * frame->linesize[plane_idx] + bw) * 8;
|
||||
|
||||
for (i = 0; i < 64; i++)
|
||||
coordmap[i] = (i & 7) + (i >> 3) * stride;
|
||||
@@ -923,7 +923,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
|
||||
xoff = binkb_get_value(c, BINKB_SRC_X_OFF);
|
||||
yoff = binkb_get_value(c, BINKB_SRC_Y_OFF) + ybias;
|
||||
ref = dst + xoff + yoff * stride;
|
||||
if (ref < ref_start || ref > ref_end) {
|
||||
if (ref < ref_start || ref + 8*stride > ref_end) {
|
||||
av_log(c->avctx, AV_LOG_WARNING, "Reference block is out of bounds\n");
|
||||
} else if (ref + 8*stride < dst || ref >= dst + 8*stride) {
|
||||
c->hdsp.put_pixels_tab[1][0](dst, ref, stride, 8);
|
||||
@@ -939,7 +939,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
|
||||
xoff = binkb_get_value(c, BINKB_SRC_X_OFF);
|
||||
yoff = binkb_get_value(c, BINKB_SRC_Y_OFF) + ybias;
|
||||
ref = dst + xoff + yoff * stride;
|
||||
if (ref < ref_start || ref > ref_end) {
|
||||
if (ref < ref_start || ref + 8 * stride > ref_end) {
|
||||
av_log(c->avctx, AV_LOG_WARNING, "Reference block is out of bounds\n");
|
||||
} else if (ref + 8*stride < dst || ref >= dst + 8*stride) {
|
||||
c->hdsp.put_pixels_tab[1][0](dst, ref, stride, 8);
|
||||
@@ -971,7 +971,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
|
||||
xoff = binkb_get_value(c, BINKB_SRC_X_OFF);
|
||||
yoff = binkb_get_value(c, BINKB_SRC_Y_OFF) + ybias;
|
||||
ref = dst + xoff + yoff * stride;
|
||||
if (ref < ref_start || ref > ref_end) {
|
||||
if (ref < ref_start || ref + 8 * stride > ref_end) {
|
||||
av_log(c->avctx, AV_LOG_WARNING, "Reference block is out of bounds\n");
|
||||
} else if (ref + 8*stride < dst || ref >= dst + 8*stride) {
|
||||
c->hdsp.put_pixels_tab[1][0](dst, ref, stride, 8);
|
||||
@@ -1084,7 +1084,7 @@ static int bink_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
|
||||
for (bx = 0; bx < bw; bx++, dst += 8, prev += 8) {
|
||||
blk = get_value(c, BINK_SRC_BLOCK_TYPES);
|
||||
// 16x16 block type on odd line means part of the already decoded block, so skip it
|
||||
if (((by & 1) || (bx & 1)) && blk == SCALED_BLOCK) {
|
||||
if ((by & 1) && blk == SCALED_BLOCK) {
|
||||
bx++;
|
||||
dst += 8;
|
||||
prev += 8;
|
||||
@@ -1381,8 +1381,10 @@ static av_cold int decode_init(AVCodecContext *avctx)
|
||||
ff_hpeldsp_init(&c->hdsp, avctx->flags);
|
||||
ff_binkdsp_init(&c->binkdsp);
|
||||
|
||||
if ((ret = init_bundles(c)) < 0)
|
||||
if ((ret = init_bundles(c)) < 0) {
|
||||
free_bundles(c);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (c->version == 'b') {
|
||||
if (!binkb_initialised) {
|
||||
@@ -1422,5 +1424,4 @@ AVCodec ff_bink_decoder = {
|
||||
.decode = decode_frame,
|
||||
.flush = flush,
|
||||
.capabilities = AV_CODEC_CAP_DR1,
|
||||
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
|
||||
};
|
||||
|
||||
@@ -40,6 +40,8 @@
|
||||
#include "rdft.h"
|
||||
#include "wma_freqs.h"
|
||||
|
||||
static float quant_table[96];
|
||||
|
||||
#define MAX_CHANNELS 2
|
||||
#define BINK_BLOCK_MAX_SIZE (MAX_CHANNELS << 11)
|
||||
|
||||
@@ -56,7 +58,6 @@ typedef struct BinkAudioContext {
|
||||
float root;
|
||||
DECLARE_ALIGNED(32, FFTSample, coeffs)[BINK_BLOCK_MAX_SIZE];
|
||||
float previous[MAX_CHANNELS][BINK_BLOCK_MAX_SIZE / 16]; ///< coeffs from previous audio block
|
||||
float quant_table[96];
|
||||
AVPacket *pkt;
|
||||
union {
|
||||
RDFTContext rdft;
|
||||
@@ -115,7 +116,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
|
||||
s->root = s->frame_len / (sqrt(s->frame_len) * 32768.0);
|
||||
for (i = 0; i < 96; i++) {
|
||||
/* constant is result of 0.066399999/log10(M_E) */
|
||||
s->quant_table[i] = expf(i * 0.15289164787221953823f) * s->root;
|
||||
quant_table[i] = expf(i * 0.15289164787221953823f) * s->root;
|
||||
}
|
||||
|
||||
/* calculate number of bands */
|
||||
@@ -196,7 +197,7 @@ static int decode_block(BinkAudioContext *s, float **out, int use_dct)
|
||||
return AVERROR_INVALIDDATA;
|
||||
for (i = 0; i < s->num_bands; i++) {
|
||||
int value = get_bits(gb, 8);
|
||||
quant[i] = s->quant_table[FFMIN(value, 95)];
|
||||
quant[i] = quant_table[FFMIN(value, 95)];
|
||||
}
|
||||
|
||||
k = 0;
|
||||
|
||||
@@ -129,7 +129,7 @@ static int alloc_table(VLC *vlc, int size, int use_static)
|
||||
|
||||
typedef struct VLCcode {
|
||||
uint8_t bits;
|
||||
VLC_TYPE symbol;
|
||||
uint16_t symbol;
|
||||
/** codeword, with the first bit-to-be-read in the msb
|
||||
* (even if intended for a little-endian bitstream reader) */
|
||||
uint32_t code;
|
||||
@@ -162,9 +162,9 @@ static int build_table(VLC *vlc, int table_nb_bits, int nb_codes,
|
||||
uint32_t code;
|
||||
volatile VLC_TYPE (* volatile table)[2]; // the double volatile is needed to prevent an internal compiler error in gcc 4.2
|
||||
|
||||
table_size = 1 << table_nb_bits;
|
||||
if (table_nb_bits > 30)
|
||||
return AVERROR(EINVAL);
|
||||
table_size = 1 << table_nb_bits;
|
||||
table_index = alloc_table(vlc, table_size, flags & INIT_VLC_USE_NEW_STATIC);
|
||||
ff_dlog(NULL, "new table index=%d size=%d\n", table_index, table_size);
|
||||
if (table_index < 0)
|
||||
|
||||
+2
-2
@@ -693,11 +693,11 @@ static int cbs_insert_unit(CodedBitstreamContext *ctx,
|
||||
memmove(units + position + 1, units + position,
|
||||
(frag->nb_units - position) * sizeof(*units));
|
||||
} else {
|
||||
units = av_malloc_array(frag->nb_units*2 + 1, sizeof(*units));
|
||||
units = av_malloc_array(frag->nb_units + 1, sizeof(*units));
|
||||
if (!units)
|
||||
return AVERROR(ENOMEM);
|
||||
|
||||
frag->nb_units_allocated = 2*frag->nb_units_allocated + 1;
|
||||
++frag->nb_units_allocated;
|
||||
|
||||
if (position > 0)
|
||||
memcpy(units, frag->units, position * sizeof(*units));
|
||||
|
||||
+6
-19
@@ -36,7 +36,7 @@ static int cbs_av1_read_uvlc(CodedBitstreamContext *ctx, GetBitContext *gbc,
|
||||
position = get_bits_count(gbc);
|
||||
|
||||
zeroes = 0;
|
||||
while (zeroes < 32) {
|
||||
while (1) {
|
||||
if (get_bits_left(gbc) < 1) {
|
||||
av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid uvlc code at "
|
||||
"%s: bitstream ended.\n", name);
|
||||
@@ -49,18 +49,7 @@ static int cbs_av1_read_uvlc(CodedBitstreamContext *ctx, GetBitContext *gbc,
|
||||
}
|
||||
|
||||
if (zeroes >= 32) {
|
||||
// The spec allows at least thirty-two zero bits followed by a
|
||||
// one to mean 2^32-1, with no constraint on the number of
|
||||
// zeroes. The libaom reference decoder does not match this,
|
||||
// instead reading thirty-two zeroes but not the following one
|
||||
// to mean 2^32-1. These two interpretations are incompatible
|
||||
// and other implementations may follow one or the other.
|
||||
// Therefore we reject thirty-two zeroes because the intended
|
||||
// behaviour is not clear.
|
||||
av_log(ctx->log_ctx, AV_LOG_ERROR, "Thirty-two zero bits in "
|
||||
"%s uvlc code: considered invalid due to conflicting "
|
||||
"standard and reference decoder behaviour.\n", name);
|
||||
return AVERROR_INVALIDDATA;
|
||||
value = MAX_UINT_BITS(32);
|
||||
} else {
|
||||
if (get_bits_left(gbc) < zeroes) {
|
||||
av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid uvlc code at "
|
||||
@@ -136,9 +125,8 @@ static int cbs_av1_write_uvlc(CodedBitstreamContext *ctx, PutBitContext *pbc,
|
||||
put_bits(pbc, 1, 1);
|
||||
} else {
|
||||
zeroes = av_log2(value + 1);
|
||||
v = value - (1U << zeroes) + 1;
|
||||
put_bits(pbc, zeroes, 0);
|
||||
put_bits(pbc, 1, 1);
|
||||
v = value - (1 << zeroes) + 1;
|
||||
put_bits(pbc, zeroes + 1, 1);
|
||||
put_bits(pbc, zeroes, v);
|
||||
}
|
||||
|
||||
@@ -394,7 +382,7 @@ static int cbs_av1_write_increment(CodedBitstreamContext *ctx, PutBitContext *pb
|
||||
}
|
||||
|
||||
if (len > 0)
|
||||
put_bits(pbc, len, (1U << len) - 1 - (value != range_max));
|
||||
put_bits(pbc, len, (1 << len) - 1 - (value != range_max));
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -723,11 +711,10 @@ static size_t cbs_av1_get_payload_bytes_left(GetBitContext *gbc)
|
||||
|
||||
#define infer(name, value) do { \
|
||||
if (current->name != (value)) { \
|
||||
av_log(ctx->log_ctx, AV_LOG_ERROR, \
|
||||
av_log(ctx->log_ctx, AV_LOG_WARNING, "Warning: " \
|
||||
"%s does not match inferred value: " \
|
||||
"%"PRId64", but should be %"PRId64".\n", \
|
||||
#name, (int64_t)current->name, (int64_t)(value)); \
|
||||
return AVERROR_INVALIDDATA; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
|
||||
@@ -158,8 +158,8 @@ typedef struct AV1RawFrameHeader {
|
||||
uint8_t use_superres;
|
||||
uint8_t coded_denom;
|
||||
uint8_t render_and_frame_size_different;
|
||||
uint16_t render_width_minus_1;
|
||||
uint16_t render_height_minus_1;
|
||||
uint8_t render_width_minus_1;
|
||||
uint8_t render_height_minus_1;
|
||||
|
||||
uint8_t found_ref[AV1_REFS_PER_FRAME];
|
||||
|
||||
@@ -429,7 +429,6 @@ typedef struct CodedBitstreamAV1Context {
|
||||
int operating_point_idc;
|
||||
|
||||
int bit_depth;
|
||||
int order_hint;
|
||||
int frame_width;
|
||||
int frame_height;
|
||||
int upscaled_width;
|
||||
|
||||
@@ -366,7 +366,7 @@ static int FUNC(set_frame_refs)(CodedBitstreamContext *ctx, RWContext *rw,
|
||||
for (i = 0; i < AV1_NUM_REF_FRAMES; i++)
|
||||
shifted_order_hints[i] = cur_frame_hint +
|
||||
cbs_av1_get_relative_dist(seq, priv->ref[i].order_hint,
|
||||
priv->order_hint);
|
||||
current->order_hint);
|
||||
|
||||
latest_order_hint = shifted_order_hints[current->last_frame_idx];
|
||||
earliest_order_hint = shifted_order_hints[current->golden_frame_idx];
|
||||
@@ -541,7 +541,7 @@ static int FUNC(frame_size_with_refs)(CodedBitstreamContext *ctx, RWContext *rw,
|
||||
}
|
||||
|
||||
priv->upscaled_width = ref->upscaled_width;
|
||||
priv->frame_width = priv->upscaled_width;
|
||||
priv->frame_width = ref->frame_width;
|
||||
priv->frame_height = ref->frame_height;
|
||||
priv->render_width = ref->render_width;
|
||||
priv->render_height = ref->render_height;
|
||||
@@ -993,7 +993,7 @@ static int FUNC(skip_mode_params)(CodedBitstreamContext *ctx, RWContext *rw,
|
||||
for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
|
||||
ref_hint = priv->ref[current->ref_frame_idx[i]].order_hint;
|
||||
dist = cbs_av1_get_relative_dist(seq, ref_hint,
|
||||
priv->order_hint);
|
||||
current->order_hint);
|
||||
if (dist < 0) {
|
||||
if (forward_idx < 0 ||
|
||||
cbs_av1_get_relative_dist(seq, ref_hint,
|
||||
@@ -1261,10 +1261,10 @@ static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
|
||||
flag(show_existing_frame);
|
||||
|
||||
if (current->show_existing_frame) {
|
||||
AV1ReferenceFrameState *ref;
|
||||
AV1ReferenceFrameState *frame;
|
||||
|
||||
fb(3, frame_to_show_map_idx);
|
||||
ref = &priv->ref[current->frame_to_show_map_idx];
|
||||
frame = &priv->ref[current->frame_to_show_map_idx];
|
||||
|
||||
if (seq->decoder_model_info_present_flag &&
|
||||
!seq->timing_info.equal_picture_interval) {
|
||||
@@ -1275,24 +1275,12 @@ static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
|
||||
if (seq->frame_id_numbers_present_flag)
|
||||
fb(id_len, display_frame_id);
|
||||
|
||||
infer(frame_type, ref->frame_type);
|
||||
if (current->frame_type == AV1_FRAME_KEY) {
|
||||
if (frame->frame_type == AV1_FRAME_KEY)
|
||||
infer(refresh_frame_flags, all_frames);
|
||||
|
||||
// Section 7.21
|
||||
infer(current_frame_id, ref->frame_id);
|
||||
priv->upscaled_width = ref->upscaled_width;
|
||||
priv->frame_width = ref->frame_width;
|
||||
priv->frame_height = ref->frame_height;
|
||||
priv->render_width = ref->render_width;
|
||||
priv->render_height = ref->render_height;
|
||||
priv->bit_depth = ref->bit_depth;
|
||||
priv->order_hint = ref->order_hint;
|
||||
} else
|
||||
else
|
||||
infer(refresh_frame_flags, 0);
|
||||
|
||||
// Section 7.20
|
||||
goto update_refs;
|
||||
return 0;
|
||||
}
|
||||
|
||||
fb(2, frame_type);
|
||||
@@ -1378,7 +1366,6 @@ static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
|
||||
fb(order_hint_bits, order_hint);
|
||||
else
|
||||
infer(order_hint, 0);
|
||||
priv->order_hint = current->order_hint;
|
||||
|
||||
if (frame_is_intra || current->error_resilient_mode)
|
||||
infer(primary_ref_frame, AV1_PRIMARY_REF_NONE);
|
||||
@@ -1394,7 +1381,7 @@ static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
|
||||
int in_temporal_layer = (op_pt_idc >> priv->temporal_id ) & 1;
|
||||
int in_spatial_layer = (op_pt_idc >> (priv->spatial_id + 8)) & 1;
|
||||
if (seq->operating_point_idc[i] == 0 ||
|
||||
(in_temporal_layer && in_spatial_layer)) {
|
||||
in_temporal_layer || in_spatial_layer) {
|
||||
fbs(seq->decoder_model_info.buffer_removal_time_length_minus_1 + 1,
|
||||
buffer_removal_time[i], 1, i);
|
||||
}
|
||||
@@ -1554,16 +1541,6 @@ static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
|
||||
|
||||
CHECK(FUNC(film_grain_params)(ctx, rw, current));
|
||||
|
||||
av_log(ctx->log_ctx, AV_LOG_DEBUG, "Frame %d: size %dx%d "
|
||||
"upscaled %d render %dx%d subsample %dx%d "
|
||||
"bitdepth %d tiles %dx%d.\n", priv->order_hint,
|
||||
priv->frame_width, priv->frame_height, priv->upscaled_width,
|
||||
priv->render_width, priv->render_height,
|
||||
seq->color_config.subsampling_x + 1,
|
||||
seq->color_config.subsampling_y + 1, priv->bit_depth,
|
||||
priv->tile_rows, priv->tile_cols);
|
||||
|
||||
update_refs:
|
||||
for (i = 0; i < AV1_NUM_REF_FRAMES; i++) {
|
||||
if (current->refresh_frame_flags & (1 << i)) {
|
||||
priv->ref[i] = (AV1ReferenceFrameState) {
|
||||
@@ -1578,11 +1555,20 @@ update_refs:
|
||||
.subsampling_x = seq->color_config.subsampling_x,
|
||||
.subsampling_y = seq->color_config.subsampling_y,
|
||||
.bit_depth = priv->bit_depth,
|
||||
.order_hint = priv->order_hint,
|
||||
.order_hint = current->order_hint,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
av_log(ctx->log_ctx, AV_LOG_DEBUG, "Frame %d: size %dx%d "
|
||||
"upscaled %d render %dx%d subsample %dx%d "
|
||||
"bitdepth %d tiles %dx%d.\n", current->order_hint,
|
||||
priv->frame_width, priv->frame_height, priv->upscaled_width,
|
||||
priv->render_width, priv->render_height,
|
||||
seq->color_config.subsampling_x + 1,
|
||||
seq->color_config.subsampling_y + 1, priv->bit_depth,
|
||||
priv->tile_rows, priv->tile_cols);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -408,11 +408,10 @@ static int cbs_h2645_read_more_rbsp_data(GetBitContext *gbc)
|
||||
|
||||
#define infer(name, value) do { \
|
||||
if (current->name != (value)) { \
|
||||
av_log(ctx->log_ctx, AV_LOG_ERROR, \
|
||||
av_log(ctx->log_ctx, AV_LOG_WARNING, "Warning: " \
|
||||
"%s does not match inferred value: " \
|
||||
"%"PRId64", but should be %"PRId64".\n", \
|
||||
#name, (int64_t)current->name, (int64_t)(value)); \
|
||||
return AVERROR_INVALIDDATA; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
|
||||
@@ -728,7 +728,7 @@ static int FUNC(sps_scc_extension)(CodedBitstreamContext *ctx, RWContext *rw,
|
||||
|
||||
flag(sps_palette_predictor_initializer_present_flag);
|
||||
if (current->sps_palette_predictor_initializer_present_flag) {
|
||||
ue(sps_num_palette_predictor_initializer_minus1, 0, 127);
|
||||
ue(sps_num_palette_predictor_initializer_minus1, 0, 128);
|
||||
for (comp = 0; comp < (current->chroma_format_idc ? 3 : 1); comp++) {
|
||||
int bit_depth = comp == 0 ? current->bit_depth_luma_minus8 + 8
|
||||
: current->bit_depth_chroma_minus8 + 8;
|
||||
@@ -744,32 +744,6 @@ static int FUNC(sps_scc_extension)(CodedBitstreamContext *ctx, RWContext *rw,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int FUNC(vui_parameters_default)(CodedBitstreamContext *ctx,
|
||||
RWContext *rw, H265RawVUI *current,
|
||||
H265RawSPS *sps)
|
||||
{
|
||||
infer(aspect_ratio_idc, 0);
|
||||
|
||||
infer(video_format, 5);
|
||||
infer(video_full_range_flag, 0);
|
||||
infer(colour_primaries, 2);
|
||||
infer(transfer_characteristics, 2);
|
||||
infer(matrix_coefficients, 2);
|
||||
|
||||
infer(chroma_sample_loc_type_top_field, 0);
|
||||
infer(chroma_sample_loc_type_bottom_field, 0);
|
||||
|
||||
infer(tiles_fixed_structure_flag, 0);
|
||||
infer(motion_vectors_over_pic_boundaries_flag, 1);
|
||||
infer(min_spatial_segmentation_idc, 0);
|
||||
infer(max_bytes_per_pic_denom, 2);
|
||||
infer(max_bits_per_min_cu_denom, 1);
|
||||
infer(log2_max_mv_length_horizontal, 15);
|
||||
infer(log2_max_mv_length_vertical, 15);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int FUNC(sps)(CodedBitstreamContext *ctx, RWContext *rw,
|
||||
H265RawSPS *current)
|
||||
{
|
||||
@@ -934,8 +908,6 @@ static int FUNC(sps)(CodedBitstreamContext *ctx, RWContext *rw,
|
||||
flag(vui_parameters_present_flag);
|
||||
if (current->vui_parameters_present_flag)
|
||||
CHECK(FUNC(vui_parameters)(ctx, rw, &current->vui, current));
|
||||
else
|
||||
CHECK(FUNC(vui_parameters_default)(ctx, rw, &current->vui, current));
|
||||
|
||||
flag(sps_extension_present_flag);
|
||||
if (current->sps_extension_present_flag) {
|
||||
|
||||
@@ -149,7 +149,6 @@ static int cbs_jpeg_split_fragment(CodedBitstreamContext *ctx,
|
||||
break;
|
||||
} else if (marker == JPEG_MARKER_SOS) {
|
||||
next_marker = -1;
|
||||
end = start;
|
||||
for (i = start; i + 1 < frag->data_size; i++) {
|
||||
if (frag->data[i] != 0xff)
|
||||
continue;
|
||||
@@ -166,13 +165,13 @@ static int cbs_jpeg_split_fragment(CodedBitstreamContext *ctx,
|
||||
}
|
||||
} else {
|
||||
i = start;
|
||||
if (i > frag->data_size - 2) {
|
||||
if (i + 2 > frag->data_size) {
|
||||
av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid JPEG image: "
|
||||
"truncated at %02x marker.\n", marker);
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
length = AV_RB16(frag->data + i);
|
||||
if (length > frag->data_size - i) {
|
||||
if (i + length > frag->data_size) {
|
||||
av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid JPEG image: "
|
||||
"truncated at %02x marker segment.\n", marker);
|
||||
return AVERROR_INVALIDDATA;
|
||||
|
||||
@@ -422,7 +422,7 @@ static int cbs_vp9_split_fragment(CodedBitstreamContext *ctx,
|
||||
superframe_header = frag->data[frag->data_size - 1];
|
||||
|
||||
if ((superframe_header & 0xe0) == 0xc0) {
|
||||
VP9RawSuperframeIndex sfi = {0};
|
||||
VP9RawSuperframeIndex sfi;
|
||||
GetBitContext gbc;
|
||||
size_t index_size, pos;
|
||||
int i;
|
||||
|
||||
@@ -239,7 +239,7 @@ static void cdg_scroll(CDGraphicsContext *cc, uint8_t *data,
|
||||
for (y = FFMAX(0, vinc); y < FFMIN(CDG_FULL_HEIGHT + vinc, CDG_FULL_HEIGHT); y++)
|
||||
memcpy(out + FFMAX(0, hinc) + stride * y,
|
||||
in + FFMAX(0, hinc) - hinc + (y - vinc) * stride,
|
||||
FFABS(stride) - FFABS(hinc));
|
||||
FFMIN(stride + hinc, stride));
|
||||
|
||||
if (vinc > 0)
|
||||
cdg_fill_wrapper(0, 0, out,
|
||||
|
||||
@@ -65,11 +65,11 @@ int ff_celp_lp_synthesis_filter(int16_t *out, const int16_t *filter_coeffs,
|
||||
int i,n;
|
||||
|
||||
for (n = 0; n < buffer_length; n++) {
|
||||
int sum = rounder, sum1;
|
||||
int sum = -rounder, sum1;
|
||||
for (i = 1; i <= filter_length; i++)
|
||||
sum -= (unsigned)(filter_coeffs[i-1] * out[n-i]);
|
||||
sum += (unsigned)(filter_coeffs[i-1] * out[n-i]);
|
||||
|
||||
sum1 = ((sum >> 12) + in[n]) >> shift;
|
||||
sum1 = ((-sum >> 12) + in[n]) >> shift;
|
||||
sum = av_clip_int16(sum1);
|
||||
|
||||
if (stop_on_overflow && sum != sum1)
|
||||
|
||||
@@ -78,7 +78,7 @@ int64_t ff_dot_product(const int16_t *a, const int16_t *b, int length);
|
||||
*
|
||||
* @return value << offset, if offset>=0; value >> -offset - otherwise
|
||||
*/
|
||||
static inline unsigned bidir_sal(unsigned value, int offset)
|
||||
static inline int bidir_sal(int value, int offset)
|
||||
{
|
||||
if(offset < 0) return value >> -offset;
|
||||
else return value << offset;
|
||||
|
||||
@@ -503,10 +503,6 @@ static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
|
||||
avpriv_report_missing_feature(avctx, "Transform type of %"PRIu16, data);
|
||||
ret = AVERROR_PATCHWELCOME;
|
||||
break;
|
||||
} else if (data == 1) {
|
||||
av_log(avctx, AV_LOG_ERROR, "unsupported transform type\n");
|
||||
ret = AVERROR_PATCHWELCOME;
|
||||
break;
|
||||
}
|
||||
av_log(avctx, AV_LOG_DEBUG, "Transform-type? %"PRIu16"\n", data);
|
||||
} else if (abstag >= 0x4000 && abstag <= 0x40ff) {
|
||||
@@ -611,12 +607,6 @@ static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
|
||||
s->peak.level = 0;
|
||||
} else if (tag == -74 && s->peak.offset) {
|
||||
s->peak.level = data;
|
||||
if (s->peak.offset < 4 - bytestream2_tell(&s->peak.base) ||
|
||||
s->peak.offset > 4 + bytestream2_get_bytes_left(&s->peak.base)
|
||||
) {
|
||||
ret = AVERROR_INVALIDDATA;
|
||||
goto end;
|
||||
}
|
||||
bytestream2_seek(&s->peak.base, s->peak.offset - 4, SEEK_CUR);
|
||||
} else
|
||||
av_log(avctx, AV_LOG_DEBUG, "Unknown tag %i data %x\n", tag, data);
|
||||
|
||||
@@ -665,8 +665,8 @@ static av_cold int clv_decode_init(AVCodecContext *avctx)
|
||||
}
|
||||
|
||||
c->tile_shift = av_log2(c->tile_size);
|
||||
if (1U << c->tile_shift != c->tile_size || c->tile_shift < 1 || c->tile_shift > 30) {
|
||||
av_log(avctx, AV_LOG_ERROR, "Tile size: %d, is not power of 2 > 1 and < 2^31\n", c->tile_size);
|
||||
if (1U << c->tile_shift != c->tile_size) {
|
||||
av_log(avctx, AV_LOG_ERROR, "Tile size: %d, is not power of 2.\n", c->tile_size);
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
|
||||
|
||||
@@ -91,3 +91,4 @@ AVCodec ff_cljr_decoder = {
|
||||
.decode = decode_frame,
|
||||
.capabilities = AV_CODEC_CAP_DR1,
|
||||
};
|
||||
|
||||
|
||||
+4
-4
@@ -1084,10 +1084,6 @@ static av_cold int cook_decode_init(AVCodecContext *avctx)
|
||||
ff_audiodsp_init(&q->adsp);
|
||||
|
||||
while (bytestream2_get_bytes_left(&gb)) {
|
||||
if (s >= FFMIN(MAX_SUBPACKETS, avctx->block_align)) {
|
||||
avpriv_request_sample(avctx, "subpackets > %d", FFMIN(MAX_SUBPACKETS, avctx->block_align));
|
||||
return AVERROR_PATCHWELCOME;
|
||||
}
|
||||
/* 8 for mono, 16 for stereo, ? for multichannel
|
||||
Swap to right endianness so we don't need to care later on. */
|
||||
q->subpacket[s].cookversion = bytestream2_get_be32(&gb);
|
||||
@@ -1219,6 +1215,10 @@ static av_cold int cook_decode_init(AVCodecContext *avctx)
|
||||
|
||||
q->num_subpackets++;
|
||||
s++;
|
||||
if (s > FFMIN(MAX_SUBPACKETS, avctx->block_align)) {
|
||||
avpriv_request_sample(avctx, "subpackets > %d", FFMIN(MAX_SUBPACKETS, avctx->block_align));
|
||||
return AVERROR_PATCHWELCOME;
|
||||
}
|
||||
}
|
||||
|
||||
/* Try to catch some obviously faulty streams, otherwise it might be exploitable */
|
||||
|
||||
@@ -111,7 +111,6 @@ static int cpia_decode_frame(AVCodecContext *avctx,
|
||||
// Read line length, two byte little endian
|
||||
linelength = AV_RL16(src);
|
||||
src += 2;
|
||||
src_size -= 2;
|
||||
|
||||
if (src_size < linelength) {
|
||||
frame->decode_error_flags = FF_DECODE_ERROR_INVALID_BITSTREAM;
|
||||
|
||||
+2
-5
@@ -71,9 +71,6 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
|
||||
int buf_size = avpkt->size;
|
||||
CamStudioContext *c = avctx->priv_data;
|
||||
int ret;
|
||||
int bpp = avctx->bits_per_coded_sample / 8;
|
||||
int bugdelta = FFALIGN(avctx->width * bpp, 4) * avctx->height
|
||||
- (avctx->width & ~3) * bpp * avctx->height;
|
||||
|
||||
if (buf_size < 2) {
|
||||
av_log(avctx, AV_LOG_ERROR, "coded frame too small\n");
|
||||
@@ -87,7 +84,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
|
||||
switch ((buf[0] >> 1) & 7) {
|
||||
case 0: { // lzo compression
|
||||
int outlen = c->decomp_size, inlen = buf_size - 2;
|
||||
if (av_lzo1x_decode(c->decomp_buf, &outlen, &buf[2], &inlen) || (outlen && outlen != bugdelta)) {
|
||||
if (av_lzo1x_decode(c->decomp_buf, &outlen, &buf[2], &inlen) || outlen) {
|
||||
av_log(avctx, AV_LOG_ERROR, "error during lzo decompression\n");
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
@@ -96,7 +93,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
|
||||
case 1: { // zlib compression
|
||||
#if CONFIG_ZLIB
|
||||
unsigned long dlen = c->decomp_size;
|
||||
if (uncompress(c->decomp_buf, &dlen, &buf[2], buf_size - 2) != Z_OK || (dlen != c->decomp_size && dlen != c->decomp_size - bugdelta)) {
|
||||
if (uncompress(c->decomp_buf, &dlen, &buf[2], buf_size - 2) != Z_OK) {
|
||||
av_log(avctx, AV_LOG_ERROR, "error during zlib decompression\n");
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
|
||||
+17
-26
@@ -88,7 +88,7 @@ typedef struct CuvidContext
|
||||
CUVIDDECODECAPS caps8, caps10, caps12;
|
||||
|
||||
CUVIDPARSERPARAMS cuparseinfo;
|
||||
CUVIDEOFORMATEX *cuparse_ext;
|
||||
CUVIDEOFORMATEX cuparse_ext;
|
||||
|
||||
CudaFunctions *cudl;
|
||||
CuvidFunctions *cvdl;
|
||||
@@ -684,7 +684,6 @@ static av_cold int cuvid_decode_end(AVCodecContext *avctx)
|
||||
av_buffer_unref(&ctx->hwdevice);
|
||||
|
||||
av_freep(&ctx->key_frame);
|
||||
av_freep(&ctx->cuparse_ext);
|
||||
|
||||
cuvid_free_functions(&ctx->cvdl);
|
||||
|
||||
@@ -794,8 +793,6 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
|
||||
CUVIDSOURCEDATAPACKET seq_pkt;
|
||||
CUcontext cuda_ctx = NULL;
|
||||
CUcontext dummy;
|
||||
uint8_t *extradata;
|
||||
int extradata_size;
|
||||
int ret = 0;
|
||||
|
||||
enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
|
||||
@@ -892,8 +889,11 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
|
||||
ctx->cudl = device_hwctx->internal->cuda_dl;
|
||||
|
||||
memset(&ctx->cuparseinfo, 0, sizeof(ctx->cuparseinfo));
|
||||
memset(&ctx->cuparse_ext, 0, sizeof(ctx->cuparse_ext));
|
||||
memset(&seq_pkt, 0, sizeof(seq_pkt));
|
||||
|
||||
ctx->cuparseinfo.pExtVideoInfo = &ctx->cuparse_ext;
|
||||
|
||||
switch (avctx->codec->id) {
|
||||
#if CONFIG_H264_CUVID_DECODER
|
||||
case AV_CODEC_ID_H264:
|
||||
@@ -947,26 +947,17 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
|
||||
|
||||
if (avctx->codec->bsfs) {
|
||||
const AVCodecParameters *par = avctx->internal->bsf->par_out;
|
||||
extradata = par->extradata;
|
||||
extradata_size = par->extradata_size;
|
||||
} else {
|
||||
extradata = avctx->extradata;
|
||||
extradata_size = avctx->extradata_size;
|
||||
ctx->cuparse_ext.format.seqhdr_data_length = par->extradata_size;
|
||||
memcpy(ctx->cuparse_ext.raw_seqhdr_data,
|
||||
par->extradata,
|
||||
FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), par->extradata_size));
|
||||
} else if (avctx->extradata_size > 0) {
|
||||
ctx->cuparse_ext.format.seqhdr_data_length = avctx->extradata_size;
|
||||
memcpy(ctx->cuparse_ext.raw_seqhdr_data,
|
||||
avctx->extradata,
|
||||
FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), avctx->extradata_size));
|
||||
}
|
||||
|
||||
ctx->cuparse_ext = av_mallocz(sizeof(*ctx->cuparse_ext)
|
||||
+ FFMAX(extradata_size - (int)sizeof(ctx->cuparse_ext->raw_seqhdr_data), 0));
|
||||
if (!ctx->cuparse_ext) {
|
||||
ret = AVERROR(ENOMEM);
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (extradata_size > 0)
|
||||
memcpy(ctx->cuparse_ext->raw_seqhdr_data, extradata, extradata_size);
|
||||
ctx->cuparse_ext->format.seqhdr_data_length = extradata_size;
|
||||
|
||||
ctx->cuparseinfo.pExtVideoInfo = ctx->cuparse_ext;
|
||||
|
||||
ctx->key_frame = av_mallocz(ctx->nb_surfaces * sizeof(int));
|
||||
if (!ctx->key_frame) {
|
||||
ret = AVERROR(ENOMEM);
|
||||
@@ -995,8 +986,8 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
|
||||
seq_pkt.payload = ctx->cuparse_ext->raw_seqhdr_data;
|
||||
seq_pkt.payload_size = ctx->cuparse_ext->format.seqhdr_data_length;
|
||||
seq_pkt.payload = ctx->cuparse_ext.raw_seqhdr_data;
|
||||
seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;
|
||||
|
||||
if (seq_pkt.payload && seq_pkt.payload_size) {
|
||||
ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &seq_pkt));
|
||||
@@ -1055,8 +1046,8 @@ static void cuvid_flush(AVCodecContext *avctx)
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
|
||||
seq_pkt.payload = ctx->cuparse_ext->raw_seqhdr_data;
|
||||
seq_pkt.payload_size = ctx->cuparse_ext->format.seqhdr_data_length;
|
||||
seq_pkt.payload = ctx->cuparse_ext.raw_seqhdr_data;
|
||||
seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;
|
||||
|
||||
if (seq_pkt.payload && seq_pkt.payload_size) {
|
||||
ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &seq_pkt));
|
||||
|
||||
+1
-1
@@ -328,7 +328,7 @@ static void dmix_add_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t le
|
||||
int i;
|
||||
|
||||
for (i = 0; i < len; i++)
|
||||
dst[i] += (unsigned)mul15(src[i], coeff);
|
||||
dst[i] += mul15(src[i], coeff);
|
||||
}
|
||||
|
||||
static void dmix_scale_c(int32_t *dst, int scale, ptrdiff_t len)
|
||||
|
||||
+1
-2
@@ -1858,8 +1858,7 @@ int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
|
||||
int ret;
|
||||
|
||||
if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
|
||||
if ((unsigned)avctx->width > INT_MAX - STRIDE_ALIGN ||
|
||||
(ret = av_image_check_size2(FFALIGN(avctx->width, STRIDE_ALIGN), avctx->height, avctx->max_pixels, AV_PIX_FMT_NONE, 0, avctx)) < 0 || avctx->pix_fmt<0) {
|
||||
if ((ret = av_image_check_size2(FFALIGN(avctx->width, STRIDE_ALIGN), avctx->height, avctx->max_pixels, AV_PIX_FMT_NONE, 0, avctx)) < 0 || avctx->pix_fmt<0) {
|
||||
av_log(avctx, AV_LOG_ERROR, "video_get_buffer: image parameters invalid\n");
|
||||
ret = AVERROR(EINVAL);
|
||||
goto fail;
|
||||
|
||||
@@ -215,7 +215,7 @@ static int dirac_combine_frame(AVCodecParserContext *s, AVCodecContext *avctx,
|
||||
int64_t pts = AV_RB32(cur_pu + 13);
|
||||
if (s->last_pts == 0 && s->last_dts == 0)
|
||||
s->dts = pts - 1;
|
||||
else if (s->last_dts != AV_NOPTS_VALUE)
|
||||
else
|
||||
s->dts = s->last_dts + 1;
|
||||
s->pts = pts;
|
||||
if (!avctx->has_b_frames && (cur_pu[4] & 0x03))
|
||||
|
||||
@@ -1431,8 +1431,8 @@ static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
|
||||
int *c = s->globalmc[ref].perspective;
|
||||
|
||||
int64_t m = (1<<ep) - (c[0]*(int64_t)x + c[1]*(int64_t)y);
|
||||
int64_t mx = m * (uint64_t)((A[0][0] * (int64_t)x + A[0][1]*(int64_t)y) + (1LL<<ez) * b[0]);
|
||||
int64_t my = m * (uint64_t)((A[1][0] * (int64_t)x + A[1][1]*(int64_t)y) + (1LL<<ez) * b[1]);
|
||||
int64_t mx = m * (int64_t)((A[0][0] * (int64_t)x + A[0][1]*(int64_t)y) + (1LL<<ez) * b[0]);
|
||||
int64_t my = m * (int64_t)((A[1][0] * (int64_t)x + A[1][1]*(int64_t)y) + (1LL<<ez) * b[1]);
|
||||
|
||||
block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
|
||||
block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user