Compare commits

..

1 Commits

Author SHA1 Message Date
Michael Niedermayer c5079bf3bc Bump minor versions after branching 4.3
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
2020-06-08 22:49:04 +02:00
657 changed files with 5664 additions and 10064 deletions
-23
View File
@@ -1,23 +0,0 @@
exclude: ^tests/ref/
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: check-case-conflict
- id: check-executables-have-shebangs
- id: check-illegal-windows-names
- id: check-shebang-scripts-are-executable
- id: check-yaml
- id: end-of-file-fixer
- id: fix-byte-order-marker
- id: mixed-line-ending
- id: trailing-whitespace
- repo: local
hooks:
- id: aarch64-asm-indent
name: fix aarch64 assembly indentation
files: ^.*/aarch64/.*\.S$
language: script
entry: ./tools/check_arm_indent.sh --apply
pass_filenames: false
-29
View File
@@ -1,29 +0,0 @@
name: Lint
on:
push:
branches:
- release/4.3
pull_request:
jobs:
lint:
name: Pre-Commit
runs-on: utilities
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Install pre-commit CI
id: install
run: |
python3 -m venv ~/pre-commit
~/pre-commit/bin/pip install --upgrade pip setuptools
~/pre-commit/bin/pip install pre-commit
echo "envhash=$({ python3 --version && cat .forgejo/pre-commit/config.yaml; } | sha256sum | cut -d' ' -f1)" >> $FORGEJO_OUTPUT
- name: Cache
uses: actions/cache@v4
with:
path: ~/.cache/pre-commit
key: pre-commit-${{ steps.install.outputs.envhash }}
- name: Run pre-commit CI
run: ~/pre-commit/bin/pre-commit run -c .forgejo/pre-commit/config.yaml --show-diff-on-failure --color=always --all-files
-80
View File
@@ -1,80 +0,0 @@
name: Test
on:
push:
branches:
- release/4.3
pull_request:
jobs:
run_fate:
name: Fate (${{ matrix.runner }}, ${{ matrix.shared }}, ${{ matrix.bits }} bit)
strategy:
fail-fast: false
matrix:
runner: [linux-aarch64]
shared: ['static']
bits: ['64']
include:
- runner: linux-amd64
shared: 'static'
bits: '32'
- runner: linux-amd64
shared: 'shared'
bits: '64'
runs-on: ${{ matrix.runner }}
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Configure
run: |
./configure --enable-gpl --enable-nonfree --enable-memory-poisoning --assert-level=2 \
$([ "${{ matrix.bits }}" != "32" ] || echo --arch=x86_32 --extra-cflags=-m32 --extra-cxxflags=-m32 --extra-ldflags=-m32) \
$([ "${{ matrix.shared }}" != "shared" ] || echo --enable-shared --disable-static) \
|| CFGRES=$? && CFGRES=$?
cat ffbuild/config.log
exit $CFGRES
- name: Build
run: make -j$(nproc)
- name: Restore Cached Fate-Suite
id: cache
uses: actions/cache/restore@v4
with:
path: fate-suite
key: fate-suite
restore-keys: |
fate-suite-
- name: Sync Fate-Suite
id: fate
run: |
make fate-rsync SAMPLES=$PWD/fate-suite
echo "hash=$(find fate-suite -type f -printf "%P %s %T@\n" | sort | sha256sum | cut -d' ' -f1)" >> $FORGEJO_OUTPUT
- name: Cache Fate-Suite
uses: actions/cache/save@v4
if: ${{ format('fate-suite-{0}', steps.fate.outputs.hash) != steps.cache.outputs.cache-matched-key }}
with:
path: fate-suite
key: fate-suite-${{ steps.fate.outputs.hash }}
- name: Run Fate
run: LD_LIBRARY_PATH="$(printf "%s:" "$PWD"/lib*)$PWD" make fate fate-build SAMPLES=$PWD/fate-suite -j$(nproc)
compile_only:
name: Fate (Win64, Build-Only)
strategy:
fail-fast: false
matrix:
image: ["ghcr.io/btbn/ffmpeg-builds/win64-gpl-4.3:latest"]
runs-on: linux-amd64
container: ${{ matrix.image }}
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Configure
run: |
./configure --pkg-config-flags="--static" $FFBUILD_TARGET_FLAGS $FF_CONFIGURE \
--cc="$CC" --cxx="$CXX" --ar="$AR" --ranlib="$RANLIB" --nm="$NM" \
--extra-cflags="$FF_CFLAGS" --extra-cxxflags="$FF_CXXFLAGS" \
--extra-libs="$FF_LIBS" --extra-ldflags="$FF_LDFLAGS" --extra-ldexeflags="$FF_LDEXEFLAGS"
- name: Build
run: make -j$(nproc)
- name: Run Fate
run: make -j$(nproc) fate-build
+9 -9
View File
@@ -55,7 +55,7 @@ modified by someone else and passed on, the recipients should know
that what they have is not the original version, so that the original
author's reputation will not be affected by problems that might be
introduced by others.
Finally, software patents pose a constant threat to the existence of
any free program. We wish to make sure that a company cannot
effectively restrict the users of a free program by obtaining a
@@ -111,7 +111,7 @@ modification follow. Pay close attention to the difference between a
"work based on the library" and a "work that uses the library". The
former contains code derived from the library, whereas the latter must
be combined with the library in order to run.
GNU LESSER GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
@@ -158,7 +158,7 @@ Library.
You may charge a fee for the physical act of transferring a copy,
and you may at your option offer warranty protection in exchange for a
fee.
2. You may modify your copy or copies of the Library or any portion
of it, thus forming a work based on the Library, and copy and
distribute such modifications or work under the terms of Section 1
@@ -216,7 +216,7 @@ instead of to this License. (If a newer version than version 2 of the
ordinary GNU General Public License has appeared, then you can specify
that version instead if you wish.) Do not make any other change in
these notices.
Once this change is made in a given copy, it is irreversible for
that copy, so the ordinary GNU General Public License applies to all
subsequent copies and derivative works made from that copy.
@@ -267,7 +267,7 @@ Library will still fall under Section 6.)
distribute the object code for the work under the terms of Section 6.
Any executables containing that work also fall under Section 6,
whether or not they are linked directly with the Library itself.
6. As an exception to the Sections above, you may also combine or
link a "work that uses the Library" with the Library to produce a
work containing portions of the Library, and distribute that work
@@ -329,7 +329,7 @@ restrictions of other proprietary libraries that do not normally
accompany the operating system. Such a contradiction means you cannot
use both them and the Library together in an executable that you
distribute.
7. You may place library facilities that are a work based on the
Library side-by-side in a single library together with other library
facilities not covered by this License, and distribute such a combined
@@ -370,7 +370,7 @@ subject to these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties with
this License.
11. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
@@ -422,7 +422,7 @@ conditions either of that version or of any later version published by
the Free Software Foundation. If the Library does not specify a
license version number, you may choose any version ever published by
the Free Software Foundation.
14. If you wish to incorporate parts of the Library into other free
programs whose distribution conditions are incompatible with these,
write to the author to ask for permission. For software which is
@@ -456,7 +456,7 @@ SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Libraries
If you develop a new library, and you want it to be of the greatest
+2 -2
View File
@@ -1,6 +1,6 @@
See the Git history of the project (https://git.ffmpeg.org/ffmpeg) to
See the Git history of the project (git://source.ffmpeg.org/ffmpeg) to
get the names of people who have contributed to FFmpeg.
To check the log, you can type the command "git log" in the FFmpeg
source directory, or browse the online repository at
https://git.ffmpeg.org/ffmpeg
http://source.ffmpeg.org.
-1213
View File
File diff suppressed because it is too large Load Diff
-8
View File
@@ -15,11 +15,3 @@ NOTICE
------
- Non system dependencies (e.g. libx264, libvpx) are disabled by default.
NOTICE for Package Maintainers
------------------------------
- It is recommended to build FFmpeg twice, first with minimal external dependencies so
that 3rd party packages, which depend on FFmpegs libavutil/libavfilter/libavcodec/libavformat
can then be built. And last build FFmpeg with full dependancies (which may in turn depend on
some of these 3rd party packages). This avoids circular dependencies during build.
+3 -6
View File
@@ -577,12 +577,10 @@ wm4
Releases
========
7.0 Michael Niedermayer
6.1 Michael Niedermayer
5.1 Michael Niedermayer
4.4 Michael Niedermayer
3.4 Michael Niedermayer
2.8 Michael Niedermayer
2.7 Michael Niedermayer
2.6 Michael Niedermayer
2.5 Michael Niedermayer
If you want to maintain an older release, please contact us
@@ -612,7 +610,6 @@ Loren Merritt ABD9 08F4 C920 3F65 D8BE 35D7 1540 DAA7 060F 56DE
Lou Logan (llogan) 7D68 DC73 CBEF EABB 671A B6CF 621C 2E28 82F8 DC3A
Lynne FE50 139C 6805 72CA FD52 1F8D A2FE A5F0 3F03 4464
Michael Niedermayer 9FF2 128B 147E F673 0BAD F133 611E C787 040B 0FAB
DD1E C9E8 DE08 5C62 9B3E 1846 B18E 8928 B394 8D64
Nicolas George 24CE 01CE 9ACC 5CEB 74D8 8D9D B063 D997 36E5 4C93
Nikolay Aleksandrov 8978 1D8C FB71 588E 4B27 EAA8 C4F0 B5FC E011 13B1
Panagiotis Issaris 6571 13A3 33D9 3726 F728 AA98 F643 B12E ECF3 E029
+1 -1
View File
@@ -1 +1 @@
4.3.9
4.2.git
-15
View File
@@ -1,15 +0,0 @@
┌────────────────────────────────────┐
│ RELEASE NOTES for FFmpeg 4.3 "4:3" │
└────────────────────────────────────┘
The FFmpeg Project proudly presents FFmpeg 4.3 "4:3", about 10
months after the release of FFmpeg 4.2.
A complete Changelog is available at the root of the project, and the
complete Git history on https://git.ffmpeg.org/gitweb/ffmpeg.git
We hope you will like this release as much as we enjoyed working on it, and
as usual, if you have any questions about it, or any FFmpeg related topic,
feel free to join us on the #ffmpeg IRC channel (on irc.libera.chat) or ask
on the mailing-lists.
Vendored
+9 -14
View File
@@ -532,7 +532,7 @@ die(){
If you think configure made a mistake, make sure you are using the latest
version from Git. If the latest version fails, report the problem to the
ffmpeg-user@ffmpeg.org mailing list or IRC #ffmpeg on irc.libera.chat.
ffmpeg-user@ffmpeg.org mailing list or IRC #ffmpeg on irc.freenode.net.
EOF
if disabled logging; then
cat <<EOF
@@ -2330,7 +2330,6 @@ HAVE_LIST="
opencl_vaapi_intel_media
perl
pod2man
posix_ioctl
texi2html
"
@@ -3237,7 +3236,7 @@ librav1e_encoder_deps="librav1e"
librav1e_encoder_select="extract_extradata_bsf"
librsvg_decoder_deps="librsvg"
libshine_encoder_deps="libshine"
libshine_encoder_select="audio_frame_queue mpegaudioheader"
libshine_encoder_select="audio_frame_queue"
libspeex_decoder_deps="libspeex"
libspeex_encoder_deps="libspeex"
libspeex_encoder_select="audio_frame_queue"
@@ -5331,7 +5330,6 @@ case $target_os in
;;
netbsd)
disable symver
enable section_data_rel_ro
oss_indev_extralibs="-lossaudio"
oss_outdev_extralibs="-lossaudio"
enabled gcc || check_ldflags -Wl,-zmuldefs
@@ -5350,7 +5348,6 @@ case $target_os in
disable symver
;;
freebsd)
enable section_data_rel_ro
;;
bsd/os)
add_extralibs -lpoll -lgnugetopt
@@ -6496,7 +6493,7 @@ fi
if enabled sdl2; then
SDL2_CONFIG="${cross_prefix}sdl2-config"
test_pkg_config sdl2 "sdl2 >= 2.0.1 sdl2 < 3.0.0" SDL_events.h SDL_PollEvent
test_pkg_config sdl2 "sdl2 >= 2.0.1 sdl2 < 2.1.0" SDL_events.h SDL_PollEvent
if disabled sdl2 && "${SDL2_CONFIG}" --version > /dev/null 2>&1; then
sdl2_cflags=$("${SDL2_CONFIG}" --cflags)
sdl2_extralibs=$("${SDL2_CONFIG}" --libs)
@@ -6542,13 +6539,11 @@ perl -v > /dev/null 2>&1 && enable perl || disable perl
pod2man --help > /dev/null 2>&1 && enable pod2man || disable pod2man
rsync --help 2> /dev/null | grep -q 'contimeout' && enable rsync_contimeout || disable rsync_contimeout
check_headers linux/fb.h
check_headers linux/videodev2.h
test_code cc linux/videodev2.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_sanitized struct_v4l2_frmivalenum_discrete
test_code cc sys/ioctl.h "int ioctl(int, int, ...)" && enable posix_ioctl
# check V4L2 codecs available in the API
if enabled v4l2_m2m; then
check_headers linux/fb.h
check_headers linux/videodev2.h
test_code cc linux/videodev2.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_sanitized struct_v4l2_frmivalenum_discrete
check_cc v4l2_m2m linux/videodev2.h "int i = V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_VIDEO_M2M | V4L2_BUF_FLAG_LAST;"
check_cc vc1_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_VC1_ANNEX_G;"
check_cc mpeg1_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_MPEG1;"
@@ -6593,7 +6588,7 @@ enabled alsa && { check_pkg_config alsa alsa "alsa/asoundlib.h" snd_pcm_htimesta
enabled libjack &&
require_pkg_config libjack jack jack/jack.h jack_port_get_latency_range
enabled sndio && check_pkg_config sndio sndio sndio.h sio_open
enabled sndio && check_lib sndio sndio.h sio_open -lsndio
if enabled libcdio; then
check_pkg_config libcdio libcdio_paranoia "cdio/cdda.h cdio/paranoia.h" cdio_cddap_open ||
@@ -6690,7 +6685,7 @@ enabled vulkan &&
if enabled x86; then
case $target_os in
freebsd|mingw32*|mingw64*|win32|win64|linux|cygwin*)
mingw32*|mingw64*|win32|win64|linux|cygwin*)
;;
*)
disable ffnvcodec cuvid nvdec nvenc
@@ -7518,7 +7513,7 @@ cat > $TMPH <<EOF
#define FFMPEG_CONFIG_H
#define FFMPEG_CONFIGURATION "$(c_escape $FFMPEG_CONFIGURATION)"
#define FFMPEG_LICENSE "$(c_escape $license)"
#define CONFIG_THIS_YEAR 2025
#define CONFIG_THIS_YEAR 2020
#define FFMPEG_DATADIR "$(eval c_escape $datadir)"
#define AVCONV_DATADIR "$(eval c_escape $datadir)"
#define CC_IDENT "$(c_escape ${cc_ident:-Unknown compiler})"
+1 -1
View File
@@ -38,7 +38,7 @@ PROJECT_NAME = FFmpeg
# could be handy for archiving the generated documentation or if some version
# control system is used.
PROJECT_NUMBER = 4.3.9
PROJECT_NUMBER =
# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a
+2 -2
View File
@@ -3,9 +3,9 @@
The FFmpeg developers.
For details about the authorship, see the Git history of the project
(https://git.ffmpeg.org/ffmpeg), e.g. by typing the command
(git://source.ffmpeg.org/ffmpeg), e.g. by typing the command
@command{git log} in the FFmpeg source directory, or browsing the
online repository at @url{https://git.ffmpeg.org/ffmpeg}.
online repository at @url{http://source.ffmpeg.org}.
Maintainers for the specific components are listed in the file
@file{MAINTAINERS} in the source code tree.
+1 -1
View File
File diff suppressed because one or more lines are too long
+1
View File
@@ -63,3 +63,4 @@ make -j<num>
make -k
Continue build in case of errors, this is useful for the regression tests
sometimes but note that it will still not run all reg tests.
+1 -1
View File
@@ -317,7 +317,7 @@ list are dropped. You may use the special @code{*} string to match all pages,
or @code{subtitle} to match all subtitle pages.
Default value is *.
@item txt_default_region
Set default character set used for decoding, a value between 0 and 87 (see
Set default G0 character set used for decoding, a value between 0 and 80 (see
ETS 300 706, Section 15, Table 32). Default value is -1, which does not
override the libzvbi default. This option is needed for some legacy level 1.0
transmissions which cannot signal the proper charset.
-7
View File
@@ -327,13 +327,6 @@ segment index to start live streams at (negative values are from the end).
@item allowed_extensions
',' separated list of file extensions that hls is allowed to access.
@item extension_picky
This blocks disallowed extensions from probing
It also requires all available segments to have matching extensions to the format
except mpegts, which is always allowed.
It is recommended to set the whitelists correctly instead of depending on extensions
Enabled by default.
@item max_reload
Maximum number of times a insufficient list is attempted to be reloaded.
Default value is 1000.
-19
View File
@@ -762,25 +762,6 @@ In case you need finer control over how valgrind is invoked, use the
@code{--target-exec='valgrind <your_custom_valgrind_options>} option in
your configure line instead.
@anchor{Maintenance}
@chapter Maintenance process
@anchor{MAINTAINERS}
@section MAINTAINERS
The developers maintaining each part of the codebase are listed in @file{MAINTAINERS}.
Being listed in @file{MAINTAINERS}, gives one the right to have git write access to
the specific repository.
@anchor{Becoming a maintainer}
@section Becoming a maintainer
People add themselves to @file{MAINTAINERS} by sending a patch like any other code
change. These get reviewed by the community like any other patch. It is expected
that, if someone has an objection to a new maintainer, she is willing to object
in public with her full name and is willing to take over maintainership for the area.
@anchor{Release process}
@chapter Release process
+3 -1
View File
@@ -137,9 +137,11 @@ static int decode_packet(AVCodecContext *dec, const AVPacket *pkt)
ret = output_audio_frame(frame);
av_frame_unref(frame);
if (ret < 0)
return ret;
}
return ret;
return 0;
}
static int open_codec_context(int *stream_idx,
+3 -1
View File
@@ -221,8 +221,10 @@ static int dec_enc(AVPacket *pkt, AVCodec *enc_codec)
fail:
av_frame_free(&frame);
if (ret < 0)
return ret;
}
return ret;
return 0;
}
int main(int argc, char **argv)
+2 -22
View File
@@ -53,7 +53,7 @@ Most distribution and operating system provide a package for it.
@section Cloning the source tree
@example
git clone https://git.ffmpeg.org/ffmpeg.git <target>
git clone git://source.ffmpeg.org/ffmpeg <target>
@end example
This will put the FFmpeg sources into the directory @var{<target>}.
@@ -187,18 +187,11 @@ to make sure you don't have untracked files or deletions.
git add [-i|-p|-A] <filenames/dirnames>
@end example
Make sure you have told Git your name, email address and GPG key
Make sure you have told Git your name and email address
@example
git config --global user.name "My Name"
git config --global user.email my@@email.invalid
git config --global user.signingkey ABCDEF0123245
@end example
Enable signing all commits or use -S
@example
git config --global commit.gpgsign true
@end example
Use @option{--global} to set the global configuration for all your Git checkouts.
@@ -400,19 +393,6 @@ git checkout -b svn_23456 $SHA1
where @var{$SHA1} is the commit hash from the @command{git log} output.
@chapter gpg key generation
If you have no gpg key yet, we recommend that you create a ed25519 based key as it
is small, fast and secure. Especially it results in small signatures in git.
@example
gpg --default-new-key-algo "ed25519/cert,sign+cv25519/encr" --quick-generate-key "human@@server.com"
@end example
When generating a key, make sure the email specified matches the email used in git as some sites like
github consider mismatches a reason to declare such commits unverified. After generating a key you
can add it to the MAINTAINER file and upload it to a keyserver.
@chapter Pre-push checklist
Once you have a set of commits that you feel are ready for pushing,
+1
View File
@@ -157,3 +157,4 @@ PFD[32] would for example be signed 32 bit little-endian IEEE float
@item XVID @tab non-compliant MPEG-4 generated by old Xvid
@item XVIX @tab non-compliant MPEG-4 generated by old Xvid with interlacing bug
@end multitable
+19 -103
View File
@@ -20,45 +20,8 @@
# License along with FFmpeg; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
# Texinfo 7.0 changed the syntax of various functions.
# Provide a shim for older versions.
sub ff_set_from_init_file($$) {
my $key = shift;
my $value = shift;
if (exists &{'texinfo_set_from_init_file'}) {
texinfo_set_from_init_file($key, $value);
} else {
set_from_init_file($key, $value);
}
}
sub ff_get_conf($) {
my $key = shift;
if (exists &{'texinfo_get_conf'}) {
texinfo_get_conf($key);
} else {
get_conf($key);
}
}
sub get_formatting_function($$) {
my $obj = shift;
my $func = shift;
my $sub = $obj->can('formatting_function');
if ($sub) {
return $obj->formatting_function($func);
} else {
return $obj->{$func};
}
}
# determine texinfo version
my $program_version_num = version->declare(ff_get_conf('PACKAGE_VERSION'))->numify;
my $program_version_6_8 = $program_version_num >= 6.008000;
# no navigation elements
ff_set_from_init_file('HEADERS', 0);
set_from_init_file('HEADERS', 0);
sub ffmpeg_heading_command($$$$$)
{
@@ -92,7 +55,7 @@ sub ffmpeg_heading_command($$$$$)
$element = $command->{'parent'};
}
if ($element) {
$result .= &{get_formatting_function($self, 'format_element_header')}($self, $cmdname,
$result .= &{$self->{'format_element_header'}}($self, $cmdname,
$command, $element);
}
@@ -149,11 +112,7 @@ sub ffmpeg_heading_command($$$$$)
$cmdname
= $Texinfo::Common::level_to_structuring_command{$cmdname}->[$heading_level];
}
# format_heading_text expects an array of headings for texinfo >= 7.0
if ($program_version_num >= 7.000000) {
$heading = [$heading];
}
$result .= &{get_formatting_function($self,'format_heading_text')}(
$result .= &{$self->{'format_heading_text'}}(
$self, $cmdname, $heading,
$heading_level +
$self->get_conf('CHAPTER_HEADER_LEVEL') - 1, $command);
@@ -168,18 +127,14 @@ foreach my $command (keys(%Texinfo::Common::sectioning_commands), 'node') {
}
# print the TOC where @contents is used
if ($program_version_6_8) {
ff_set_from_init_file('CONTENTS_OUTPUT_LOCATION', 'inline');
} else {
ff_set_from_init_file('INLINE_CONTENTS', 1);
}
set_from_init_file('INLINE_CONTENTS', 1);
# make chapters <h2>
ff_set_from_init_file('CHAPTER_HEADER_LEVEL', 2);
set_from_init_file('CHAPTER_HEADER_LEVEL', 2);
# Do not add <hr>
ff_set_from_init_file('DEFAULT_RULE', '');
ff_set_from_init_file('BIG_RULE', '');
set_from_init_file('DEFAULT_RULE', '');
set_from_init_file('BIG_RULE', '');
# Customized file beginning
sub ffmpeg_begin_file($$$)
@@ -196,18 +151,7 @@ sub ffmpeg_begin_file($$$)
my ($title, $description, $encoding, $date, $css_lines,
$doctype, $bodytext, $copying_comment, $after_body_open,
$extra_head, $program_and_version, $program_homepage,
$program, $generator);
if ($program_version_num >= 7.000000) {
($title, $description, $encoding, $date, $css_lines,
$doctype, $bodytext, $copying_comment, $after_body_open,
$extra_head, $program_and_version, $program_homepage,
$program, $generator) = $self->_file_header_information($command);
} else {
($title, $description, $encoding, $date, $css_lines,
$doctype, $bodytext, $copying_comment, $after_body_open,
$extra_head, $program_and_version, $program_homepage,
$program, $generator) = $self->_file_header_informations($command);
}
$program, $generator) = $self->_file_header_informations($command);
my $links = $self->_get_links ($filename, $element);
@@ -240,11 +184,7 @@ EOT
return $head1 . $head_title . $head2 . $head_title . $head3;
}
if ($program_version_6_8) {
texinfo_register_formatting_function('format_begin_file', \&ffmpeg_begin_file);
} else {
texinfo_register_formatting_function('begin_file', \&ffmpeg_begin_file);
}
texinfo_register_formatting_function('begin_file', \&ffmpeg_begin_file);
sub ffmpeg_program_string($)
{
@@ -261,17 +201,13 @@ sub ffmpeg_program_string($)
$self->gdt('This document was generated automatically.'));
}
}
if ($program_version_6_8) {
texinfo_register_formatting_function('format_program_string', \&ffmpeg_program_string);
} else {
texinfo_register_formatting_function('program_string', \&ffmpeg_program_string);
}
texinfo_register_formatting_function('program_string', \&ffmpeg_program_string);
# Customized file ending
sub ffmpeg_end_file($)
{
my $self = shift;
my $program_string = &{get_formatting_function($self,'format_program_string')}($self);
my $program_string = &{$self->{'format_program_string'}}($self);
my $program_text = <<EOT;
<p style="font-size: small;">
$program_string
@@ -284,15 +220,11 @@ EOT
EOT
return $program_text . $footer;
}
if ($program_version_6_8) {
texinfo_register_formatting_function('format_end_file', \&ffmpeg_end_file);
} else {
texinfo_register_formatting_function('end_file', \&ffmpeg_end_file);
}
texinfo_register_formatting_function('end_file', \&ffmpeg_end_file);
# Dummy title command
# Ignore title. Title is handled through ffmpeg_begin_file().
ff_set_from_init_file('USE_TITLEPAGE_FOR_TITLE', 1);
set_from_init_file('USE_TITLEPAGE_FOR_TITLE', 1);
sub ffmpeg_title($$$$)
{
return '';
@@ -310,14 +242,8 @@ sub ffmpeg_float($$$$$)
my $args = shift;
my $content = shift;
my ($caption, $prepended);
if ($program_version_num >= 7.000000) {
($caption, $prepended) = Texinfo::Convert::Converter::float_name_caption($self,
$command);
} else {
($caption, $prepended) = Texinfo::Common::float_name_caption($self,
$command);
}
my ($caption, $prepended) = Texinfo::Common::float_name_caption($self,
$command);
my $caption_text = '';
my $prepended_text;
my $prepended_save = '';
@@ -389,13 +315,8 @@ sub ffmpeg_float($$$$$)
$caption->{'args'}->[0], 'float caption');
}
if ($prepended_text.$caption_text ne '') {
if ($program_version_num >= 7.000000) {
$prepended_text = $self->html_attribute_class('div',['float-caption']). '>'
. $prepended_text;
} else {
$prepended_text = $self->_attribute_class('div','float-caption'). '>'
. $prepended_text;
}
$prepended_text = $self->_attribute_class('div','float-caption'). '>'
. $prepended_text;
$caption_text .= '</div>';
}
my $html_class = '';
@@ -408,13 +329,8 @@ sub ffmpeg_float($$$$$)
$prepended_text = '';
$caption_text = '';
}
if ($program_version_num >= 7.000000) {
return $self->html_attribute_class('div', [$html_class]). '>' . "\n" .
$prepended_text . $caption_text . $content . '</div>';
} else {
return $self->_attribute_class('div', $html_class). '>' . "\n" .
$prepended_text . $caption_text . $content . '</div>';
}
return $self->_attribute_class('div', $html_class). '>' . "\n" .
$prepended_text . $caption_text . $content . '</div>';
}
texinfo_register_command_formatting('float',
Executable → Regular
View File
Executable → Regular
View File
+1
View File
@@ -44,3 +44,4 @@ a+b*c;
here the reader knows that a,b,c are meant to be signed integers but for C
standard compliance / to avoid undefined behavior they are stored in unsigned
ints.
+1 -1
View File
@@ -418,4 +418,4 @@ done:
When all of this is done, you can submit your patch to the ffmpeg-devel
mailing-list for review. If you need any help, feel free to come on our IRC
channel, #ffmpeg-devel on irc.libera.chat.
channel, #ffmpeg-devel on irc.freenode.net.
-2
View File
@@ -1,5 +1,3 @@
#!/bin/sh
toupper(){
echo "$@" | tr abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ
}
+1 -1
View File
@@ -538,7 +538,7 @@ static const AVOption *opt_find(void *obj, const char *name, const char *unit,
return o;
}
#define FLAGS ((o->type == AV_OPT_TYPE_FLAGS && (arg[0]=='-' || arg[0]=='+')) ? AV_DICT_APPEND : 0)
#define FLAGS (o->type == AV_OPT_TYPE_FLAGS && (arg[0]=='-' || arg[0]=='+')) ? AV_DICT_APPEND : 0
int opt_default(void *optctx, const char *opt, const char *arg)
{
const AVOption *o;
+2 -4
View File
@@ -468,9 +468,8 @@ static int read_key(void)
}
//Read it
if(nchars != 0) {
if (read(0, &ch, 1) == 1)
return ch;
return 0;
read(0, &ch, 1);
return ch;
}else{
return -1;
}
@@ -529,7 +528,6 @@ static void ffmpeg_cleanup(int ret)
for (j = 0; j < fg->nb_outputs; j++) {
OutputFilter *ofilter = fg->outputs[j];
avfilter_inout_free(&ofilter->out_tmp);
av_freep(&ofilter->name);
av_freep(&ofilter->formats);
av_freep(&ofilter->channel_layouts);
+2 -2
View File
@@ -1151,8 +1151,6 @@ static void video_audio_display(VideoState *s)
if (realloc_texture(&s->vis_texture, SDL_PIXELFORMAT_ARGB8888, s->width, s->height, SDL_BLENDMODE_NONE, 1) < 0)
return;
if (s->xpos >= s->width)
s->xpos = 0;
nb_display_channels= FFMIN(nb_display_channels, 2);
if (rdft_bits != s->rdft_bits) {
av_rdft_end(s->rdft);
@@ -1202,6 +1200,8 @@ static void video_audio_display(VideoState *s)
}
if (!s->paused)
s->xpos++;
if (s->xpos >= s->width)
s->xpos= s->xleft;
}
}
+2 -2
View File
@@ -131,8 +131,8 @@ static int zero12v_decode_frame(AVCodecContext *avctx, void *data,
u = x/2 + (uint16_t *)(pic->data[1] + line * pic->linesize[1]);
v = x/2 + (uint16_t *)(pic->data[2] + line * pic->linesize[2]);
memcpy(y, y_temp, sizeof(*y) * (width - x));
memcpy(u, u_temp, sizeof(*u) * ((width - x + 1) / 2));
memcpy(v, v_temp, sizeof(*v) * ((width - x + 1) / 2));
memcpy(u, u_temp, sizeof(*u) * (width - x + 1) / 2);
memcpy(v, v_temp, sizeof(*v) * (width - x + 1) / 2);
}
line_end += stride;
+2 -4
View File
@@ -498,8 +498,8 @@ static int decode_i_block(FourXContext *f, int16_t *block)
{
int code, i, j, level, val;
if (get_bits_left(&f->pre_gb) < 2) {
av_log(f->avctx, AV_LOG_ERROR, "%d bits left before decode_i_block()\n", get_bits_left(&f->pre_gb));
if (get_bits_left(&f->gb) < 2){
av_log(f->avctx, AV_LOG_ERROR, "%d bits left before decode_i_block()\n", get_bits_left(&f->gb));
return AVERROR_INVALIDDATA;
}
@@ -885,8 +885,6 @@ static int decode_frame(AVCodecContext *avctx, void *data,
}
if (i >= CFRAME_BUFFER_COUNT) {
if (free_index < 0)
return AVERROR_INVALIDDATA;
i = free_index;
f->cfrm[i].id = id;
}
-3
View File
@@ -70,9 +70,6 @@ static int decode_frame(AVCodecContext *avctx, void *data,
unsigned char *planemap = c->planemap;
int ret;
if (buf_size < planes * height *2)
return AVERROR_INVALIDDATA;
if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
return ret;
+1 -1
View File
@@ -1174,7 +1174,7 @@ SKIPHEADERS-$(CONFIG_QSV) += qsv.h qsv_internal.h
SKIPHEADERS-$(CONFIG_QSVDEC) += qsvdec.h
SKIPHEADERS-$(CONFIG_QSVENC) += qsvenc.h
SKIPHEADERS-$(CONFIG_XVMC) += xvmc.h
SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h vaapi_hevc.h vaapi_encode.h
SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h vaapi_encode.h
SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h vdpau_internal.h
SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.h vt_internal.h
SKIPHEADERS-$(CONFIG_V4L2_M2M) += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
-2
View File
@@ -407,7 +407,6 @@ AVCodec ff_a64multi_encoder = {
.close = a64multi_close_encoder,
.pix_fmts = (const enum AVPixelFormat[]) {AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE},
.capabilities = AV_CODEC_CAP_DELAY,
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
};
#endif
#if CONFIG_A64MULTI5_ENCODER
@@ -422,6 +421,5 @@ AVCodec ff_a64multi5_encoder = {
.close = a64multi_close_encoder,
.pix_fmts = (const enum AVPixelFormat[]) {AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE},
.capabilities = AV_CODEC_CAP_DELAY,
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
};
#endif
+4 -4
View File
@@ -843,25 +843,25 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe)
sce0->ics.swb_sizes[g],
sce0->sf_idx[w*16+g],
sce0->band_type[w*16+g],
lambda / (band0->threshold + FLT_MIN), INFINITY, &b1, NULL, 0);
lambda / band0->threshold, INFINITY, &b1, NULL, 0);
dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
R34,
sce1->ics.swb_sizes[g],
sce1->sf_idx[w*16+g],
sce1->band_type[w*16+g],
lambda / (band1->threshold + FLT_MIN), INFINITY, &b2, NULL, 0);
lambda / band1->threshold, INFINITY, &b2, NULL, 0);
dist2 += quantize_band_cost(s, M,
M34,
sce0->ics.swb_sizes[g],
mididx,
midcb,
lambda / (minthr + FLT_MIN), INFINITY, &b3, NULL, 0);
lambda / minthr, INFINITY, &b3, NULL, 0);
dist2 += quantize_band_cost(s, S,
S34,
sce1->ics.swb_sizes[g],
sididx,
sidcb,
mslambda / (minthr * bmax + FLT_MIN), INFINITY, &b4, NULL, 0);
mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0);
B0 += b1+b2;
B1 += b3+b4;
dist1 -= b1+b2;
+2 -2
View File
@@ -155,9 +155,9 @@ static void vector_pow43(int *coefs, int len)
for (i=0; i<len; i++) {
coef = coefs[i];
if (coef < 0)
coef = -(int)ff_cbrt_tab_fixed[(-coef) & 8191];
coef = -(int)ff_cbrt_tab_fixed[-coef];
else
coef = (int)ff_cbrt_tab_fixed[ coef & 8191];
coef = (int)ff_cbrt_tab_fixed[coef];
coefs[i] = coef;
}
}
+2 -7
View File
@@ -974,18 +974,14 @@ static int decode_audio_specific_config_gb(AACContext *ac,
{
int i, ret;
GetBitContext gbc = *gb;
MPEG4AudioConfig m4ac_bak = *m4ac;
if ((i = ff_mpeg4audio_get_config_gb(m4ac, &gbc, sync_extension, avctx)) < 0) {
*m4ac = m4ac_bak;
if ((i = ff_mpeg4audio_get_config_gb(m4ac, &gbc, sync_extension, avctx)) < 0)
return AVERROR_INVALIDDATA;
}
if (m4ac->sampling_index > 12) {
av_log(avctx, AV_LOG_ERROR,
"invalid sampling rate index %d\n",
m4ac->sampling_index);
*m4ac = m4ac_bak;
return AVERROR_INVALIDDATA;
}
if (m4ac->object_type == AOT_ER_AAC_LD &&
@@ -993,7 +989,6 @@ static int decode_audio_specific_config_gb(AACContext *ac,
av_log(avctx, AV_LOG_ERROR,
"invalid low delay sampling rate index %d\n",
m4ac->sampling_index);
*m4ac = m4ac_bak;
return AVERROR_INVALIDDATA;
}
@@ -2812,7 +2807,7 @@ static void imdct_and_windowing_ld(AACContext *ac, SingleChannelElement *sce)
static void imdct_and_windowing_eld(AACContext *ac, SingleChannelElement *sce)
{
UINTFLOAT *in = sce->coeffs;
INTFLOAT *in = sce->coeffs;
INTFLOAT *out = sce->ret;
INTFLOAT *saved = sce->saved;
INTFLOAT *buf = ac->buf_mdct;
+2 -3
View File
@@ -28,7 +28,6 @@
* TODOs:
* add sane pulse detection
***********************************/
#include <float.h>
#include "libavutil/libm.h"
#include "libavutil/thread.h"
@@ -857,7 +856,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
/* Not so fast though */
ratio = sqrtf(ratio);
}
s->lambda = av_clipf(s->lambda * ratio, FLT_EPSILON, 65536.f);
s->lambda = FFMIN(s->lambda * ratio, 65536.f);
/* Keep iterating if we must reduce and lambda is in the sky */
if (ratio > 0.9f && ratio < 1.1f) {
@@ -902,7 +901,7 @@ static av_cold int aac_encode_end(AVCodecContext *avctx)
{
AACEncContext *s = avctx->priv_data;
av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_count ? s->lambda_sum / s->lambda_count : NAN);
av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_sum / s->lambda_count);
ff_mdct_end(&s->mdct1024);
ff_mdct_end(&s->mdct128);
+13 -27
View File
@@ -173,7 +173,6 @@ void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
sce->ics.window_sequence[0] == LONG_START_SEQUENCE ? 0 : 2;
const int sfb_len = sfb_end - sfb_start;
const int coef_len = sce->ics.swb_offset[sfb_end] - sce->ics.swb_offset[sfb_start];
const int n_filt = is8 ? 1 : order != TNS_MAX_ORDER ? 2 : 3;
if (coef_len <= 0 || sfb_len <= 0) {
sce->tns.present = 0;
@@ -181,30 +180,16 @@ void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
}
for (w = 0; w < sce->ics.num_windows; w++) {
float en[4] = {0.0f, 0.0f, 0.0f, 0.0f};
int oc_start = 0;
float en[2] = {0.0f, 0.0f};
int oc_start = 0, os_start = 0;
int coef_start = sce->ics.swb_offset[sfb_start];
if (n_filt == 2) {
for (g = sfb_start; g < sce->ics.num_swb && g <= sfb_end; g++) {
FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[w*16+g];
if (g > sfb_start + (sfb_len/2))
en[1] += band->energy; /* End */
else
en[0] += band->energy; /* Start */
}
en[2] = en[0];
} else {
for (g = sfb_start; g < sce->ics.num_swb && g <= sfb_end; g++) {
FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[w*16+g];
if (g > sfb_start + (sfb_len/2) + (sfb_len/4))
en[2] += band->energy; /* End */
else if (g > sfb_start + (sfb_len/2) - (sfb_len/4))
en[1] += band->energy; /* Middle */
else
en[0] += band->energy; /* Start */
}
en[3] = en[0];
for (g = sfb_start; g < sce->ics.num_swb && g <= sfb_end; g++) {
FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[w*16+g];
if (g > sfb_start + (sfb_len/2))
en[1] += band->energy;
else
en[0] += band->energy;
}
/* LPC */
@@ -214,14 +199,15 @@ void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
if (!order || !isfinite(gain) || gain < TNS_GAIN_THRESHOLD_LOW || gain > TNS_GAIN_THRESHOLD_HIGH)
continue;
tns->n_filt[w] = n_filt;
tns->n_filt[w] = is8 ? 1 : order != TNS_MAX_ORDER ? 2 : 3;
for (g = 0; g < tns->n_filt[w]; g++) {
tns->direction[w][g] = slant != 2 ? slant : en[g] < en[g + 1];
tns->order[w][g] = order/tns->n_filt[w];
tns->length[w][g] = sfb_len/tns->n_filt[w];
tns->direction[w][g] = slant != 2 ? slant : en[g] < en[!g];
tns->order[w][g] = g < tns->n_filt[w] ? order/tns->n_filt[w] : order - oc_start;
tns->length[w][g] = g < tns->n_filt[w] ? sfb_len/tns->n_filt[w] : sfb_len - os_start;
quantize_coefs(&coefs[oc_start], tns->coef_idx[w][g], tns->coef[w][g],
tns->order[w][g], c_bits);
oc_start += tns->order[w][g];
os_start += tns->length[w][g];
}
count++;
}
+1 -4
View File
@@ -308,9 +308,6 @@ static av_cold int psy_3gpp_init(FFPsyContext *ctx) {
const int bandwidth = ctx->cutoff ? ctx->cutoff : AAC_CUTOFF(ctx->avctx);
const float num_bark = calc_bark((float)bandwidth);
if (bandwidth <= 0)
return AVERROR(EINVAL);
ctx->model_priv_data = av_mallocz(sizeof(AacPsyContext));
if (!ctx->model_priv_data)
return AVERROR(ENOMEM);
@@ -797,7 +794,7 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel,
if (pe < 1.15f * desired_pe) {
/* 6.6.1.3.6 "Final threshold modification by linearization" */
norm_fac = norm_fac ? 1.0f / norm_fac : 0;
norm_fac = 1.0f / norm_fac;
for (w = 0; w < wi->num_windows*16; w += 16) {
for (g = 0; g < num_bands; g++) {
AacPsyBand *band = &pch->band[w+g];
-1
View File
@@ -592,7 +592,6 @@ static int sbr_make_f_derived(AACContext *ac, SpectralBandReplication *sbr)
if (sbr->n_q > 5) {
av_log(ac->avctx, AV_LOG_ERROR, "Too many noise floor scale factors: %d\n", sbr->n_q);
sbr->n_q = 1;
return -1;
}
+109 -109
View File
@@ -19,130 +19,130 @@
#include "libavutil/aarch64/asm.S"
function ff_ps_add_squares_neon, export=1
1: ld1 {v0.4s,v1.4s}, [x1], #32
fmul v0.4s, v0.4s, v0.4s
fmul v1.4s, v1.4s, v1.4s
faddp v2.4s, v0.4s, v1.4s
ld1 {v3.4s}, [x0]
fadd v3.4s, v3.4s, v2.4s
st1 {v3.4s}, [x0], #16
subs w2, w2, #4
b.gt 1b
1: ld1 {v0.4S,v1.4S}, [x1], #32
fmul v0.4S, v0.4S, v0.4S
fmul v1.4S, v1.4S, v1.4S
faddp v2.4S, v0.4S, v1.4S
ld1 {v3.4S}, [x0]
fadd v3.4S, v3.4S, v2.4S
st1 {v3.4S}, [x0], #16
subs w2, w2, #4
b.gt 1b
ret
endfunc
function ff_ps_mul_pair_single_neon, export=1
1: ld1 {v0.4s,v1.4s}, [x1], #32
ld1 {v2.4s}, [x2], #16
zip1 v3.4s, v2.4s, v2.4s
zip2 v4.4s, v2.4s, v2.4s
fmul v0.4s, v0.4s, v3.4s
fmul v1.4s, v1.4s, v4.4s
st1 {v0.4s,v1.4s}, [x0], #32
subs w3, w3, #4
b.gt 1b
1: ld1 {v0.4S,v1.4S}, [x1], #32
ld1 {v2.4S}, [x2], #16
zip1 v3.4S, v2.4S, v2.4S
zip2 v4.4S, v2.4S, v2.4S
fmul v0.4S, v0.4S, v3.4S
fmul v1.4S, v1.4S, v4.4S
st1 {v0.4S,v1.4S}, [x0], #32
subs w3, w3, #4
b.gt 1b
ret
endfunc
function ff_ps_stereo_interpolate_neon, export=1
ld1 {v0.4s}, [x2]
ld1 {v1.4s}, [x3]
zip1 v4.4s, v0.4s, v0.4s
zip2 v5.4s, v0.4s, v0.4s
zip1 v6.4s, v1.4s, v1.4s
zip2 v7.4s, v1.4s, v1.4s
1: ld1 {v2.2s}, [x0]
ld1 {v3.2s}, [x1]
fadd v4.4s, v4.4s, v6.4s
fadd v5.4s, v5.4s, v7.4s
mov v2.d[1], v2.d[0]
mov v3.d[1], v3.d[0]
fmul v2.4s, v2.4s, v4.4s
fmla v2.4s, v3.4s, v5.4s
st1 {v2.d}[0], [x0], #8
st1 {v2.d}[1], [x1], #8
subs w4, w4, #1
b.gt 1b
ld1 {v0.4S}, [x2]
ld1 {v1.4S}, [x3]
zip1 v4.4S, v0.4S, v0.4S
zip2 v5.4S, v0.4S, v0.4S
zip1 v6.4S, v1.4S, v1.4S
zip2 v7.4S, v1.4S, v1.4S
1: ld1 {v2.2S}, [x0]
ld1 {v3.2S}, [x1]
fadd v4.4S, v4.4S, v6.4S
fadd v5.4S, v5.4S, v7.4S
mov v2.D[1], v2.D[0]
mov v3.D[1], v3.D[0]
fmul v2.4S, v2.4S, v4.4S
fmla v2.4S, v3.4S, v5.4S
st1 {v2.D}[0], [x0], #8
st1 {v2.D}[1], [x1], #8
subs w4, w4, #1
b.gt 1b
ret
endfunc
function ff_ps_stereo_interpolate_ipdopd_neon, export=1
ld1 {v0.4s,v1.4s}, [x2]
ld1 {v6.4s,v7.4s}, [x3]
fneg v2.4s, v1.4s
fneg v3.4s, v7.4s
zip1 v16.4s, v0.4s, v0.4s
zip2 v17.4s, v0.4s, v0.4s
zip1 v18.4s, v2.4s, v1.4s
zip2 v19.4s, v2.4s, v1.4s
zip1 v20.4s, v6.4s, v6.4s
zip2 v21.4s, v6.4s, v6.4s
zip1 v22.4s, v3.4s, v7.4s
zip2 v23.4s, v3.4s, v7.4s
1: ld1 {v2.2s}, [x0]
ld1 {v3.2s}, [x1]
fadd v16.4s, v16.4s, v20.4s
fadd v17.4s, v17.4s, v21.4s
mov v2.d[1], v2.d[0]
mov v3.d[1], v3.d[0]
fmul v4.4s, v2.4s, v16.4s
fmla v4.4s, v3.4s, v17.4s
fadd v18.4s, v18.4s, v22.4s
fadd v19.4s, v19.4s, v23.4s
ext v2.16b, v2.16b, v2.16b, #4
ext v3.16b, v3.16b, v3.16b, #4
fmla v4.4s, v2.4s, v18.4s
fmla v4.4s, v3.4s, v19.4s
st1 {v4.d}[0], [x0], #8
st1 {v4.d}[1], [x1], #8
subs w4, w4, #1
b.gt 1b
ld1 {v0.4S,v1.4S}, [x2]
ld1 {v6.4S,v7.4S}, [x3]
fneg v2.4S, v1.4S
fneg v3.4S, v7.4S
zip1 v16.4S, v0.4S, v0.4S
zip2 v17.4S, v0.4S, v0.4S
zip1 v18.4S, v2.4S, v1.4S
zip2 v19.4S, v2.4S, v1.4S
zip1 v20.4S, v6.4S, v6.4S
zip2 v21.4S, v6.4S, v6.4S
zip1 v22.4S, v3.4S, v7.4S
zip2 v23.4S, v3.4S, v7.4S
1: ld1 {v2.2S}, [x0]
ld1 {v3.2S}, [x1]
fadd v16.4S, v16.4S, v20.4S
fadd v17.4S, v17.4S, v21.4S
mov v2.D[1], v2.D[0]
mov v3.D[1], v3.D[0]
fmul v4.4S, v2.4S, v16.4S
fmla v4.4S, v3.4S, v17.4S
fadd v18.4S, v18.4S, v22.4S
fadd v19.4S, v19.4S, v23.4S
ext v2.16B, v2.16B, v2.16B, #4
ext v3.16B, v3.16B, v3.16B, #4
fmla v4.4S, v2.4S, v18.4S
fmla v4.4S, v3.4S, v19.4S
st1 {v4.D}[0], [x0], #8
st1 {v4.D}[1], [x1], #8
subs w4, w4, #1
b.gt 1b
ret
endfunc
function ff_ps_hybrid_analysis_neon, export=1
lsl x3, x3, #3
ld2 {v0.4s,v1.4s}, [x1], #32
ld2 {v2.2s,v3.2s}, [x1], #16
ld1 {v24.2s}, [x1], #8
ld2 {v4.2s,v5.2s}, [x1], #16
ld2 {v6.4s,v7.4s}, [x1]
rev64 v6.4s, v6.4s
rev64 v7.4s, v7.4s
ext v6.16b, v6.16b, v6.16b, #8
ext v7.16b, v7.16b, v7.16b, #8
rev64 v4.2s, v4.2s
rev64 v5.2s, v5.2s
mov v2.d[1], v3.d[0]
mov v4.d[1], v5.d[0]
mov v5.d[1], v2.d[0]
mov v3.d[1], v4.d[0]
fadd v16.4s, v0.4s, v6.4s
fadd v17.4s, v1.4s, v7.4s
fsub v18.4s, v1.4s, v7.4s
fsub v19.4s, v0.4s, v6.4s
fadd v22.4s, v2.4s, v4.4s
fsub v23.4s, v5.4s, v3.4s
trn1 v20.2d, v22.2d, v23.2d // {re4+re8, re5+re7, im8-im4, im7-im5}
trn2 v21.2d, v22.2d, v23.2d // {im4+im8, im5+im7, re4-re8, re5-re7}
1: ld2 {v2.4s,v3.4s}, [x2], #32
ld2 {v4.2s,v5.2s}, [x2], #16
ld1 {v6.2s}, [x2], #8
add x2, x2, #8
mov v4.d[1], v5.d[0]
mov v6.s[1], v6.s[0]
fmul v6.2s, v6.2s, v24.2s
fmul v0.4s, v2.4s, v16.4s
fmul v1.4s, v2.4s, v17.4s
fmls v0.4s, v3.4s, v18.4s
fmla v1.4s, v3.4s, v19.4s
fmla v0.4s, v4.4s, v20.4s
fmla v1.4s, v4.4s, v21.4s
faddp v0.4s, v0.4s, v1.4s
faddp v0.4s, v0.4s, v0.4s
fadd v0.2s, v0.2s, v6.2s
st1 {v0.2s}, [x0], x3
subs w4, w4, #1
b.gt 1b
lsl x3, x3, #3
ld2 {v0.4S,v1.4S}, [x1], #32
ld2 {v2.2S,v3.2S}, [x1], #16
ld1 {v24.2S}, [x1], #8
ld2 {v4.2S,v5.2S}, [x1], #16
ld2 {v6.4S,v7.4S}, [x1]
rev64 v6.4S, v6.4S
rev64 v7.4S, v7.4S
ext v6.16B, v6.16B, v6.16B, #8
ext v7.16B, v7.16B, v7.16B, #8
rev64 v4.2S, v4.2S
rev64 v5.2S, v5.2S
mov v2.D[1], v3.D[0]
mov v4.D[1], v5.D[0]
mov v5.D[1], v2.D[0]
mov v3.D[1], v4.D[0]
fadd v16.4S, v0.4S, v6.4S
fadd v17.4S, v1.4S, v7.4S
fsub v18.4S, v1.4S, v7.4S
fsub v19.4S, v0.4S, v6.4S
fadd v22.4S, v2.4S, v4.4S
fsub v23.4S, v5.4S, v3.4S
trn1 v20.2D, v22.2D, v23.2D // {re4+re8, re5+re7, im8-im4, im7-im5}
trn2 v21.2D, v22.2D, v23.2D // {im4+im8, im5+im7, re4-re8, re5-re7}
1: ld2 {v2.4S,v3.4S}, [x2], #32
ld2 {v4.2S,v5.2S}, [x2], #16
ld1 {v6.2S}, [x2], #8
add x2, x2, #8
mov v4.D[1], v5.D[0]
mov v6.S[1], v6.S[0]
fmul v6.2S, v6.2S, v24.2S
fmul v0.4S, v2.4S, v16.4S
fmul v1.4S, v2.4S, v17.4S
fmls v0.4S, v3.4S, v18.4S
fmla v1.4S, v3.4S, v19.4S
fmla v0.4S, v4.4S, v20.4S
fmla v1.4S, v4.4S, v21.4S
faddp v0.4S, v0.4S, v1.4S
faddp v0.4S, v0.4S, v0.4S
fadd v0.2S, v0.2S, v6.2S
st1 {v0.2S}, [x0], x3
subs w4, w4, #1
b.gt 1b
ret
endfunc
+12 -12
View File
@@ -353,18 +353,18 @@ function fft\n\()_neon, align=6
endfunc
.endm
def_fft 32, 16, 8
def_fft 64, 32, 16
def_fft 128, 64, 32
def_fft 256, 128, 64
def_fft 512, 256, 128
def_fft 1024, 512, 256
def_fft 2048, 1024, 512
def_fft 4096, 2048, 1024
def_fft 8192, 4096, 2048
def_fft 16384, 8192, 4096
def_fft 32768, 16384, 8192
def_fft 65536, 32768, 16384
def_fft 32, 16, 8
def_fft 64, 32, 16
def_fft 128, 64, 32
def_fft 256, 128, 64
def_fft 512, 256, 128
def_fft 1024, 512, 256
def_fft 2048, 1024, 512
def_fft 4096, 2048, 1024
def_fft 8192, 4096, 2048
def_fft 16384, 8192, 4096
def_fft 32768, 16384, 8192
def_fft 65536, 32768, 16384
function ff_fft_calc_neon, export=1
prfm pldl1keep, [x1]
+205 -205
View File
@@ -36,11 +36,11 @@ function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
lsl w9, w9, #3
lsl w10, w10, #1
add w9, w9, w10
add x6, x6, w9, uxtw
ld1r {v22.8h}, [x6]
add x6, x6, w9, UXTW
ld1r {v22.8H}, [x6]
.endif
.ifc \codec,vc1
movi v22.8h, #28
movi v22.8H, #28
.endif
mul w7, w4, w5
lsl w14, w5, #3
@@ -53,139 +53,139 @@ function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
add w4, w4, #64
b.eq 2f
dup v0.8b, w4
dup v1.8b, w12
ld1 {v4.8b, v5.8b}, [x1], x2
dup v2.8b, w6
dup v3.8b, w7
ext v5.8b, v4.8b, v5.8b, #1
1: ld1 {v6.8b, v7.8b}, [x1], x2
umull v16.8h, v4.8b, v0.8b
umlal v16.8h, v5.8b, v1.8b
ext v7.8b, v6.8b, v7.8b, #1
ld1 {v4.8b, v5.8b}, [x1], x2
umlal v16.8h, v6.8b, v2.8b
dup v0.8B, w4
dup v1.8B, w12
ld1 {v4.8B, v5.8B}, [x1], x2
dup v2.8B, w6
dup v3.8B, w7
ext v5.8B, v4.8B, v5.8B, #1
1: ld1 {v6.8B, v7.8B}, [x1], x2
umull v16.8H, v4.8B, v0.8B
umlal v16.8H, v5.8B, v1.8B
ext v7.8B, v6.8B, v7.8B, #1
ld1 {v4.8B, v5.8B}, [x1], x2
umlal v16.8H, v6.8B, v2.8B
prfm pldl1strm, [x1]
ext v5.8b, v4.8b, v5.8b, #1
umlal v16.8h, v7.8b, v3.8b
umull v17.8h, v6.8b, v0.8b
ext v5.8B, v4.8B, v5.8B, #1
umlal v16.8H, v7.8B, v3.8B
umull v17.8H, v6.8B, v0.8B
subs w3, w3, #2
umlal v17.8h, v7.8b, v1.8b
umlal v17.8h, v4.8b, v2.8b
umlal v17.8h, v5.8b, v3.8b
umlal v17.8H, v7.8B, v1.8B
umlal v17.8H, v4.8B, v2.8B
umlal v17.8H, v5.8B, v3.8B
prfm pldl1strm, [x1, x2]
.ifc \codec,h264
rshrn v16.8b, v16.8h, #6
rshrn v17.8b, v17.8h, #6
rshrn v16.8B, v16.8H, #6
rshrn v17.8B, v17.8H, #6
.else
add v16.8h, v16.8h, v22.8h
add v17.8h, v17.8h, v22.8h
shrn v16.8b, v16.8h, #6
shrn v17.8b, v17.8h, #6
add v16.8H, v16.8H, v22.8H
add v17.8H, v17.8H, v22.8H
shrn v16.8B, v16.8H, #6
shrn v17.8B, v17.8H, #6
.endif
.ifc \type,avg
ld1 {v20.8b}, [x8], x2
ld1 {v21.8b}, [x8], x2
urhadd v16.8b, v16.8b, v20.8b
urhadd v17.8b, v17.8b, v21.8b
ld1 {v20.8B}, [x8], x2
ld1 {v21.8B}, [x8], x2
urhadd v16.8B, v16.8B, v20.8B
urhadd v17.8B, v17.8B, v21.8B
.endif
st1 {v16.8b}, [x0], x2
st1 {v17.8b}, [x0], x2
st1 {v16.8B}, [x0], x2
st1 {v17.8B}, [x0], x2
b.gt 1b
ret
2: adds w12, w12, w6
dup v0.8b, w4
dup v0.8B, w4
b.eq 5f
tst w6, w6
dup v1.8b, w12
dup v1.8B, w12
b.eq 4f
ld1 {v4.8b}, [x1], x2
3: ld1 {v6.8b}, [x1], x2
umull v16.8h, v4.8b, v0.8b
umlal v16.8h, v6.8b, v1.8b
ld1 {v4.8b}, [x1], x2
umull v17.8h, v6.8b, v0.8b
umlal v17.8h, v4.8b, v1.8b
ld1 {v4.8B}, [x1], x2
3: ld1 {v6.8B}, [x1], x2
umull v16.8H, v4.8B, v0.8B
umlal v16.8H, v6.8B, v1.8B
ld1 {v4.8B}, [x1], x2
umull v17.8H, v6.8B, v0.8B
umlal v17.8H, v4.8B, v1.8B
prfm pldl1strm, [x1]
.ifc \codec,h264
rshrn v16.8b, v16.8h, #6
rshrn v17.8b, v17.8h, #6
rshrn v16.8B, v16.8H, #6
rshrn v17.8B, v17.8H, #6
.else
add v16.8h, v16.8h, v22.8h
add v17.8h, v17.8h, v22.8h
shrn v16.8b, v16.8h, #6
shrn v17.8b, v17.8h, #6
add v16.8H, v16.8H, v22.8H
add v17.8H, v17.8H, v22.8H
shrn v16.8B, v16.8H, #6
shrn v17.8B, v17.8H, #6
.endif
prfm pldl1strm, [x1, x2]
.ifc \type,avg
ld1 {v20.8b}, [x8], x2
ld1 {v21.8b}, [x8], x2
urhadd v16.8b, v16.8b, v20.8b
urhadd v17.8b, v17.8b, v21.8b
ld1 {v20.8B}, [x8], x2
ld1 {v21.8B}, [x8], x2
urhadd v16.8B, v16.8B, v20.8B
urhadd v17.8B, v17.8B, v21.8B
.endif
subs w3, w3, #2
st1 {v16.8b}, [x0], x2
st1 {v17.8b}, [x0], x2
st1 {v16.8B}, [x0], x2
st1 {v17.8B}, [x0], x2
b.gt 3b
ret
4: ld1 {v4.8b, v5.8b}, [x1], x2
ld1 {v6.8b, v7.8b}, [x1], x2
ext v5.8b, v4.8b, v5.8b, #1
ext v7.8b, v6.8b, v7.8b, #1
4: ld1 {v4.8B, v5.8B}, [x1], x2
ld1 {v6.8B, v7.8B}, [x1], x2
ext v5.8B, v4.8B, v5.8B, #1
ext v7.8B, v6.8B, v7.8B, #1
prfm pldl1strm, [x1]
subs w3, w3, #2
umull v16.8h, v4.8b, v0.8b
umlal v16.8h, v5.8b, v1.8b
umull v17.8h, v6.8b, v0.8b
umlal v17.8h, v7.8b, v1.8b
umull v16.8H, v4.8B, v0.8B
umlal v16.8H, v5.8B, v1.8B
umull v17.8H, v6.8B, v0.8B
umlal v17.8H, v7.8B, v1.8B
prfm pldl1strm, [x1, x2]
.ifc \codec,h264
rshrn v16.8b, v16.8h, #6
rshrn v17.8b, v17.8h, #6
rshrn v16.8B, v16.8H, #6
rshrn v17.8B, v17.8H, #6
.else
add v16.8h, v16.8h, v22.8h
add v17.8h, v17.8h, v22.8h
shrn v16.8b, v16.8h, #6
shrn v17.8b, v17.8h, #6
add v16.8H, v16.8H, v22.8H
add v17.8H, v17.8H, v22.8H
shrn v16.8B, v16.8H, #6
shrn v17.8B, v17.8H, #6
.endif
.ifc \type,avg
ld1 {v20.8b}, [x8], x2
ld1 {v21.8b}, [x8], x2
urhadd v16.8b, v16.8b, v20.8b
urhadd v17.8b, v17.8b, v21.8b
ld1 {v20.8B}, [x8], x2
ld1 {v21.8B}, [x8], x2
urhadd v16.8B, v16.8B, v20.8B
urhadd v17.8B, v17.8B, v21.8B
.endif
st1 {v16.8b}, [x0], x2
st1 {v17.8b}, [x0], x2
st1 {v16.8B}, [x0], x2
st1 {v17.8B}, [x0], x2
b.gt 4b
ret
5: ld1 {v4.8b}, [x1], x2
ld1 {v5.8b}, [x1], x2
5: ld1 {v4.8B}, [x1], x2
ld1 {v5.8B}, [x1], x2
prfm pldl1strm, [x1]
subs w3, w3, #2
umull v16.8h, v4.8b, v0.8b
umull v17.8h, v5.8b, v0.8b
umull v16.8H, v4.8B, v0.8B
umull v17.8H, v5.8B, v0.8B
prfm pldl1strm, [x1, x2]
.ifc \codec,h264
rshrn v16.8b, v16.8h, #6
rshrn v17.8b, v17.8h, #6
rshrn v16.8B, v16.8H, #6
rshrn v17.8B, v17.8H, #6
.else
add v16.8h, v16.8h, v22.8h
add v17.8h, v17.8h, v22.8h
shrn v16.8b, v16.8h, #6
shrn v17.8b, v17.8h, #6
add v16.8H, v16.8H, v22.8H
add v17.8H, v17.8H, v22.8H
shrn v16.8B, v16.8H, #6
shrn v17.8B, v17.8H, #6
.endif
.ifc \type,avg
ld1 {v20.8b}, [x8], x2
ld1 {v21.8b}, [x8], x2
urhadd v16.8b, v16.8b, v20.8b
urhadd v17.8b, v17.8b, v21.8b
ld1 {v20.8B}, [x8], x2
ld1 {v21.8B}, [x8], x2
urhadd v16.8B, v16.8B, v20.8B
urhadd v17.8B, v17.8B, v21.8B
.endif
st1 {v16.8b}, [x0], x2
st1 {v17.8b}, [x0], x2
st1 {v16.8B}, [x0], x2
st1 {v17.8B}, [x0], x2
b.gt 5b
ret
endfunc
@@ -206,11 +206,11 @@ function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
lsl w9, w9, #3
lsl w10, w10, #1
add w9, w9, w10
add x6, x6, w9, uxtw
ld1r {v22.8h}, [x6]
add x6, x6, w9, UXTW
ld1r {v22.8H}, [x6]
.endif
.ifc \codec,vc1
movi v22.8h, #28
movi v22.8H, #28
.endif
mul w7, w4, w5
lsl w14, w5, #3
@@ -223,133 +223,133 @@ function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
add w4, w4, #64
b.eq 2f
dup v24.8b, w4
dup v25.8b, w12
ld1 {v4.8b}, [x1], x2
dup v26.8b, w6
dup v27.8b, w7
ext v5.8b, v4.8b, v5.8b, #1
trn1 v0.2s, v24.2s, v25.2s
trn1 v2.2s, v26.2s, v27.2s
trn1 v4.2s, v4.2s, v5.2s
1: ld1 {v6.8b}, [x1], x2
ext v7.8b, v6.8b, v7.8b, #1
trn1 v6.2s, v6.2s, v7.2s
umull v18.8h, v4.8b, v0.8b
umlal v18.8h, v6.8b, v2.8b
ld1 {v4.8b}, [x1], x2
ext v5.8b, v4.8b, v5.8b, #1
trn1 v4.2s, v4.2s, v5.2s
dup v24.8B, w4
dup v25.8B, w12
ld1 {v4.8B}, [x1], x2
dup v26.8B, w6
dup v27.8B, w7
ext v5.8B, v4.8B, v5.8B, #1
trn1 v0.2S, v24.2S, v25.2S
trn1 v2.2S, v26.2S, v27.2S
trn1 v4.2S, v4.2S, v5.2S
1: ld1 {v6.8B}, [x1], x2
ext v7.8B, v6.8B, v7.8B, #1
trn1 v6.2S, v6.2S, v7.2S
umull v18.8H, v4.8B, v0.8B
umlal v18.8H, v6.8B, v2.8B
ld1 {v4.8B}, [x1], x2
ext v5.8B, v4.8B, v5.8B, #1
trn1 v4.2S, v4.2S, v5.2S
prfm pldl1strm, [x1]
umull v19.8h, v6.8b, v0.8b
umlal v19.8h, v4.8b, v2.8b
trn1 v30.2d, v18.2d, v19.2d
trn2 v31.2d, v18.2d, v19.2d
add v18.8h, v30.8h, v31.8h
umull v19.8H, v6.8B, v0.8B
umlal v19.8H, v4.8B, v2.8B
trn1 v30.2D, v18.2D, v19.2D
trn2 v31.2D, v18.2D, v19.2D
add v18.8H, v30.8H, v31.8H
.ifc \codec,h264
rshrn v16.8b, v18.8h, #6
rshrn v16.8B, v18.8H, #6
.else
add v18.8h, v18.8h, v22.8h
shrn v16.8b, v18.8h, #6
add v18.8H, v18.8H, v22.8H
shrn v16.8B, v18.8H, #6
.endif
subs w3, w3, #2
prfm pldl1strm, [x1, x2]
.ifc \type,avg
ld1 {v20.s}[0], [x8], x2
ld1 {v20.s}[1], [x8], x2
urhadd v16.8b, v16.8b, v20.8b
ld1 {v20.S}[0], [x8], x2
ld1 {v20.S}[1], [x8], x2
urhadd v16.8B, v16.8B, v20.8B
.endif
st1 {v16.s}[0], [x0], x2
st1 {v16.s}[1], [x0], x2
st1 {v16.S}[0], [x0], x2
st1 {v16.S}[1], [x0], x2
b.gt 1b
ret
2: adds w12, w12, w6
dup v30.8b, w4
dup v30.8B, w4
b.eq 5f
tst w6, w6
dup v31.8b, w12
trn1 v0.2s, v30.2s, v31.2s
trn2 v1.2s, v30.2s, v31.2s
dup v31.8B, w12
trn1 v0.2S, v30.2S, v31.2S
trn2 v1.2S, v30.2S, v31.2S
b.eq 4f
ext v1.8b, v0.8b, v1.8b, #4
ld1 {v4.s}[0], [x1], x2
3: ld1 {v4.s}[1], [x1], x2
umull v18.8h, v4.8b, v0.8b
ld1 {v4.s}[0], [x1], x2
umull v19.8h, v4.8b, v1.8b
trn1 v30.2d, v18.2d, v19.2d
trn2 v31.2d, v18.2d, v19.2d
add v18.8h, v30.8h, v31.8h
ext v1.8B, v0.8B, v1.8B, #4
ld1 {v4.S}[0], [x1], x2
3: ld1 {v4.S}[1], [x1], x2
umull v18.8H, v4.8B, v0.8B
ld1 {v4.S}[0], [x1], x2
umull v19.8H, v4.8B, v1.8B
trn1 v30.2D, v18.2D, v19.2D
trn2 v31.2D, v18.2D, v19.2D
add v18.8H, v30.8H, v31.8H
prfm pldl1strm, [x1]
.ifc \codec,h264
rshrn v16.8b, v18.8h, #6
rshrn v16.8B, v18.8H, #6
.else
add v18.8h, v18.8h, v22.8h
shrn v16.8b, v18.8h, #6
add v18.8H, v18.8H, v22.8H
shrn v16.8B, v18.8H, #6
.endif
.ifc \type,avg
ld1 {v20.s}[0], [x8], x2
ld1 {v20.s}[1], [x8], x2
urhadd v16.8b, v16.8b, v20.8b
ld1 {v20.S}[0], [x8], x2
ld1 {v20.S}[1], [x8], x2
urhadd v16.8B, v16.8B, v20.8B
.endif
subs w3, w3, #2
prfm pldl1strm, [x1, x2]
st1 {v16.s}[0], [x0], x2
st1 {v16.s}[1], [x0], x2
st1 {v16.S}[0], [x0], x2
st1 {v16.S}[1], [x0], x2
b.gt 3b
ret
4: ld1 {v4.8b}, [x1], x2
ld1 {v6.8b}, [x1], x2
ext v5.8b, v4.8b, v5.8b, #1
ext v7.8b, v6.8b, v7.8b, #1
trn1 v4.2s, v4.2s, v5.2s
trn1 v6.2s, v6.2s, v7.2s
umull v18.8h, v4.8b, v0.8b
umull v19.8h, v6.8b, v0.8b
4: ld1 {v4.8B}, [x1], x2
ld1 {v6.8B}, [x1], x2
ext v5.8B, v4.8B, v5.8B, #1
ext v7.8B, v6.8B, v7.8B, #1
trn1 v4.2S, v4.2S, v5.2S
trn1 v6.2S, v6.2S, v7.2S
umull v18.8H, v4.8B, v0.8B
umull v19.8H, v6.8B, v0.8B
subs w3, w3, #2
trn1 v30.2d, v18.2d, v19.2d
trn2 v31.2d, v18.2d, v19.2d
add v18.8h, v30.8h, v31.8h
trn1 v30.2D, v18.2D, v19.2D
trn2 v31.2D, v18.2D, v19.2D
add v18.8H, v30.8H, v31.8H
prfm pldl1strm, [x1]
.ifc \codec,h264
rshrn v16.8b, v18.8h, #6
rshrn v16.8B, v18.8H, #6
.else
add v18.8h, v18.8h, v22.8h
shrn v16.8b, v18.8h, #6
add v18.8H, v18.8H, v22.8H
shrn v16.8B, v18.8H, #6
.endif
.ifc \type,avg
ld1 {v20.s}[0], [x8], x2
ld1 {v20.s}[1], [x8], x2
urhadd v16.8b, v16.8b, v20.8b
ld1 {v20.S}[0], [x8], x2
ld1 {v20.S}[1], [x8], x2
urhadd v16.8B, v16.8B, v20.8B
.endif
prfm pldl1strm, [x1]
st1 {v16.s}[0], [x0], x2
st1 {v16.s}[1], [x0], x2
st1 {v16.S}[0], [x0], x2
st1 {v16.S}[1], [x0], x2
b.gt 4b
ret
5: ld1 {v4.s}[0], [x1], x2
ld1 {v4.s}[1], [x1], x2
umull v18.8h, v4.8b, v30.8b
5: ld1 {v4.S}[0], [x1], x2
ld1 {v4.S}[1], [x1], x2
umull v18.8H, v4.8B, v30.8B
subs w3, w3, #2
prfm pldl1strm, [x1]
.ifc \codec,h264
rshrn v16.8b, v18.8h, #6
rshrn v16.8B, v18.8H, #6
.else
add v18.8h, v18.8h, v22.8h
shrn v16.8b, v18.8h, #6
add v18.8H, v18.8H, v22.8H
shrn v16.8B, v18.8H, #6
.endif
.ifc \type,avg
ld1 {v20.s}[0], [x8], x2
ld1 {v20.s}[1], [x8], x2
urhadd v16.8b, v16.8b, v20.8b
ld1 {v20.S}[0], [x8], x2
ld1 {v20.S}[1], [x8], x2
urhadd v16.8B, v16.8B, v20.8B
.endif
prfm pldl1strm, [x1]
st1 {v16.s}[0], [x0], x2
st1 {v16.s}[1], [x0], x2
st1 {v16.S}[0], [x0], x2
st1 {v16.S}[1], [x0], x2
b.gt 5b
ret
endfunc
@@ -370,51 +370,51 @@ function ff_\type\()_h264_chroma_mc2_neon, export=1
sub w4, w7, w13
sub w4, w4, w14
add w4, w4, #64
dup v0.8b, w4
dup v2.8b, w12
dup v1.8b, w6
dup v3.8b, w7
trn1 v0.4h, v0.4h, v2.4h
trn1 v1.4h, v1.4h, v3.4h
dup v0.8B, w4
dup v2.8B, w12
dup v1.8B, w6
dup v3.8B, w7
trn1 v0.4H, v0.4H, v2.4H
trn1 v1.4H, v1.4H, v3.4H
1:
ld1 {v4.s}[0], [x1], x2
ld1 {v4.s}[1], [x1], x2
rev64 v5.2s, v4.2s
ld1 {v5.s}[1], [x1]
ext v6.8b, v4.8b, v5.8b, #1
ext v7.8b, v5.8b, v4.8b, #1
trn1 v4.4h, v4.4h, v6.4h
trn1 v5.4h, v5.4h, v7.4h
umull v16.8h, v4.8b, v0.8b
umlal v16.8h, v5.8b, v1.8b
ld1 {v4.S}[0], [x1], x2
ld1 {v4.S}[1], [x1], x2
rev64 v5.2S, v4.2S
ld1 {v5.S}[1], [x1]
ext v6.8B, v4.8B, v5.8B, #1
ext v7.8B, v5.8B, v4.8B, #1
trn1 v4.4H, v4.4H, v6.4H
trn1 v5.4H, v5.4H, v7.4H
umull v16.8H, v4.8B, v0.8B
umlal v16.8H, v5.8B, v1.8B
.ifc \type,avg
ld1 {v18.h}[0], [x0], x2
ld1 {v18.h}[2], [x0]
ld1 {v18.H}[0], [x0], x2
ld1 {v18.H}[2], [x0]
sub x0, x0, x2
.endif
rev64 v17.4s, v16.4s
add v16.8h, v16.8h, v17.8h
rshrn v16.8b, v16.8h, #6
rev64 v17.4S, v16.4S
add v16.8H, v16.8H, v17.8H
rshrn v16.8B, v16.8H, #6
.ifc \type,avg
urhadd v16.8b, v16.8b, v18.8b
urhadd v16.8B, v16.8B, v18.8B
.endif
st1 {v16.h}[0], [x0], x2
st1 {v16.h}[2], [x0], x2
st1 {v16.H}[0], [x0], x2
st1 {v16.H}[2], [x0], x2
subs w3, w3, #2
b.gt 1b
ret
2:
ld1 {v16.h}[0], [x1], x2
ld1 {v16.h}[1], [x1], x2
ld1 {v16.H}[0], [x1], x2
ld1 {v16.H}[1], [x1], x2
.ifc \type,avg
ld1 {v18.h}[0], [x0], x2
ld1 {v18.h}[1], [x0]
ld1 {v18.H}[0], [x0], x2
ld1 {v18.H}[1], [x0]
sub x0, x0, x2
urhadd v16.8b, v16.8b, v18.8b
urhadd v16.8B, v16.8B, v18.8B
.endif
st1 {v16.h}[0], [x0], x2
st1 {v16.h}[1], [x0], x2
st1 {v16.H}[0], [x0], x2
st1 {v16.H}[1], [x0], x2
subs w3, w3, #2
b.gt 2b
ret
File diff suppressed because it is too large Load Diff
+272 -272
View File
@@ -27,114 +27,114 @@
.macro lowpass_const r
movz \r, #20, lsl #16
movk \r, #5
mov v6.s[0], \r
mov v6.S[0], \r
.endm
//trashes v0-v5
.macro lowpass_8 r0, r1, r2, r3, d0, d1, narrow=1
ext v2.8b, \r0\().8b, \r1\().8b, #2
ext v3.8b, \r0\().8b, \r1\().8b, #3
uaddl v2.8h, v2.8b, v3.8b
ext v4.8b, \r0\().8b, \r1\().8b, #1
ext v5.8b, \r0\().8b, \r1\().8b, #4
uaddl v4.8h, v4.8b, v5.8b
ext v1.8b, \r0\().8b, \r1\().8b, #5
uaddl \d0\().8h, \r0\().8b, v1.8b
ext v0.8b, \r2\().8b, \r3\().8b, #2
mla \d0\().8h, v2.8h, v6.h[1]
ext v1.8b, \r2\().8b, \r3\().8b, #3
uaddl v0.8h, v0.8b, v1.8b
ext v1.8b, \r2\().8b, \r3\().8b, #1
mls \d0\().8h, v4.8h, v6.h[0]
ext v3.8b, \r2\().8b, \r3\().8b, #4
uaddl v1.8h, v1.8b, v3.8b
ext v2.8b, \r2\().8b, \r3\().8b, #5
uaddl \d1\().8h, \r2\().8b, v2.8b
mla \d1\().8h, v0.8h, v6.h[1]
mls \d1\().8h, v1.8h, v6.h[0]
ext v2.8B, \r0\().8B, \r1\().8B, #2
ext v3.8B, \r0\().8B, \r1\().8B, #3
uaddl v2.8H, v2.8B, v3.8B
ext v4.8B, \r0\().8B, \r1\().8B, #1
ext v5.8B, \r0\().8B, \r1\().8B, #4
uaddl v4.8H, v4.8B, v5.8B
ext v1.8B, \r0\().8B, \r1\().8B, #5
uaddl \d0\().8H, \r0\().8B, v1.8B
ext v0.8B, \r2\().8B, \r3\().8B, #2
mla \d0\().8H, v2.8H, v6.H[1]
ext v1.8B, \r2\().8B, \r3\().8B, #3
uaddl v0.8H, v0.8B, v1.8B
ext v1.8B, \r2\().8B, \r3\().8B, #1
mls \d0\().8H, v4.8H, v6.H[0]
ext v3.8B, \r2\().8B, \r3\().8B, #4
uaddl v1.8H, v1.8B, v3.8B
ext v2.8B, \r2\().8B, \r3\().8B, #5
uaddl \d1\().8H, \r2\().8B, v2.8B
mla \d1\().8H, v0.8H, v6.H[1]
mls \d1\().8H, v1.8H, v6.H[0]
.if \narrow
sqrshrun \d0\().8b, \d0\().8h, #5
sqrshrun \d1\().8b, \d1\().8h, #5
sqrshrun \d0\().8B, \d0\().8H, #5
sqrshrun \d1\().8B, \d1\().8H, #5
.endif
.endm
//trashes v0-v5, v7, v30-v31
.macro lowpass_8H r0, r1
ext v0.16b, \r0\().16b, \r0\().16b, #2
ext v1.16b, \r0\().16b, \r0\().16b, #3
uaddl v0.8h, v0.8b, v1.8b
ext v2.16b, \r0\().16b, \r0\().16b, #1
ext v3.16b, \r0\().16b, \r0\().16b, #4
uaddl v2.8h, v2.8b, v3.8b
ext v30.16b, \r0\().16b, \r0\().16b, #5
uaddl \r0\().8h, \r0\().8b, v30.8b
ext v4.16b, \r1\().16b, \r1\().16b, #2
mla \r0\().8h, v0.8h, v6.h[1]
ext v5.16b, \r1\().16b, \r1\().16b, #3
uaddl v4.8h, v4.8b, v5.8b
ext v7.16b, \r1\().16b, \r1\().16b, #1
mls \r0\().8h, v2.8h, v6.h[0]
ext v0.16b, \r1\().16b, \r1\().16b, #4
uaddl v7.8h, v7.8b, v0.8b
ext v31.16b, \r1\().16b, \r1\().16b, #5
uaddl \r1\().8h, \r1\().8b, v31.8b
mla \r1\().8h, v4.8h, v6.h[1]
mls \r1\().8h, v7.8h, v6.h[0]
ext v0.16B, \r0\().16B, \r0\().16B, #2
ext v1.16B, \r0\().16B, \r0\().16B, #3
uaddl v0.8H, v0.8B, v1.8B
ext v2.16B, \r0\().16B, \r0\().16B, #1
ext v3.16B, \r0\().16B, \r0\().16B, #4
uaddl v2.8H, v2.8B, v3.8B
ext v30.16B, \r0\().16B, \r0\().16B, #5
uaddl \r0\().8H, \r0\().8B, v30.8B
ext v4.16B, \r1\().16B, \r1\().16B, #2
mla \r0\().8H, v0.8H, v6.H[1]
ext v5.16B, \r1\().16B, \r1\().16B, #3
uaddl v4.8H, v4.8B, v5.8B
ext v7.16B, \r1\().16B, \r1\().16B, #1
mls \r0\().8H, v2.8H, v6.H[0]
ext v0.16B, \r1\().16B, \r1\().16B, #4
uaddl v7.8H, v7.8B, v0.8B
ext v31.16B, \r1\().16B, \r1\().16B, #5
uaddl \r1\().8H, \r1\().8B, v31.8B
mla \r1\().8H, v4.8H, v6.H[1]
mls \r1\().8H, v7.8H, v6.H[0]
.endm
// trashes v2-v5, v30
.macro lowpass_8_1 r0, r1, d0, narrow=1
ext v2.8b, \r0\().8b, \r1\().8b, #2
ext v3.8b, \r0\().8b, \r1\().8b, #3
uaddl v2.8h, v2.8b, v3.8b
ext v4.8b, \r0\().8b, \r1\().8b, #1
ext v5.8b, \r0\().8b, \r1\().8b, #4
uaddl v4.8h, v4.8b, v5.8b
ext v30.8b, \r0\().8b, \r1\().8b, #5
uaddl \d0\().8h, \r0\().8b, v30.8b
mla \d0\().8h, v2.8h, v6.h[1]
mls \d0\().8h, v4.8h, v6.h[0]
ext v2.8B, \r0\().8B, \r1\().8B, #2
ext v3.8B, \r0\().8B, \r1\().8B, #3
uaddl v2.8H, v2.8B, v3.8B
ext v4.8B, \r0\().8B, \r1\().8B, #1
ext v5.8B, \r0\().8B, \r1\().8B, #4
uaddl v4.8H, v4.8B, v5.8B
ext v30.8B, \r0\().8B, \r1\().8B, #5
uaddl \d0\().8H, \r0\().8B, v30.8B
mla \d0\().8H, v2.8H, v6.H[1]
mls \d0\().8H, v4.8H, v6.H[0]
.if \narrow
sqrshrun \d0\().8b, \d0\().8h, #5
sqrshrun \d0\().8B, \d0\().8H, #5
.endif
.endm
// trashed v0-v7
.macro lowpass_8.16 r0, r1, r2
ext v1.16b, \r0\().16b, \r1\().16b, #4
ext v0.16b, \r0\().16b, \r1\().16b, #6
saddl v5.4s, v1.4h, v0.4h
ext v2.16b, \r0\().16b, \r1\().16b, #2
saddl2 v1.4s, v1.8h, v0.8h
ext v3.16b, \r0\().16b, \r1\().16b, #8
saddl v6.4s, v2.4h, v3.4h
ext \r1\().16b, \r0\().16b, \r1\().16b, #10
saddl2 v2.4s, v2.8h, v3.8h
saddl v0.4s, \r0\().4h, \r1\().4h
saddl2 v4.4s, \r0\().8h, \r1\().8h
ext v1.16B, \r0\().16B, \r1\().16B, #4
ext v0.16B, \r0\().16B, \r1\().16B, #6
saddl v5.4S, v1.4H, v0.4H
ext v2.16B, \r0\().16B, \r1\().16B, #2
saddl2 v1.4S, v1.8H, v0.8H
ext v3.16B, \r0\().16B, \r1\().16B, #8
saddl v6.4S, v2.4H, v3.4H
ext \r1\().16B, \r0\().16B, \r1\().16B, #10
saddl2 v2.4S, v2.8H, v3.8H
saddl v0.4S, \r0\().4H, \r1\().4H
saddl2 v4.4S, \r0\().8H, \r1\().8H
shl v3.4s, v5.4s, #4
shl v5.4s, v5.4s, #2
shl v7.4s, v6.4s, #2
add v5.4s, v5.4s, v3.4s
add v6.4s, v6.4s, v7.4s
shl v3.4S, v5.4S, #4
shl v5.4S, v5.4S, #2
shl v7.4S, v6.4S, #2
add v5.4S, v5.4S, v3.4S
add v6.4S, v6.4S, v7.4S
shl v3.4s, v1.4s, #4
shl v1.4s, v1.4s, #2
shl v7.4s, v2.4s, #2
add v1.4s, v1.4s, v3.4s
add v2.4s, v2.4s, v7.4s
shl v3.4S, v1.4S, #4
shl v1.4S, v1.4S, #2
shl v7.4S, v2.4S, #2
add v1.4S, v1.4S, v3.4S
add v2.4S, v2.4S, v7.4S
add v5.4s, v5.4s, v0.4s
sub v5.4s, v5.4s, v6.4s
add v5.4S, v5.4S, v0.4S
sub v5.4S, v5.4S, v6.4S
add v1.4s, v1.4s, v4.4s
sub v1.4s, v1.4s, v2.4s
add v1.4S, v1.4S, v4.4S
sub v1.4S, v1.4S, v2.4S
rshrn v5.4h, v5.4s, #10
rshrn2 v5.8h, v1.4s, #10
rshrn v5.4H, v5.4S, #10
rshrn2 v5.8H, v1.4S, #10
sqxtun \r2\().8b, v5.8h
sqxtun \r2\().8B, v5.8H
.endm
function put_h264_qpel16_h_lowpass_neon_packed
@@ -163,19 +163,19 @@ function \type\()_h264_qpel16_h_lowpass_neon
endfunc
function \type\()_h264_qpel8_h_lowpass_neon
1: ld1 {v28.8b, v29.8b}, [x1], x2
ld1 {v16.8b, v17.8b}, [x1], x2
1: ld1 {v28.8B, v29.8B}, [x1], x2
ld1 {v16.8B, v17.8B}, [x1], x2
subs x12, x12, #2
lowpass_8 v28, v29, v16, v17, v28, v16
.ifc \type,avg
ld1 {v2.8b}, [x0], x3
urhadd v28.8b, v28.8b, v2.8b
ld1 {v3.8b}, [x0]
urhadd v16.8b, v16.8b, v3.8b
ld1 {v2.8B}, [x0], x3
urhadd v28.8B, v28.8B, v2.8B
ld1 {v3.8B}, [x0]
urhadd v16.8B, v16.8B, v3.8B
sub x0, x0, x3
.endif
st1 {v28.8b}, [x0], x3
st1 {v16.8b}, [x0], x3
st1 {v28.8B}, [x0], x3
st1 {v16.8B}, [x0], x3
b.ne 1b
ret
endfunc
@@ -200,23 +200,23 @@ function \type\()_h264_qpel16_h_lowpass_l2_neon
endfunc
function \type\()_h264_qpel8_h_lowpass_l2_neon
1: ld1 {v26.8b, v27.8b}, [x1], x2
ld1 {v16.8b, v17.8b}, [x1], x2
ld1 {v28.8b}, [x3], x2
ld1 {v29.8b}, [x3], x2
1: ld1 {v26.8B, v27.8B}, [x1], x2
ld1 {v16.8B, v17.8B}, [x1], x2
ld1 {v28.8B}, [x3], x2
ld1 {v29.8B}, [x3], x2
subs x12, x12, #2
lowpass_8 v26, v27, v16, v17, v26, v27
urhadd v26.8b, v26.8b, v28.8b
urhadd v27.8b, v27.8b, v29.8b
urhadd v26.8B, v26.8B, v28.8B
urhadd v27.8B, v27.8B, v29.8B
.ifc \type,avg
ld1 {v2.8b}, [x0], x2
urhadd v26.8b, v26.8b, v2.8b
ld1 {v3.8b}, [x0]
urhadd v27.8b, v27.8b, v3.8b
ld1 {v2.8B}, [x0], x2
urhadd v26.8B, v26.8B, v2.8B
ld1 {v3.8B}, [x0]
urhadd v27.8B, v27.8B, v3.8B
sub x0, x0, x2
.endif
st1 {v26.8b}, [x0], x2
st1 {v27.8b}, [x0], x2
st1 {v26.8B}, [x0], x2
st1 {v27.8B}, [x0], x2
b.ne 1b
ret
endfunc
@@ -257,19 +257,19 @@ function \type\()_h264_qpel16_v_lowpass_neon
endfunc
function \type\()_h264_qpel8_v_lowpass_neon
ld1 {v16.8b}, [x1], x3
ld1 {v18.8b}, [x1], x3
ld1 {v20.8b}, [x1], x3
ld1 {v22.8b}, [x1], x3
ld1 {v24.8b}, [x1], x3
ld1 {v26.8b}, [x1], x3
ld1 {v28.8b}, [x1], x3
ld1 {v30.8b}, [x1], x3
ld1 {v17.8b}, [x1], x3
ld1 {v19.8b}, [x1], x3
ld1 {v21.8b}, [x1], x3
ld1 {v23.8b}, [x1], x3
ld1 {v25.8b}, [x1]
ld1 {v16.8B}, [x1], x3
ld1 {v18.8B}, [x1], x3
ld1 {v20.8B}, [x1], x3
ld1 {v22.8B}, [x1], x3
ld1 {v24.8B}, [x1], x3
ld1 {v26.8B}, [x1], x3
ld1 {v28.8B}, [x1], x3
ld1 {v30.8B}, [x1], x3
ld1 {v17.8B}, [x1], x3
ld1 {v19.8B}, [x1], x3
ld1 {v21.8B}, [x1], x3
ld1 {v23.8B}, [x1], x3
ld1 {v25.8B}, [x1]
transpose_8x8B v16, v18, v20, v22, v24, v26, v28, v30, v0, v1
transpose_8x8B v17, v19, v21, v23, v25, v27, v29, v31, v0, v1
@@ -280,33 +280,33 @@ function \type\()_h264_qpel8_v_lowpass_neon
transpose_8x8B v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
.ifc \type,avg
ld1 {v24.8b}, [x0], x2
urhadd v16.8b, v16.8b, v24.8b
ld1 {v25.8b}, [x0], x2
urhadd v17.8b, v17.8b, v25.8b
ld1 {v26.8b}, [x0], x2
urhadd v18.8b, v18.8b, v26.8b
ld1 {v27.8b}, [x0], x2
urhadd v19.8b, v19.8b, v27.8b
ld1 {v28.8b}, [x0], x2
urhadd v20.8b, v20.8b, v28.8b
ld1 {v29.8b}, [x0], x2
urhadd v21.8b, v21.8b, v29.8b
ld1 {v30.8b}, [x0], x2
urhadd v22.8b, v22.8b, v30.8b
ld1 {v31.8b}, [x0], x2
urhadd v23.8b, v23.8b, v31.8b
ld1 {v24.8B}, [x0], x2
urhadd v16.8B, v16.8B, v24.8B
ld1 {v25.8B}, [x0], x2
urhadd v17.8B, v17.8B, v25.8B
ld1 {v26.8B}, [x0], x2
urhadd v18.8B, v18.8B, v26.8B
ld1 {v27.8B}, [x0], x2
urhadd v19.8B, v19.8B, v27.8B
ld1 {v28.8B}, [x0], x2
urhadd v20.8B, v20.8B, v28.8B
ld1 {v29.8B}, [x0], x2
urhadd v21.8B, v21.8B, v29.8B
ld1 {v30.8B}, [x0], x2
urhadd v22.8B, v22.8B, v30.8B
ld1 {v31.8B}, [x0], x2
urhadd v23.8B, v23.8B, v31.8B
sub x0, x0, x2, lsl #3
.endif
st1 {v16.8b}, [x0], x2
st1 {v17.8b}, [x0], x2
st1 {v18.8b}, [x0], x2
st1 {v19.8b}, [x0], x2
st1 {v20.8b}, [x0], x2
st1 {v21.8b}, [x0], x2
st1 {v22.8b}, [x0], x2
st1 {v23.8b}, [x0], x2
st1 {v16.8B}, [x0], x2
st1 {v17.8B}, [x0], x2
st1 {v18.8B}, [x0], x2
st1 {v19.8B}, [x0], x2
st1 {v20.8B}, [x0], x2
st1 {v21.8B}, [x0], x2
st1 {v22.8B}, [x0], x2
st1 {v23.8B}, [x0], x2
ret
endfunc
@@ -334,19 +334,19 @@ function \type\()_h264_qpel16_v_lowpass_l2_neon
endfunc
function \type\()_h264_qpel8_v_lowpass_l2_neon
ld1 {v16.8b}, [x1], x3
ld1 {v18.8b}, [x1], x3
ld1 {v20.8b}, [x1], x3
ld1 {v22.8b}, [x1], x3
ld1 {v24.8b}, [x1], x3
ld1 {v26.8b}, [x1], x3
ld1 {v28.8b}, [x1], x3
ld1 {v30.8b}, [x1], x3
ld1 {v17.8b}, [x1], x3
ld1 {v19.8b}, [x1], x3
ld1 {v21.8b}, [x1], x3
ld1 {v23.8b}, [x1], x3
ld1 {v25.8b}, [x1]
ld1 {v16.8B}, [x1], x3
ld1 {v18.8B}, [x1], x3
ld1 {v20.8B}, [x1], x3
ld1 {v22.8B}, [x1], x3
ld1 {v24.8B}, [x1], x3
ld1 {v26.8B}, [x1], x3
ld1 {v28.8B}, [x1], x3
ld1 {v30.8B}, [x1], x3
ld1 {v17.8B}, [x1], x3
ld1 {v19.8B}, [x1], x3
ld1 {v21.8B}, [x1], x3
ld1 {v23.8B}, [x1], x3
ld1 {v25.8B}, [x1]
transpose_8x8B v16, v18, v20, v22, v24, v26, v28, v30, v0, v1
transpose_8x8B v17, v19, v21, v23, v25, v27, v29, v31, v0, v1
@@ -356,51 +356,51 @@ function \type\()_h264_qpel8_v_lowpass_l2_neon
lowpass_8 v28, v29, v30, v31, v22, v23
transpose_8x8B v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
ld1 {v24.8b}, [x12], x2
ld1 {v25.8b}, [x12], x2
ld1 {v26.8b}, [x12], x2
ld1 {v27.8b}, [x12], x2
ld1 {v28.8b}, [x12], x2
urhadd v16.8b, v24.8b, v16.8b
urhadd v17.8b, v25.8b, v17.8b
ld1 {v29.8b}, [x12], x2
urhadd v18.8b, v26.8b, v18.8b
urhadd v19.8b, v27.8b, v19.8b
ld1 {v30.8b}, [x12], x2
urhadd v20.8b, v28.8b, v20.8b
urhadd v21.8b, v29.8b, v21.8b
ld1 {v31.8b}, [x12], x2
urhadd v22.8b, v30.8b, v22.8b
urhadd v23.8b, v31.8b, v23.8b
ld1 {v24.8B}, [x12], x2
ld1 {v25.8B}, [x12], x2
ld1 {v26.8B}, [x12], x2
ld1 {v27.8B}, [x12], x2
ld1 {v28.8B}, [x12], x2
urhadd v16.8B, v24.8B, v16.8B
urhadd v17.8B, v25.8B, v17.8B
ld1 {v29.8B}, [x12], x2
urhadd v18.8B, v26.8B, v18.8B
urhadd v19.8B, v27.8B, v19.8B
ld1 {v30.8B}, [x12], x2
urhadd v20.8B, v28.8B, v20.8B
urhadd v21.8B, v29.8B, v21.8B
ld1 {v31.8B}, [x12], x2
urhadd v22.8B, v30.8B, v22.8B
urhadd v23.8B, v31.8B, v23.8B
.ifc \type,avg
ld1 {v24.8b}, [x0], x3
urhadd v16.8b, v16.8b, v24.8b
ld1 {v25.8b}, [x0], x3
urhadd v17.8b, v17.8b, v25.8b
ld1 {v26.8b}, [x0], x3
urhadd v18.8b, v18.8b, v26.8b
ld1 {v27.8b}, [x0], x3
urhadd v19.8b, v19.8b, v27.8b
ld1 {v28.8b}, [x0], x3
urhadd v20.8b, v20.8b, v28.8b
ld1 {v29.8b}, [x0], x3
urhadd v21.8b, v21.8b, v29.8b
ld1 {v30.8b}, [x0], x3
urhadd v22.8b, v22.8b, v30.8b
ld1 {v31.8b}, [x0], x3
urhadd v23.8b, v23.8b, v31.8b
ld1 {v24.8B}, [x0], x3
urhadd v16.8B, v16.8B, v24.8B
ld1 {v25.8B}, [x0], x3
urhadd v17.8B, v17.8B, v25.8B
ld1 {v26.8B}, [x0], x3
urhadd v18.8B, v18.8B, v26.8B
ld1 {v27.8B}, [x0], x3
urhadd v19.8B, v19.8B, v27.8B
ld1 {v28.8B}, [x0], x3
urhadd v20.8B, v20.8B, v28.8B
ld1 {v29.8B}, [x0], x3
urhadd v21.8B, v21.8B, v29.8B
ld1 {v30.8B}, [x0], x3
urhadd v22.8B, v22.8B, v30.8B
ld1 {v31.8B}, [x0], x3
urhadd v23.8B, v23.8B, v31.8B
sub x0, x0, x3, lsl #3
.endif
st1 {v16.8b}, [x0], x3
st1 {v17.8b}, [x0], x3
st1 {v18.8b}, [x0], x3
st1 {v19.8b}, [x0], x3
st1 {v20.8b}, [x0], x3
st1 {v21.8b}, [x0], x3
st1 {v22.8b}, [x0], x3
st1 {v23.8b}, [x0], x3
st1 {v16.8B}, [x0], x3
st1 {v17.8B}, [x0], x3
st1 {v18.8B}, [x0], x3
st1 {v19.8B}, [x0], x3
st1 {v20.8B}, [x0], x3
st1 {v21.8B}, [x0], x3
st1 {v22.8B}, [x0], x3
st1 {v23.8B}, [x0], x3
ret
endfunc
@@ -411,19 +411,19 @@ endfunc
function put_h264_qpel8_hv_lowpass_neon_top
lowpass_const w12
ld1 {v16.8h}, [x1], x3
ld1 {v17.8h}, [x1], x3
ld1 {v18.8h}, [x1], x3
ld1 {v19.8h}, [x1], x3
ld1 {v20.8h}, [x1], x3
ld1 {v21.8h}, [x1], x3
ld1 {v22.8h}, [x1], x3
ld1 {v23.8h}, [x1], x3
ld1 {v24.8h}, [x1], x3
ld1 {v25.8h}, [x1], x3
ld1 {v26.8h}, [x1], x3
ld1 {v27.8h}, [x1], x3
ld1 {v28.8h}, [x1]
ld1 {v16.8H}, [x1], x3
ld1 {v17.8H}, [x1], x3
ld1 {v18.8H}, [x1], x3
ld1 {v19.8H}, [x1], x3
ld1 {v20.8H}, [x1], x3
ld1 {v21.8H}, [x1], x3
ld1 {v22.8H}, [x1], x3
ld1 {v23.8H}, [x1], x3
ld1 {v24.8H}, [x1], x3
ld1 {v25.8H}, [x1], x3
ld1 {v26.8H}, [x1], x3
ld1 {v27.8H}, [x1], x3
ld1 {v28.8H}, [x1]
lowpass_8H v16, v17
lowpass_8H v18, v19
lowpass_8H v20, v21
@@ -447,7 +447,7 @@ function put_h264_qpel8_hv_lowpass_neon_top
lowpass_8.16 v22, v30, v22
lowpass_8.16 v23, v31, v23
transpose_8x8B v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
transpose_8x8B v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
ret
endfunc
@@ -457,33 +457,33 @@ function \type\()_h264_qpel8_hv_lowpass_neon
mov x10, x30
bl put_h264_qpel8_hv_lowpass_neon_top
.ifc \type,avg
ld1 {v0.8b}, [x0], x2
urhadd v16.8b, v16.8b, v0.8b
ld1 {v1.8b}, [x0], x2
urhadd v17.8b, v17.8b, v1.8b
ld1 {v2.8b}, [x0], x2
urhadd v18.8b, v18.8b, v2.8b
ld1 {v3.8b}, [x0], x2
urhadd v19.8b, v19.8b, v3.8b
ld1 {v4.8b}, [x0], x2
urhadd v20.8b, v20.8b, v4.8b
ld1 {v5.8b}, [x0], x2
urhadd v21.8b, v21.8b, v5.8b
ld1 {v6.8b}, [x0], x2
urhadd v22.8b, v22.8b, v6.8b
ld1 {v7.8b}, [x0], x2
urhadd v23.8b, v23.8b, v7.8b
ld1 {v0.8B}, [x0], x2
urhadd v16.8B, v16.8B, v0.8B
ld1 {v1.8B}, [x0], x2
urhadd v17.8B, v17.8B, v1.8B
ld1 {v2.8B}, [x0], x2
urhadd v18.8B, v18.8B, v2.8B
ld1 {v3.8B}, [x0], x2
urhadd v19.8B, v19.8B, v3.8B
ld1 {v4.8B}, [x0], x2
urhadd v20.8B, v20.8B, v4.8B
ld1 {v5.8B}, [x0], x2
urhadd v21.8B, v21.8B, v5.8B
ld1 {v6.8B}, [x0], x2
urhadd v22.8B, v22.8B, v6.8B
ld1 {v7.8B}, [x0], x2
urhadd v23.8B, v23.8B, v7.8B
sub x0, x0, x2, lsl #3
.endif
st1 {v16.8b}, [x0], x2
st1 {v17.8b}, [x0], x2
st1 {v18.8b}, [x0], x2
st1 {v19.8b}, [x0], x2
st1 {v20.8b}, [x0], x2
st1 {v21.8b}, [x0], x2
st1 {v22.8b}, [x0], x2
st1 {v23.8b}, [x0], x2
st1 {v16.8B}, [x0], x2
st1 {v17.8B}, [x0], x2
st1 {v18.8B}, [x0], x2
st1 {v19.8B}, [x0], x2
st1 {v20.8B}, [x0], x2
st1 {v21.8B}, [x0], x2
st1 {v22.8B}, [x0], x2
st1 {v23.8B}, [x0], x2
ret x10
endfunc
@@ -497,45 +497,45 @@ function \type\()_h264_qpel8_hv_lowpass_l2_neon
mov x10, x30
bl put_h264_qpel8_hv_lowpass_neon_top
ld1 {v0.8b, v1.8b}, [x2], #16
ld1 {v2.8b, v3.8b}, [x2], #16
urhadd v0.8b, v0.8b, v16.8b
urhadd v1.8b, v1.8b, v17.8b
ld1 {v4.8b, v5.8b}, [x2], #16
urhadd v2.8b, v2.8b, v18.8b
urhadd v3.8b, v3.8b, v19.8b
ld1 {v6.8b, v7.8b}, [x2], #16
urhadd v4.8b, v4.8b, v20.8b
urhadd v5.8b, v5.8b, v21.8b
urhadd v6.8b, v6.8b, v22.8b
urhadd v7.8b, v7.8b, v23.8b
ld1 {v0.8B, v1.8B}, [x2], #16
ld1 {v2.8B, v3.8B}, [x2], #16
urhadd v0.8B, v0.8B, v16.8B
urhadd v1.8B, v1.8B, v17.8B
ld1 {v4.8B, v5.8B}, [x2], #16
urhadd v2.8B, v2.8B, v18.8B
urhadd v3.8B, v3.8B, v19.8B
ld1 {v6.8B, v7.8B}, [x2], #16
urhadd v4.8B, v4.8B, v20.8B
urhadd v5.8B, v5.8B, v21.8B
urhadd v6.8B, v6.8B, v22.8B
urhadd v7.8B, v7.8B, v23.8B
.ifc \type,avg
ld1 {v16.8b}, [x0], x3
urhadd v0.8b, v0.8b, v16.8b
ld1 {v17.8b}, [x0], x3
urhadd v1.8b, v1.8b, v17.8b
ld1 {v18.8b}, [x0], x3
urhadd v2.8b, v2.8b, v18.8b
ld1 {v19.8b}, [x0], x3
urhadd v3.8b, v3.8b, v19.8b
ld1 {v20.8b}, [x0], x3
urhadd v4.8b, v4.8b, v20.8b
ld1 {v21.8b}, [x0], x3
urhadd v5.8b, v5.8b, v21.8b
ld1 {v22.8b}, [x0], x3
urhadd v6.8b, v6.8b, v22.8b
ld1 {v23.8b}, [x0], x3
urhadd v7.8b, v7.8b, v23.8b
ld1 {v16.8B}, [x0], x3
urhadd v0.8B, v0.8B, v16.8B
ld1 {v17.8B}, [x0], x3
urhadd v1.8B, v1.8B, v17.8B
ld1 {v18.8B}, [x0], x3
urhadd v2.8B, v2.8B, v18.8B
ld1 {v19.8B}, [x0], x3
urhadd v3.8B, v3.8B, v19.8B
ld1 {v20.8B}, [x0], x3
urhadd v4.8B, v4.8B, v20.8B
ld1 {v21.8B}, [x0], x3
urhadd v5.8B, v5.8B, v21.8B
ld1 {v22.8B}, [x0], x3
urhadd v6.8B, v6.8B, v22.8B
ld1 {v23.8B}, [x0], x3
urhadd v7.8B, v7.8B, v23.8B
sub x0, x0, x3, lsl #3
.endif
st1 {v0.8b}, [x0], x3
st1 {v1.8b}, [x0], x3
st1 {v2.8b}, [x0], x3
st1 {v3.8b}, [x0], x3
st1 {v4.8b}, [x0], x3
st1 {v5.8b}, [x0], x3
st1 {v6.8b}, [x0], x3
st1 {v7.8b}, [x0], x3
st1 {v0.8B}, [x0], x3
st1 {v1.8B}, [x0], x3
st1 {v2.8B}, [x0], x3
st1 {v3.8B}, [x0], x3
st1 {v4.8B}, [x0], x3
st1 {v5.8B}, [x0], x3
st1 {v6.8B}, [x0], x3
st1 {v7.8B}, [x0], x3
ret x10
endfunc
@@ -579,8 +579,8 @@ function \type\()_h264_qpel16_hv_lowpass_l2_neon
endfunc
.endm
h264_qpel16_hv put
h264_qpel16_hv avg
h264_qpel16_hv put
h264_qpel16_hv avg
.macro h264_qpel8 type
function ff_\type\()_h264_qpel8_mc10_neon, export=1
@@ -758,8 +758,8 @@ function ff_\type\()_h264_qpel8_mc33_neon, export=1
endfunc
.endm
h264_qpel8 put
h264_qpel8 avg
h264_qpel8 put
h264_qpel8 avg
.macro h264_qpel16 type
function ff_\type\()_h264_qpel16_mc10_neon, export=1
@@ -930,5 +930,5 @@ function ff_\type\()_h264_qpel16_mc33_neon, export=1
endfunc
.endm
h264_qpel16 put
h264_qpel16 avg
h264_qpel16 put
h264_qpel16 avg
+181 -181
View File
@@ -26,295 +26,295 @@
.if \avg
mov x12, x0
.endif
1: ld1 {v0.16b}, [x1], x2
ld1 {v1.16b}, [x1], x2
ld1 {v2.16b}, [x1], x2
ld1 {v3.16b}, [x1], x2
1: ld1 {v0.16B}, [x1], x2
ld1 {v1.16B}, [x1], x2
ld1 {v2.16B}, [x1], x2
ld1 {v3.16B}, [x1], x2
.if \avg
ld1 {v4.16b}, [x12], x2
urhadd v0.16b, v0.16b, v4.16b
ld1 {v5.16b}, [x12], x2
urhadd v1.16b, v1.16b, v5.16b
ld1 {v6.16b}, [x12], x2
urhadd v2.16b, v2.16b, v6.16b
ld1 {v7.16b}, [x12], x2
urhadd v3.16b, v3.16b, v7.16b
ld1 {v4.16B}, [x12], x2
urhadd v0.16B, v0.16B, v4.16B
ld1 {v5.16B}, [x12], x2
urhadd v1.16B, v1.16B, v5.16B
ld1 {v6.16B}, [x12], x2
urhadd v2.16B, v2.16B, v6.16B
ld1 {v7.16B}, [x12], x2
urhadd v3.16B, v3.16B, v7.16B
.endif
subs w3, w3, #4
st1 {v0.16b}, [x0], x2
st1 {v1.16b}, [x0], x2
st1 {v2.16b}, [x0], x2
st1 {v3.16b}, [x0], x2
st1 {v0.16B}, [x0], x2
st1 {v1.16B}, [x0], x2
st1 {v2.16B}, [x0], x2
st1 {v3.16B}, [x0], x2
b.ne 1b
ret
.endm
.macro pixels16_x2 rnd=1, avg=0
1: ld1 {v0.16b, v1.16b}, [x1], x2
ld1 {v2.16b, v3.16b}, [x1], x2
1: ld1 {v0.16B, v1.16B}, [x1], x2
ld1 {v2.16B, v3.16B}, [x1], x2
subs w3, w3, #2
ext v1.16b, v0.16b, v1.16b, #1
avg v0.16b, v0.16b, v1.16b
ext v3.16b, v2.16b, v3.16b, #1
avg v2.16b, v2.16b, v3.16b
ext v1.16B, v0.16B, v1.16B, #1
avg v0.16B, v0.16B, v1.16B
ext v3.16B, v2.16B, v3.16B, #1
avg v2.16B, v2.16B, v3.16B
.if \avg
ld1 {v1.16b}, [x0], x2
ld1 {v3.16b}, [x0]
urhadd v0.16b, v0.16b, v1.16b
urhadd v2.16b, v2.16b, v3.16b
ld1 {v1.16B}, [x0], x2
ld1 {v3.16B}, [x0]
urhadd v0.16B, v0.16B, v1.16B
urhadd v2.16B, v2.16B, v3.16B
sub x0, x0, x2
.endif
st1 {v0.16b}, [x0], x2
st1 {v2.16b}, [x0], x2
st1 {v0.16B}, [x0], x2
st1 {v2.16B}, [x0], x2
b.ne 1b
ret
.endm
.macro pixels16_y2 rnd=1, avg=0
sub w3, w3, #2
ld1 {v0.16b}, [x1], x2
ld1 {v1.16b}, [x1], x2
ld1 {v0.16B}, [x1], x2
ld1 {v1.16B}, [x1], x2
1: subs w3, w3, #2
avg v2.16b, v0.16b, v1.16b
ld1 {v0.16b}, [x1], x2
avg v3.16b, v0.16b, v1.16b
ld1 {v1.16b}, [x1], x2
avg v2.16B, v0.16B, v1.16B
ld1 {v0.16B}, [x1], x2
avg v3.16B, v0.16B, v1.16B
ld1 {v1.16B}, [x1], x2
.if \avg
ld1 {v4.16b}, [x0], x2
ld1 {v5.16b}, [x0]
urhadd v2.16b, v2.16b, v4.16b
urhadd v3.16b, v3.16b, v5.16b
ld1 {v4.16B}, [x0], x2
ld1 {v5.16B}, [x0]
urhadd v2.16B, v2.16B, v4.16B
urhadd v3.16B, v3.16B, v5.16B
sub x0, x0, x2
.endif
st1 {v2.16b}, [x0], x2
st1 {v3.16b}, [x0], x2
st1 {v2.16B}, [x0], x2
st1 {v3.16B}, [x0], x2
b.ne 1b
avg v2.16b, v0.16b, v1.16b
ld1 {v0.16b}, [x1], x2
avg v3.16b, v0.16b, v1.16b
avg v2.16B, v0.16B, v1.16B
ld1 {v0.16B}, [x1], x2
avg v3.16B, v0.16B, v1.16B
.if \avg
ld1 {v4.16b}, [x0], x2
ld1 {v5.16b}, [x0]
urhadd v2.16b, v2.16b, v4.16b
urhadd v3.16b, v3.16b, v5.16b
ld1 {v4.16B}, [x0], x2
ld1 {v5.16B}, [x0]
urhadd v2.16B, v2.16B, v4.16B
urhadd v3.16B, v3.16B, v5.16B
sub x0, x0, x2
.endif
st1 {v2.16b}, [x0], x2
st1 {v3.16b}, [x0], x2
st1 {v2.16B}, [x0], x2
st1 {v3.16B}, [x0], x2
ret
.endm
.macro pixels16_xy2 rnd=1, avg=0
sub w3, w3, #2
ld1 {v0.16b, v1.16b}, [x1], x2
ld1 {v4.16b, v5.16b}, [x1], x2
ld1 {v0.16B, v1.16B}, [x1], x2
ld1 {v4.16B, v5.16B}, [x1], x2
NRND movi v26.8H, #1
ext v1.16b, v0.16b, v1.16b, #1
ext v5.16b, v4.16b, v5.16b, #1
uaddl v16.8h, v0.8b, v1.8b
uaddl2 v20.8h, v0.16b, v1.16b
uaddl v18.8h, v4.8b, v5.8b
uaddl2 v22.8h, v4.16b, v5.16b
ext v1.16B, v0.16B, v1.16B, #1
ext v5.16B, v4.16B, v5.16B, #1
uaddl v16.8H, v0.8B, v1.8B
uaddl2 v20.8H, v0.16B, v1.16B
uaddl v18.8H, v4.8B, v5.8B
uaddl2 v22.8H, v4.16B, v5.16B
1: subs w3, w3, #2
ld1 {v0.16b, v1.16b}, [x1], x2
add v24.8h, v16.8h, v18.8h
ld1 {v0.16B, v1.16B}, [x1], x2
add v24.8H, v16.8H, v18.8H
NRND add v24.8H, v24.8H, v26.8H
ext v30.16b, v0.16b, v1.16b, #1
add v1.8h, v20.8h, v22.8h
mshrn v28.8b, v24.8h, #2
ext v30.16B, v0.16B, v1.16B, #1
add v1.8H, v20.8H, v22.8H
mshrn v28.8B, v24.8H, #2
NRND add v1.8H, v1.8H, v26.8H
mshrn2 v28.16b, v1.8h, #2
mshrn2 v28.16B, v1.8H, #2
.if \avg
ld1 {v16.16b}, [x0]
urhadd v28.16b, v28.16b, v16.16b
ld1 {v16.16B}, [x0]
urhadd v28.16B, v28.16B, v16.16B
.endif
uaddl v16.8h, v0.8b, v30.8b
ld1 {v2.16b, v3.16b}, [x1], x2
uaddl2 v20.8h, v0.16b, v30.16b
st1 {v28.16b}, [x0], x2
add v24.8h, v16.8h, v18.8h
uaddl v16.8H, v0.8B, v30.8B
ld1 {v2.16B, v3.16B}, [x1], x2
uaddl2 v20.8H, v0.16B, v30.16B
st1 {v28.16B}, [x0], x2
add v24.8H, v16.8H, v18.8H
NRND add v24.8H, v24.8H, v26.8H
ext v3.16b, v2.16b, v3.16b, #1
add v0.8h, v20.8h, v22.8h
mshrn v30.8b, v24.8h, #2
ext v3.16B, v2.16B, v3.16B, #1
add v0.8H, v20.8H, v22.8H
mshrn v30.8B, v24.8H, #2
NRND add v0.8H, v0.8H, v26.8H
mshrn2 v30.16b, v0.8h, #2
mshrn2 v30.16B, v0.8H, #2
.if \avg
ld1 {v18.16b}, [x0]
urhadd v30.16b, v30.16b, v18.16b
ld1 {v18.16B}, [x0]
urhadd v30.16B, v30.16B, v18.16B
.endif
uaddl v18.8h, v2.8b, v3.8b
uaddl2 v22.8h, v2.16b, v3.16b
st1 {v30.16b}, [x0], x2
uaddl v18.8H, v2.8B, v3.8B
uaddl2 v22.8H, v2.16B, v3.16B
st1 {v30.16B}, [x0], x2
b.gt 1b
ld1 {v0.16b, v1.16b}, [x1], x2
add v24.8h, v16.8h, v18.8h
ld1 {v0.16B, v1.16B}, [x1], x2
add v24.8H, v16.8H, v18.8H
NRND add v24.8H, v24.8H, v26.8H
ext v30.16b, v0.16b, v1.16b, #1
add v1.8h, v20.8h, v22.8h
mshrn v28.8b, v24.8h, #2
ext v30.16B, v0.16B, v1.16B, #1
add v1.8H, v20.8H, v22.8H
mshrn v28.8B, v24.8H, #2
NRND add v1.8H, v1.8H, v26.8H
mshrn2 v28.16b, v1.8h, #2
mshrn2 v28.16B, v1.8H, #2
.if \avg
ld1 {v16.16b}, [x0]
urhadd v28.16b, v28.16b, v16.16b
ld1 {v16.16B}, [x0]
urhadd v28.16B, v28.16B, v16.16B
.endif
uaddl v16.8h, v0.8b, v30.8b
uaddl2 v20.8h, v0.16b, v30.16b
st1 {v28.16b}, [x0], x2
add v24.8h, v16.8h, v18.8h
uaddl v16.8H, v0.8B, v30.8B
uaddl2 v20.8H, v0.16B, v30.16B
st1 {v28.16B}, [x0], x2
add v24.8H, v16.8H, v18.8H
NRND add v24.8H, v24.8H, v26.8H
add v0.8h, v20.8h, v22.8h
mshrn v30.8b, v24.8h, #2
add v0.8H, v20.8H, v22.8H
mshrn v30.8B, v24.8H, #2
NRND add v0.8H, v0.8H, v26.8H
mshrn2 v30.16b, v0.8h, #2
mshrn2 v30.16B, v0.8H, #2
.if \avg
ld1 {v18.16b}, [x0]
urhadd v30.16b, v30.16b, v18.16b
ld1 {v18.16B}, [x0]
urhadd v30.16B, v30.16B, v18.16B
.endif
st1 {v30.16b}, [x0], x2
st1 {v30.16B}, [x0], x2
ret
.endm
.macro pixels8 rnd=1, avg=0
1: ld1 {v0.8b}, [x1], x2
ld1 {v1.8b}, [x1], x2
ld1 {v2.8b}, [x1], x2
ld1 {v3.8b}, [x1], x2
1: ld1 {v0.8B}, [x1], x2
ld1 {v1.8B}, [x1], x2
ld1 {v2.8B}, [x1], x2
ld1 {v3.8B}, [x1], x2
.if \avg
ld1 {v4.8b}, [x0], x2
urhadd v0.8b, v0.8b, v4.8b
ld1 {v5.8b}, [x0], x2
urhadd v1.8b, v1.8b, v5.8b
ld1 {v6.8b}, [x0], x2
urhadd v2.8b, v2.8b, v6.8b
ld1 {v7.8b}, [x0], x2
urhadd v3.8b, v3.8b, v7.8b
ld1 {v4.8B}, [x0], x2
urhadd v0.8B, v0.8B, v4.8B
ld1 {v5.8B}, [x0], x2
urhadd v1.8B, v1.8B, v5.8B
ld1 {v6.8B}, [x0], x2
urhadd v2.8B, v2.8B, v6.8B
ld1 {v7.8B}, [x0], x2
urhadd v3.8B, v3.8B, v7.8B
sub x0, x0, x2, lsl #2
.endif
subs w3, w3, #4
st1 {v0.8b}, [x0], x2
st1 {v1.8b}, [x0], x2
st1 {v2.8b}, [x0], x2
st1 {v3.8b}, [x0], x2
st1 {v0.8B}, [x0], x2
st1 {v1.8B}, [x0], x2
st1 {v2.8B}, [x0], x2
st1 {v3.8B}, [x0], x2
b.ne 1b
ret
.endm
.macro pixels8_x2 rnd=1, avg=0
1: ld1 {v0.8b, v1.8b}, [x1], x2
ext v1.8b, v0.8b, v1.8b, #1
ld1 {v2.8b, v3.8b}, [x1], x2
ext v3.8b, v2.8b, v3.8b, #1
1: ld1 {v0.8B, v1.8B}, [x1], x2
ext v1.8B, v0.8B, v1.8B, #1
ld1 {v2.8B, v3.8B}, [x1], x2
ext v3.8B, v2.8B, v3.8B, #1
subs w3, w3, #2
avg v0.8b, v0.8b, v1.8b
avg v2.8b, v2.8b, v3.8b
avg v0.8B, v0.8B, v1.8B
avg v2.8B, v2.8B, v3.8B
.if \avg
ld1 {v4.8b}, [x0], x2
ld1 {v5.8b}, [x0]
urhadd v0.8b, v0.8b, v4.8b
urhadd v2.8b, v2.8b, v5.8b
ld1 {v4.8B}, [x0], x2
ld1 {v5.8B}, [x0]
urhadd v0.8B, v0.8B, v4.8B
urhadd v2.8B, v2.8B, v5.8B
sub x0, x0, x2
.endif
st1 {v0.8b}, [x0], x2
st1 {v2.8b}, [x0], x2
st1 {v0.8B}, [x0], x2
st1 {v2.8B}, [x0], x2
b.ne 1b
ret
.endm
.macro pixels8_y2 rnd=1, avg=0
sub w3, w3, #2
ld1 {v0.8b}, [x1], x2
ld1 {v1.8b}, [x1], x2
ld1 {v0.8B}, [x1], x2
ld1 {v1.8B}, [x1], x2
1: subs w3, w3, #2
avg v4.8b, v0.8b, v1.8b
ld1 {v0.8b}, [x1], x2
avg v5.8b, v0.8b, v1.8b
ld1 {v1.8b}, [x1], x2
avg v4.8B, v0.8B, v1.8B
ld1 {v0.8B}, [x1], x2
avg v5.8B, v0.8B, v1.8B
ld1 {v1.8B}, [x1], x2
.if \avg
ld1 {v2.8b}, [x0], x2
ld1 {v3.8b}, [x0]
urhadd v4.8b, v4.8b, v2.8b
urhadd v5.8b, v5.8b, v3.8b
ld1 {v2.8B}, [x0], x2
ld1 {v3.8B}, [x0]
urhadd v4.8B, v4.8B, v2.8B
urhadd v5.8B, v5.8B, v3.8B
sub x0, x0, x2
.endif
st1 {v4.8b}, [x0], x2
st1 {v5.8b}, [x0], x2
st1 {v4.8B}, [x0], x2
st1 {v5.8B}, [x0], x2
b.ne 1b
avg v4.8b, v0.8b, v1.8b
ld1 {v0.8b}, [x1], x2
avg v5.8b, v0.8b, v1.8b
avg v4.8B, v0.8B, v1.8B
ld1 {v0.8B}, [x1], x2
avg v5.8B, v0.8B, v1.8B
.if \avg
ld1 {v2.8b}, [x0], x2
ld1 {v3.8b}, [x0]
urhadd v4.8b, v4.8b, v2.8b
urhadd v5.8b, v5.8b, v3.8b
ld1 {v2.8B}, [x0], x2
ld1 {v3.8B}, [x0]
urhadd v4.8B, v4.8B, v2.8B
urhadd v5.8B, v5.8B, v3.8B
sub x0, x0, x2
.endif
st1 {v4.8b}, [x0], x2
st1 {v5.8b}, [x0], x2
st1 {v4.8B}, [x0], x2
st1 {v5.8B}, [x0], x2
ret
.endm
.macro pixels8_xy2 rnd=1, avg=0
sub w3, w3, #2
ld1 {v0.16b}, [x1], x2
ld1 {v1.16b}, [x1], x2
ld1 {v0.16B}, [x1], x2
ld1 {v1.16B}, [x1], x2
NRND movi v19.8H, #1
ext v4.16b, v0.16b, v4.16b, #1
ext v6.16b, v1.16b, v6.16b, #1
uaddl v16.8h, v0.8b, v4.8b
uaddl v17.8h, v1.8b, v6.8b
ext v4.16B, v0.16B, v4.16B, #1
ext v6.16B, v1.16B, v6.16B, #1
uaddl v16.8H, v0.8B, v4.8B
uaddl v17.8H, v1.8B, v6.8B
1: subs w3, w3, #2
ld1 {v0.16b}, [x1], x2
add v18.8h, v16.8h, v17.8h
ext v4.16b, v0.16b, v4.16b, #1
ld1 {v0.16B}, [x1], x2
add v18.8H, v16.8H, v17.8H
ext v4.16B, v0.16B, v4.16B, #1
NRND add v18.8H, v18.8H, v19.8H
uaddl v16.8h, v0.8b, v4.8b
mshrn v5.8b, v18.8h, #2
ld1 {v1.16b}, [x1], x2
add v18.8h, v16.8h, v17.8h
uaddl v16.8H, v0.8B, v4.8B
mshrn v5.8B, v18.8H, #2
ld1 {v1.16B}, [x1], x2
add v18.8H, v16.8H, v17.8H
.if \avg
ld1 {v7.8b}, [x0]
urhadd v5.8b, v5.8b, v7.8b
ld1 {v7.8B}, [x0]
urhadd v5.8B, v5.8B, v7.8B
.endif
NRND add v18.8H, v18.8H, v19.8H
st1 {v5.8b}, [x0], x2
mshrn v7.8b, v18.8h, #2
st1 {v5.8B}, [x0], x2
mshrn v7.8B, v18.8H, #2
.if \avg
ld1 {v5.8b}, [x0]
urhadd v7.8b, v7.8b, v5.8b
ld1 {v5.8B}, [x0]
urhadd v7.8B, v7.8B, v5.8B
.endif
ext v6.16b, v1.16b, v6.16b, #1
uaddl v17.8h, v1.8b, v6.8b
st1 {v7.8b}, [x0], x2
ext v6.16B, v1.16B, v6.16B, #1
uaddl v17.8H, v1.8B, v6.8B
st1 {v7.8B}, [x0], x2
b.gt 1b
ld1 {v0.16b}, [x1], x2
add v18.8h, v16.8h, v17.8h
ext v4.16b, v0.16b, v4.16b, #1
ld1 {v0.16B}, [x1], x2
add v18.8H, v16.8H, v17.8H
ext v4.16B, v0.16B, v4.16B, #1
NRND add v18.8H, v18.8H, v19.8H
uaddl v16.8h, v0.8b, v4.8b
mshrn v5.8b, v18.8h, #2
add v18.8h, v16.8h, v17.8h
uaddl v16.8H, v0.8B, v4.8B
mshrn v5.8B, v18.8H, #2
add v18.8H, v16.8H, v17.8H
.if \avg
ld1 {v7.8b}, [x0]
urhadd v5.8b, v5.8b, v7.8b
ld1 {v7.8B}, [x0]
urhadd v5.8B, v5.8B, v7.8B
.endif
NRND add v18.8H, v18.8H, v19.8H
st1 {v5.8b}, [x0], x2
mshrn v7.8b, v18.8h, #2
st1 {v5.8B}, [x0], x2
mshrn v7.8B, v18.8H, #2
.if \avg
ld1 {v5.8b}, [x0]
urhadd v7.8b, v7.8b, v5.8b
ld1 {v5.8B}, [x0]
urhadd v7.8B, v7.8B, v5.8B
.endif
st1 {v7.8b}, [x0], x2
st1 {v7.8B}, [x0], x2
ret
.endm
-1
View File
@@ -19,7 +19,6 @@
#ifndef AVCODEC_AARCH64_IDCT_H
#define AVCODEC_AARCH64_IDCT_H
#include <stddef.h>
#include <stdint.h>
void ff_simple_idct_neon(int16_t *data);
+96 -96
View File
@@ -17,133 +17,133 @@
*/
.macro transpose_8x8B r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
trn1 \r8\().8b, \r0\().8b, \r1\().8b
trn2 \r9\().8b, \r0\().8b, \r1\().8b
trn1 \r1\().8b, \r2\().8b, \r3\().8b
trn2 \r3\().8b, \r2\().8b, \r3\().8b
trn1 \r0\().8b, \r4\().8b, \r5\().8b
trn2 \r5\().8b, \r4\().8b, \r5\().8b
trn1 \r2\().8b, \r6\().8b, \r7\().8b
trn2 \r7\().8b, \r6\().8b, \r7\().8b
trn1 \r8\().8B, \r0\().8B, \r1\().8B
trn2 \r9\().8B, \r0\().8B, \r1\().8B
trn1 \r1\().8B, \r2\().8B, \r3\().8B
trn2 \r3\().8B, \r2\().8B, \r3\().8B
trn1 \r0\().8B, \r4\().8B, \r5\().8B
trn2 \r5\().8B, \r4\().8B, \r5\().8B
trn1 \r2\().8B, \r6\().8B, \r7\().8B
trn2 \r7\().8B, \r6\().8B, \r7\().8B
trn1 \r4\().4h, \r0\().4h, \r2\().4h
trn2 \r2\().4h, \r0\().4h, \r2\().4h
trn1 \r6\().4h, \r5\().4h, \r7\().4h
trn2 \r7\().4h, \r5\().4h, \r7\().4h
trn1 \r5\().4h, \r9\().4h, \r3\().4h
trn2 \r9\().4h, \r9\().4h, \r3\().4h
trn1 \r3\().4h, \r8\().4h, \r1\().4h
trn2 \r8\().4h, \r8\().4h, \r1\().4h
trn1 \r4\().4H, \r0\().4H, \r2\().4H
trn2 \r2\().4H, \r0\().4H, \r2\().4H
trn1 \r6\().4H, \r5\().4H, \r7\().4H
trn2 \r7\().4H, \r5\().4H, \r7\().4H
trn1 \r5\().4H, \r9\().4H, \r3\().4H
trn2 \r9\().4H, \r9\().4H, \r3\().4H
trn1 \r3\().4H, \r8\().4H, \r1\().4H
trn2 \r8\().4H, \r8\().4H, \r1\().4H
trn1 \r0\().2s, \r3\().2s, \r4\().2s
trn2 \r4\().2s, \r3\().2s, \r4\().2s
trn1 \r0\().2S, \r3\().2S, \r4\().2S
trn2 \r4\().2S, \r3\().2S, \r4\().2S
trn1 \r1\().2s, \r5\().2s, \r6\().2s
trn2 \r5\().2s, \r5\().2s, \r6\().2s
trn1 \r1\().2S, \r5\().2S, \r6\().2S
trn2 \r5\().2S, \r5\().2S, \r6\().2S
trn2 \r6\().2s, \r8\().2s, \r2\().2s
trn1 \r2\().2s, \r8\().2s, \r2\().2s
trn2 \r6\().2S, \r8\().2S, \r2\().2S
trn1 \r2\().2S, \r8\().2S, \r2\().2S
trn1 \r3\().2s, \r9\().2s, \r7\().2s
trn2 \r7\().2s, \r9\().2s, \r7\().2s
trn1 \r3\().2S, \r9\().2S, \r7\().2S
trn2 \r7\().2S, \r9\().2S, \r7\().2S
.endm
.macro transpose_8x16B r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
trn1 \t0\().16b, \r0\().16b, \r1\().16b
trn2 \t1\().16b, \r0\().16b, \r1\().16b
trn1 \r1\().16b, \r2\().16b, \r3\().16b
trn2 \r3\().16b, \r2\().16b, \r3\().16b
trn1 \r0\().16b, \r4\().16b, \r5\().16b
trn2 \r5\().16b, \r4\().16b, \r5\().16b
trn1 \r2\().16b, \r6\().16b, \r7\().16b
trn2 \r7\().16b, \r6\().16b, \r7\().16b
trn1 \t0\().16B, \r0\().16B, \r1\().16B
trn2 \t1\().16B, \r0\().16B, \r1\().16B
trn1 \r1\().16B, \r2\().16B, \r3\().16B
trn2 \r3\().16B, \r2\().16B, \r3\().16B
trn1 \r0\().16B, \r4\().16B, \r5\().16B
trn2 \r5\().16B, \r4\().16B, \r5\().16B
trn1 \r2\().16B, \r6\().16B, \r7\().16B
trn2 \r7\().16B, \r6\().16B, \r7\().16B
trn1 \r4\().8h, \r0\().8h, \r2\().8h
trn2 \r2\().8h, \r0\().8h, \r2\().8h
trn1 \r6\().8h, \r5\().8h, \r7\().8h
trn2 \r7\().8h, \r5\().8h, \r7\().8h
trn1 \r5\().8h, \t1\().8h, \r3\().8h
trn2 \t1\().8h, \t1\().8h, \r3\().8h
trn1 \r3\().8h, \t0\().8h, \r1\().8h
trn2 \t0\().8h, \t0\().8h, \r1\().8h
trn1 \r4\().8H, \r0\().8H, \r2\().8H
trn2 \r2\().8H, \r0\().8H, \r2\().8H
trn1 \r6\().8H, \r5\().8H, \r7\().8H
trn2 \r7\().8H, \r5\().8H, \r7\().8H
trn1 \r5\().8H, \t1\().8H, \r3\().8H
trn2 \t1\().8H, \t1\().8H, \r3\().8H
trn1 \r3\().8H, \t0\().8H, \r1\().8H
trn2 \t0\().8H, \t0\().8H, \r1\().8H
trn1 \r0\().4s, \r3\().4s, \r4\().4s
trn2 \r4\().4s, \r3\().4s, \r4\().4s
trn1 \r0\().4S, \r3\().4S, \r4\().4S
trn2 \r4\().4S, \r3\().4S, \r4\().4S
trn1 \r1\().4s, \r5\().4s, \r6\().4s
trn2 \r5\().4s, \r5\().4s, \r6\().4s
trn1 \r1\().4S, \r5\().4S, \r6\().4S
trn2 \r5\().4S, \r5\().4S, \r6\().4S
trn2 \r6\().4s, \t0\().4s, \r2\().4s
trn1 \r2\().4s, \t0\().4s, \r2\().4s
trn2 \r6\().4S, \t0\().4S, \r2\().4S
trn1 \r2\().4S, \t0\().4S, \r2\().4S
trn1 \r3\().4s, \t1\().4s, \r7\().4s
trn2 \r7\().4s, \t1\().4s, \r7\().4s
trn1 \r3\().4S, \t1\().4S, \r7\().4S
trn2 \r7\().4S, \t1\().4S, \r7\().4S
.endm
.macro transpose_4x16B r0, r1, r2, r3, t4, t5, t6, t7
trn1 \t4\().16b, \r0\().16b, \r1\().16b
trn2 \t5\().16b, \r0\().16b, \r1\().16b
trn1 \t6\().16b, \r2\().16b, \r3\().16b
trn2 \t7\().16b, \r2\().16b, \r3\().16b
trn1 \t4\().16B, \r0\().16B, \r1\().16B
trn2 \t5\().16B, \r0\().16B, \r1\().16B
trn1 \t6\().16B, \r2\().16B, \r3\().16B
trn2 \t7\().16B, \r2\().16B, \r3\().16B
trn1 \r0\().8h, \t4\().8h, \t6\().8h
trn2 \r2\().8h, \t4\().8h, \t6\().8h
trn1 \r1\().8h, \t5\().8h, \t7\().8h
trn2 \r3\().8h, \t5\().8h, \t7\().8h
trn1 \r0\().8H, \t4\().8H, \t6\().8H
trn2 \r2\().8H, \t4\().8H, \t6\().8H
trn1 \r1\().8H, \t5\().8H, \t7\().8H
trn2 \r3\().8H, \t5\().8H, \t7\().8H
.endm
.macro transpose_4x8B r0, r1, r2, r3, t4, t5, t6, t7
trn1 \t4\().8b, \r0\().8b, \r1\().8b
trn2 \t5\().8b, \r0\().8b, \r1\().8b
trn1 \t6\().8b, \r2\().8b, \r3\().8b
trn2 \t7\().8b, \r2\().8b, \r3\().8b
trn1 \t4\().8B, \r0\().8B, \r1\().8B
trn2 \t5\().8B, \r0\().8B, \r1\().8B
trn1 \t6\().8B, \r2\().8B, \r3\().8B
trn2 \t7\().8B, \r2\().8B, \r3\().8B
trn1 \r0\().4h, \t4\().4h, \t6\().4h
trn2 \r2\().4h, \t4\().4h, \t6\().4h
trn1 \r1\().4h, \t5\().4h, \t7\().4h
trn2 \r3\().4h, \t5\().4h, \t7\().4h
trn1 \r0\().4H, \t4\().4H, \t6\().4H
trn2 \r2\().4H, \t4\().4H, \t6\().4H
trn1 \r1\().4H, \t5\().4H, \t7\().4H
trn2 \r3\().4H, \t5\().4H, \t7\().4H
.endm
.macro transpose_4x4H r0, r1, r2, r3, r4, r5, r6, r7
trn1 \r4\().4h, \r0\().4h, \r1\().4h
trn2 \r5\().4h, \r0\().4h, \r1\().4h
trn1 \r6\().4h, \r2\().4h, \r3\().4h
trn2 \r7\().4h, \r2\().4h, \r3\().4h
trn1 \r0\().2s, \r4\().2s, \r6\().2s
trn2 \r2\().2s, \r4\().2s, \r6\().2s
trn1 \r1\().2s, \r5\().2s, \r7\().2s
trn2 \r3\().2s, \r5\().2s, \r7\().2s
trn1 \r4\().4H, \r0\().4H, \r1\().4H
trn2 \r5\().4H, \r0\().4H, \r1\().4H
trn1 \r6\().4H, \r2\().4H, \r3\().4H
trn2 \r7\().4H, \r2\().4H, \r3\().4H
trn1 \r0\().2S, \r4\().2S, \r6\().2S
trn2 \r2\().2S, \r4\().2S, \r6\().2S
trn1 \r1\().2S, \r5\().2S, \r7\().2S
trn2 \r3\().2S, \r5\().2S, \r7\().2S
.endm
.macro transpose_8x8H r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
trn1 \r8\().8h, \r0\().8h, \r1\().8h
trn2 \r9\().8h, \r0\().8h, \r1\().8h
trn1 \r1\().8h, \r2\().8h, \r3\().8h
trn2 \r3\().8h, \r2\().8h, \r3\().8h
trn1 \r0\().8h, \r4\().8h, \r5\().8h
trn2 \r5\().8h, \r4\().8h, \r5\().8h
trn1 \r2\().8h, \r6\().8h, \r7\().8h
trn2 \r7\().8h, \r6\().8h, \r7\().8h
trn1 \r8\().8H, \r0\().8H, \r1\().8H
trn2 \r9\().8H, \r0\().8H, \r1\().8H
trn1 \r1\().8H, \r2\().8H, \r3\().8H
trn2 \r3\().8H, \r2\().8H, \r3\().8H
trn1 \r0\().8H, \r4\().8H, \r5\().8H
trn2 \r5\().8H, \r4\().8H, \r5\().8H
trn1 \r2\().8H, \r6\().8H, \r7\().8H
trn2 \r7\().8H, \r6\().8H, \r7\().8H
trn1 \r4\().4s, \r0\().4s, \r2\().4s
trn2 \r2\().4s, \r0\().4s, \r2\().4s
trn1 \r6\().4s, \r5\().4s, \r7\().4s
trn2 \r7\().4s, \r5\().4s, \r7\().4s
trn1 \r5\().4s, \r9\().4s, \r3\().4s
trn2 \r9\().4s, \r9\().4s, \r3\().4s
trn1 \r3\().4s, \r8\().4s, \r1\().4s
trn2 \r8\().4s, \r8\().4s, \r1\().4s
trn1 \r4\().4S, \r0\().4S, \r2\().4S
trn2 \r2\().4S, \r0\().4S, \r2\().4S
trn1 \r6\().4S, \r5\().4S, \r7\().4S
trn2 \r7\().4S, \r5\().4S, \r7\().4S
trn1 \r5\().4S, \r9\().4S, \r3\().4S
trn2 \r9\().4S, \r9\().4S, \r3\().4S
trn1 \r3\().4S, \r8\().4S, \r1\().4S
trn2 \r8\().4S, \r8\().4S, \r1\().4S
trn1 \r0\().2d, \r3\().2d, \r4\().2d
trn2 \r4\().2d, \r3\().2d, \r4\().2d
trn1 \r0\().2D, \r3\().2D, \r4\().2D
trn2 \r4\().2D, \r3\().2D, \r4\().2D
trn1 \r1\().2d, \r5\().2d, \r6\().2d
trn2 \r5\().2d, \r5\().2d, \r6\().2d
trn1 \r1\().2D, \r5\().2D, \r6\().2D
trn2 \r5\().2D, \r5\().2D, \r6\().2D
trn2 \r6\().2d, \r8\().2d, \r2\().2d
trn1 \r2\().2d, \r8\().2d, \r2\().2d
trn2 \r6\().2D, \r8\().2D, \r2\().2D
trn1 \r2\().2D, \r8\().2D, \r2\().2D
trn1 \r3\().2d, \r9\().2d, \r7\().2d
trn2 \r7\().2d, \r9\().2d, \r7\().2d
trn1 \r3\().2D, \r9\().2D, \r7\().2D
trn2 \r7\().2D, \r9\().2D, \r7\().2D
.endm
+51 -51
View File
@@ -33,81 +33,81 @@ const tab_x2, align=4
endconst
function ff_opus_deemphasis_neon, export=1
movrel x4, tab_st
ld1 {v4.4s}, [x4]
movrel x4, tab_x0
ld1 {v5.4s}, [x4]
movrel x4, tab_x1
ld1 {v6.4s}, [x4]
movrel x4, tab_x2
ld1 {v7.4s}, [x4]
movrel x4, tab_st
ld1 {v4.4s}, [x4]
movrel x4, tab_x0
ld1 {v5.4s}, [x4]
movrel x4, tab_x1
ld1 {v6.4s}, [x4]
movrel x4, tab_x2
ld1 {v7.4s}, [x4]
fmul v0.4s, v4.4s, v0.s[0]
fmul v0.4s, v4.4s, v0.s[0]
1: ld1 {v1.4s, v2.4s}, [x1], #32
1: ld1 {v1.4s, v2.4s}, [x1], #32
fmla v0.4s, v5.4s, v1.s[0]
fmul v3.4s, v7.4s, v2.s[2]
fmla v0.4s, v5.4s, v1.s[0]
fmul v3.4s, v7.4s, v2.s[2]
fmla v0.4s, v6.4s, v1.s[1]
fmla v3.4s, v6.4s, v2.s[1]
fmla v0.4s, v6.4s, v1.s[1]
fmla v3.4s, v6.4s, v2.s[1]
fmla v0.4s, v7.4s, v1.s[2]
fmla v3.4s, v5.4s, v2.s[0]
fmla v0.4s, v7.4s, v1.s[2]
fmla v3.4s, v5.4s, v2.s[0]
fadd v1.4s, v1.4s, v0.4s
fadd v2.4s, v2.4s, v3.4s
fadd v1.4s, v1.4s, v0.4s
fadd v2.4s, v2.4s, v3.4s
fmla v2.4s, v4.4s, v1.s[3]
fmla v2.4s, v4.4s, v1.s[3]
st1 {v1.4s, v2.4s}, [x0], #32
fmul v0.4s, v4.4s, v2.s[3]
st1 {v1.4s, v2.4s}, [x0], #32
fmul v0.4s, v4.4s, v2.s[3]
subs w2, w2, #8
b.gt 1b
subs w2, w2, #8
b.gt 1b
mov s0, v2.s[3]
mov s0, v2.s[3]
ret
endfunc
function ff_opus_postfilter_neon, export=1
ld1 {v0.4s}, [x2]
dup v1.4s, v0.s[1]
dup v2.4s, v0.s[2]
dup v0.4s, v0.s[0]
ld1 {v0.4s}, [x2]
dup v1.4s, v0.s[1]
dup v2.4s, v0.s[2]
dup v0.4s, v0.s[0]
add w1, w1, #2
sub x1, x0, x1, lsl #2
add w1, w1, #2
sub x1, x0, x1, lsl #2
ld1 {v3.4s}, [x1]
fmul v3.4s, v3.4s, v2.4s
ld1 {v3.4s}, [x1]
fmul v3.4s, v3.4s, v2.4s
1: add x1, x1, #4
ld1 {v4.4s}, [x1]
add x1, x1, #4
ld1 {v5.4s}, [x1]
add x1, x1, #4
ld1 {v6.4s}, [x1]
add x1, x1, #4
ld1 {v7.4s}, [x1]
1: add x1, x1, #4
ld1 {v4.4s}, [x1]
add x1, x1, #4
ld1 {v5.4s}, [x1]
add x1, x1, #4
ld1 {v6.4s}, [x1]
add x1, x1, #4
ld1 {v7.4s}, [x1]
fmla v3.4s, v7.4s, v2.4s
fadd v6.4s, v6.4s, v4.4s
fmla v3.4s, v7.4s, v2.4s
fadd v6.4s, v6.4s, v4.4s
ld1 {v4.4s}, [x0]
fmla v4.4s, v5.4s, v0.4s
ld1 {v4.4s}, [x0]
fmla v4.4s, v5.4s, v0.4s
fmul v6.4s, v6.4s, v1.4s
fadd v6.4s, v6.4s, v3.4s
fmul v6.4s, v6.4s, v1.4s
fadd v6.4s, v6.4s, v3.4s
fadd v4.4s, v4.4s, v6.4s
fmul v3.4s, v7.4s, v2.4s
fadd v4.4s, v4.4s, v6.4s
fmul v3.4s, v7.4s, v2.4s
st1 {v4.4s}, [x0], #16
st1 {v4.4s}, [x0], #16
subs w3, w3, #4
b.gt 1b
subs w3, w3, #4
b.gt 1b
ret
endfunc
+147 -147
View File
@@ -46,49 +46,49 @@ function ff_sbr_sum64x5_neon, export=1
add x3, x0, #192*4
add x4, x0, #256*4
mov x5, #64
1: ld1 {v0.4s}, [x0]
ld1 {v1.4s}, [x1], #16
fadd v0.4s, v0.4s, v1.4s
ld1 {v2.4s}, [x2], #16
fadd v0.4s, v0.4s, v2.4s
ld1 {v3.4s}, [x3], #16
fadd v0.4s, v0.4s, v3.4s
ld1 {v4.4s}, [x4], #16
fadd v0.4s, v0.4s, v4.4s
st1 {v0.4s}, [x0], #16
1: ld1 {v0.4S}, [x0]
ld1 {v1.4S}, [x1], #16
fadd v0.4S, v0.4S, v1.4S
ld1 {v2.4S}, [x2], #16
fadd v0.4S, v0.4S, v2.4S
ld1 {v3.4S}, [x3], #16
fadd v0.4S, v0.4S, v3.4S
ld1 {v4.4S}, [x4], #16
fadd v0.4S, v0.4S, v4.4S
st1 {v0.4S}, [x0], #16
subs x5, x5, #4
b.gt 1b
ret
endfunc
function ff_sbr_sum_square_neon, export=1
movi v0.4s, #0
1: ld1 {v1.4s}, [x0], #16
fmla v0.4s, v1.4s, v1.4s
movi v0.4S, #0
1: ld1 {v1.4S}, [x0], #16
fmla v0.4S, v1.4S, v1.4S
subs w1, w1, #2
b.gt 1b
faddp v0.4s, v0.4s, v0.4s
faddp v0.4s, v0.4s, v0.4s
faddp v0.4S, v0.4S, v0.4S
faddp v0.4S, v0.4S, v0.4S
ret
endfunc
function ff_sbr_neg_odd_64_neon, export=1
mov x1, x0
movi v5.4s, #1<<7, lsl #24
ld2 {v0.4s, v1.4s}, [x0], #32
eor v1.16b, v1.16b, v5.16b
ld2 {v2.4s, v3.4s}, [x0], #32
movi v5.4S, #1<<7, lsl #24
ld2 {v0.4S, v1.4S}, [x0], #32
eor v1.16B, v1.16B, v5.16B
ld2 {v2.4S, v3.4S}, [x0], #32
.rept 3
st2 {v0.4s, v1.4s}, [x1], #32
eor v3.16b, v3.16b, v5.16b
ld2 {v0.4s, v1.4s}, [x0], #32
st2 {v2.4s, v3.4s}, [x1], #32
eor v1.16b, v1.16b, v5.16b
ld2 {v2.4s, v3.4s}, [x0], #32
st2 {v0.4S, v1.4S}, [x1], #32
eor v3.16B, v3.16B, v5.16B
ld2 {v0.4S, v1.4S}, [x0], #32
st2 {v2.4S, v3.4S}, [x1], #32
eor v1.16B, v1.16B, v5.16B
ld2 {v2.4S, v3.4S}, [x0], #32
.endr
eor v3.16b, v3.16b, v5.16b
st2 {v0.4s, v1.4s}, [x1], #32
st2 {v2.4s, v3.4s}, [x1], #32
eor v3.16B, v3.16B, v5.16B
st2 {v0.4S, v1.4S}, [x1], #32
st2 {v2.4S, v3.4S}, [x1], #32
ret
endfunc
@@ -97,26 +97,26 @@ function ff_sbr_qmf_pre_shuffle_neon, export=1
add x2, x0, #64*4
mov x3, #-16
mov x4, #-4
movi v6.4s, #1<<7, lsl #24
ld1 {v0.2s}, [x0], #8
st1 {v0.2s}, [x2], #8
movi v6.4S, #1<<7, lsl #24
ld1 {v0.2S}, [x0], #8
st1 {v0.2S}, [x2], #8
.rept 7
ld1 {v1.4s}, [x1], x3
ld1 {v2.4s}, [x0], #16
eor v1.16b, v1.16b, v6.16b
rev64 v1.4s, v1.4s
ext v1.16b, v1.16b, v1.16b, #8
st2 {v1.4s, v2.4s}, [x2], #32
ld1 {v1.4S}, [x1], x3
ld1 {v2.4S}, [x0], #16
eor v1.16B, v1.16B, v6.16B
rev64 v1.4S, v1.4S
ext v1.16B, v1.16B, v1.16B, #8
st2 {v1.4S, v2.4S}, [x2], #32
.endr
add x1, x1, #8
ld1 {v1.2s}, [x1], x4
ld1 {v2.2s}, [x0], #8
ld1 {v1.s}[3], [x1]
ld1 {v2.s}[2], [x0]
eor v1.16b, v1.16b, v6.16b
rev64 v1.4s, v1.4s
st2 {v1.2s, v2.2s}, [x2], #16
st2 {v1.s, v2.s}[2], [x2]
ld1 {v1.2S}, [x1], x4
ld1 {v2.2S}, [x0], #8
ld1 {v1.S}[3], [x1]
ld1 {v2.S}[2], [x0]
eor v1.16B, v1.16B, v6.16B
rev64 v1.4S, v1.4S
st2 {v1.2S, v2.2S}, [x2], #16
st2 {v1.S, v2.S}[2], [x2]
ret
endfunc
@@ -124,13 +124,13 @@ function ff_sbr_qmf_post_shuffle_neon, export=1
add x2, x1, #60*4
mov x3, #-16
mov x4, #32
movi v6.4s, #1<<7, lsl #24
1: ld1 {v0.4s}, [x2], x3
ld1 {v1.4s}, [x1], #16
eor v0.16b, v0.16b, v6.16b
rev64 v0.4s, v0.4s
ext v0.16b, v0.16b, v0.16b, #8
st2 {v0.4s, v1.4s}, [x0], #32
movi v6.4S, #1<<7, lsl #24
1: ld1 {v0.4S}, [x2], x3
ld1 {v1.4S}, [x1], #16
eor v0.16B, v0.16B, v6.16B
rev64 v0.4S, v0.4S
ext v0.16B, v0.16B, v0.16B, #8
st2 {v0.4S, v1.4S}, [x0], #32
subs x4, x4, #4
b.gt 1b
ret
@@ -141,13 +141,13 @@ function ff_sbr_qmf_deint_neg_neon, export=1
add x2, x0, #60*4
mov x3, #-32
mov x4, #32
movi v2.4s, #1<<7, lsl #24
1: ld2 {v0.4s, v1.4s}, [x1], x3
eor v0.16b, v0.16b, v2.16b
rev64 v1.4s, v1.4s
ext v1.16b, v1.16b, v1.16b, #8
st1 {v0.4s}, [x2]
st1 {v1.4s}, [x0], #16
movi v2.4S, #1<<7, lsl #24
1: ld2 {v0.4S, v1.4S}, [x1], x3
eor v0.16B, v0.16B, v2.16B
rev64 v1.4S, v1.4S
ext v1.16B, v1.16B, v1.16B, #8
st1 {v0.4S}, [x2]
st1 {v1.4S}, [x0], #16
sub x2, x2, #16
subs x4, x4, #4
b.gt 1b
@@ -159,16 +159,16 @@ function ff_sbr_qmf_deint_bfly_neon, export=1
add x3, x0, #124*4
mov x4, #64
mov x5, #-16
1: ld1 {v0.4s}, [x1], #16
ld1 {v1.4s}, [x2], x5
rev64 v2.4s, v0.4s
ext v2.16b, v2.16b, v2.16b, #8
rev64 v3.4s, v1.4s
ext v3.16b, v3.16b, v3.16b, #8
fadd v1.4s, v1.4s, v2.4s
fsub v0.4s, v0.4s, v3.4s
st1 {v0.4s}, [x0], #16
st1 {v1.4s}, [x3], x5
1: ld1 {v0.4S}, [x1], #16
ld1 {v1.4S}, [x2], x5
rev64 v2.4S, v0.4S
ext v2.16B, v2.16B, v2.16B, #8
rev64 v3.4S, v1.4S
ext v3.16B, v3.16B, v3.16B, #8
fadd v1.4S, v1.4S, v2.4S
fsub v0.4S, v0.4S, v3.4S
st1 {v0.4S}, [x0], #16
st1 {v1.4S}, [x3], x5
subs x4, x4, #4
b.gt 1b
ret
@@ -178,32 +178,32 @@ function ff_sbr_hf_gen_neon, export=1
sxtw x4, w4
sxtw x5, w5
movrel x6, factors
ld1 {v7.4s}, [x6]
dup v1.4s, v0.s[0]
mov v2.8b, v1.8b
mov v2.s[2], v7.s[0]
mov v2.s[3], v7.s[0]
fmul v1.4s, v1.4s, v2.4s
ld1 {v0.d}[0], [x3]
ld1 {v0.d}[1], [x2]
fmul v0.4s, v0.4s, v1.4s
fmul v1.4s, v0.4s, v7.4s
rev64 v0.4s, v0.4s
ld1 {v7.4S}, [x6]
dup v1.4S, v0.S[0]
mov v2.8B, v1.8B
mov v2.S[2], v7.S[0]
mov v2.S[3], v7.S[0]
fmul v1.4S, v1.4S, v2.4S
ld1 {v0.D}[0], [x3]
ld1 {v0.D}[1], [x2]
fmul v0.4S, v0.4S, v1.4S
fmul v1.4S, v0.4S, v7.4S
rev64 v0.4S, v0.4S
sub x7, x5, x4
add x0, x0, x4, lsl #3
add x1, x1, x4, lsl #3
sub x1, x1, #16
1: ld1 {v2.4s}, [x1], #16
ld1 {v3.2s}, [x1]
fmul v4.4s, v2.4s, v1.4s
fmul v5.4s, v2.4s, v0.4s
faddp v4.4s, v4.4s, v4.4s
faddp v5.4s, v5.4s, v5.4s
faddp v4.4s, v4.4s, v4.4s
faddp v5.4s, v5.4s, v5.4s
mov v4.s[1], v5.s[0]
fadd v4.2s, v4.2s, v3.2s
st1 {v4.2s}, [x0], #8
1: ld1 {v2.4S}, [x1], #16
ld1 {v3.2S}, [x1]
fmul v4.4S, v2.4S, v1.4S
fmul v5.4S, v2.4S, v0.4S
faddp v4.4S, v4.4S, v4.4S
faddp v5.4S, v5.4S, v5.4S
faddp v4.4S, v4.4S, v4.4S
faddp v5.4S, v5.4S, v5.4S
mov v4.S[1], v5.S[0]
fadd v4.2S, v4.2S, v3.2S
st1 {v4.2S}, [x0], #8
sub x1, x1, #8
subs x7, x7, #1
b.gt 1b
@@ -215,10 +215,10 @@ function ff_sbr_hf_g_filt_neon, export=1
sxtw x4, w4
mov x5, #40*2*4
add x1, x1, x4, lsl #3
1: ld1 {v0.2s}, [x1], x5
ld1 {v1.s}[0], [x2], #4
fmul v2.4s, v0.4s, v1.s[0]
st1 {v2.2s}, [x0], #8
1: ld1 {v0.2S}, [x1], x5
ld1 {v1.S}[0], [x2], #4
fmul v2.4S, v0.4S, v1.S[0]
st1 {v2.2S}, [x0], #8
subs x3, x3, #1
b.gt 1b
ret
@@ -227,46 +227,46 @@ endfunc
function ff_sbr_autocorrelate_neon, export=1
mov x2, #38
movrel x3, factors
ld1 {v0.4s}, [x3]
movi v1.4s, #0
movi v2.4s, #0
movi v3.4s, #0
ld1 {v4.2s}, [x0], #8
ld1 {v5.2s}, [x0], #8
fmul v16.2s, v4.2s, v4.2s
fmul v17.2s, v5.2s, v4.s[0]
fmul v18.2s, v5.2s, v4.s[1]
1: ld1 {v5.d}[1], [x0], #8
fmla v1.2s, v4.2s, v4.2s
fmla v2.4s, v5.4s, v4.s[0]
fmla v3.4s, v5.4s, v4.s[1]
mov v4.d[0], v5.d[0]
mov v5.d[0], v5.d[1]
ld1 {v0.4S}, [x3]
movi v1.4S, #0
movi v2.4S, #0
movi v3.4S, #0
ld1 {v4.2S}, [x0], #8
ld1 {v5.2S}, [x0], #8
fmul v16.2S, v4.2S, v4.2S
fmul v17.2S, v5.2S, v4.S[0]
fmul v18.2S, v5.2S, v4.S[1]
1: ld1 {v5.D}[1], [x0], #8
fmla v1.2S, v4.2S, v4.2S
fmla v2.4S, v5.4S, v4.S[0]
fmla v3.4S, v5.4S, v4.S[1]
mov v4.D[0], v5.D[0]
mov v5.D[0], v5.D[1]
subs x2, x2, #1
b.gt 1b
fmul v19.2s, v4.2s, v4.2s
fmul v20.2s, v5.2s, v4.s[0]
fmul v21.2s, v5.2s, v4.s[1]
fadd v22.4s, v2.4s, v20.4s
fsub v22.4s, v22.4s, v17.4s
fadd v23.4s, v3.4s, v21.4s
fsub v23.4s, v23.4s, v18.4s
rev64 v23.4s, v23.4s
fmul v23.4s, v23.4s, v0.4s
fadd v22.4s, v22.4s, v23.4s
st1 {v22.4s}, [x1], #16
fadd v23.2s, v1.2s, v19.2s
fsub v23.2s, v23.2s, v16.2s
faddp v23.2s, v23.2s, v23.2s
st1 {v23.s}[0], [x1]
fmul v19.2S, v4.2S, v4.2S
fmul v20.2S, v5.2S, v4.S[0]
fmul v21.2S, v5.2S, v4.S[1]
fadd v22.4S, v2.4S, v20.4S
fsub v22.4S, v22.4S, v17.4S
fadd v23.4S, v3.4S, v21.4S
fsub v23.4S, v23.4S, v18.4S
rev64 v23.4S, v23.4S
fmul v23.4S, v23.4S, v0.4S
fadd v22.4S, v22.4S, v23.4S
st1 {v22.4S}, [x1], #16
fadd v23.2S, v1.2S, v19.2S
fsub v23.2S, v23.2S, v16.2S
faddp v23.2S, v23.2S, v23.2S
st1 {v23.S}[0], [x1]
add x1, x1, #8
rev64 v3.2s, v3.2s
fmul v3.2s, v3.2s, v0.2s
fadd v2.2s, v2.2s, v3.2s
st1 {v2.2s}, [x1]
rev64 v3.2S, v3.2S
fmul v3.2S, v3.2S, v0.2S
fadd v2.2S, v2.2S, v3.2S
st1 {v2.2S}, [x1]
add x1, x1, #16
faddp v1.2s, v1.2s, v1.2s
st1 {v1.s}[0], [x1]
faddp v1.2S, v1.2S, v1.2S
st1 {v1.S}[0], [x1]
ret
endfunc
@@ -278,25 +278,25 @@ endfunc
1: and x3, x3, #0x1ff
add x8, x7, x3, lsl #3
add x3, x3, #2
ld1 {v2.4s}, [x0]
ld1 {v3.2s}, [x1], #8
ld1 {v4.2s}, [x2], #8
ld1 {v5.4s}, [x8]
mov v6.16b, v2.16b
zip1 v3.4s, v3.4s, v3.4s
zip1 v4.4s, v4.4s, v4.4s
fmla v6.4s, v1.4s, v3.4s
fmla v2.4s, v5.4s, v4.4s
fcmeq v7.4s, v3.4s, #0
bif v2.16b, v6.16b, v7.16b
st1 {v2.4s}, [x0], #16
ld1 {v2.4S}, [x0]
ld1 {v3.2S}, [x1], #8
ld1 {v4.2S}, [x2], #8
ld1 {v5.4S}, [x8]
mov v6.16B, v2.16B
zip1 v3.4S, v3.4S, v3.4S
zip1 v4.4S, v4.4S, v4.4S
fmla v6.4S, v1.4S, v3.4S
fmla v2.4S, v5.4S, v4.4S
fcmeq v7.4S, v3.4S, #0
bif v2.16B, v6.16B, v7.16B
st1 {v2.4S}, [x0], #16
subs x5, x5, #2
b.gt 1b
.endm
function ff_sbr_hf_apply_noise_0_neon, export=1
movrel x9, phi_noise_0
ld1 {v1.4s}, [x9]
ld1 {v1.4S}, [x9]
apply_noise_common
ret
endfunc
@@ -305,14 +305,14 @@ function ff_sbr_hf_apply_noise_1_neon, export=1
movrel x9, phi_noise_1
and x4, x4, #1
add x9, x9, x4, lsl #4
ld1 {v1.4s}, [x9]
ld1 {v1.4S}, [x9]
apply_noise_common
ret
endfunc
function ff_sbr_hf_apply_noise_2_neon, export=1
movrel x9, phi_noise_2
ld1 {v1.4s}, [x9]
ld1 {v1.4S}, [x9]
apply_noise_common
ret
endfunc
@@ -321,7 +321,7 @@ function ff_sbr_hf_apply_noise_3_neon, export=1
movrel x9, phi_noise_3
and x4, x4, #1
add x9, x9, x4, lsl #4
ld1 {v1.4s}, [x9]
ld1 {v1.4S}, [x9]
apply_noise_common
ret
endfunc
+190 -190
View File
@@ -54,7 +54,7 @@ endconst
prfm pldl1keep, [\data]
mov x10, x30
movrel x3, idct_coeff_neon
ld1 {v0.2d}, [x3]
ld1 {v0.2D}, [x3]
.endm
.macro idct_end
@@ -74,146 +74,146 @@ endconst
.endm
.macro idct_col4_top y1, y2, y3, y4, i, l
smull\i v7.4s, \y3\l, z2
smull\i v16.4s, \y3\l, z6
smull\i v17.4s, \y2\l, z1
add v19.4s, v23.4s, v7.4s
smull\i v18.4s, \y2\l, z3
add v20.4s, v23.4s, v16.4s
smull\i v5.4s, \y2\l, z5
sub v21.4s, v23.4s, v16.4s
smull\i v6.4s, \y2\l, z7
sub v22.4s, v23.4s, v7.4s
smull\i v7.4S, \y3\l, z2
smull\i v16.4S, \y3\l, z6
smull\i v17.4S, \y2\l, z1
add v19.4S, v23.4S, v7.4S
smull\i v18.4S, \y2\l, z3
add v20.4S, v23.4S, v16.4S
smull\i v5.4S, \y2\l, z5
sub v21.4S, v23.4S, v16.4S
smull\i v6.4S, \y2\l, z7
sub v22.4S, v23.4S, v7.4S
smlal\i v17.4s, \y4\l, z3
smlsl\i v18.4s, \y4\l, z7
smlsl\i v5.4s, \y4\l, z1
smlsl\i v6.4s, \y4\l, z5
smlal\i v17.4S, \y4\l, z3
smlsl\i v18.4S, \y4\l, z7
smlsl\i v5.4S, \y4\l, z1
smlsl\i v6.4S, \y4\l, z5
.endm
.macro idct_row4_neon y1, y2, y3, y4, pass
ld1 {\y1\().2d,\y2\().2d}, [x2], #32
movi v23.4s, #1<<2, lsl #8
orr v5.16b, \y1\().16b, \y2\().16b
ld1 {\y3\().2d,\y4\().2d}, [x2], #32
orr v6.16b, \y3\().16b, \y4\().16b
orr v5.16b, v5.16b, v6.16b
mov x3, v5.d[1]
smlal v23.4s, \y1\().4h, z4
ld1 {\y1\().2D,\y2\().2D}, [x2], #32
movi v23.4S, #1<<2, lsl #8
orr v5.16B, \y1\().16B, \y2\().16B
ld1 {\y3\().2D,\y4\().2D}, [x2], #32
orr v6.16B, \y3\().16B, \y4\().16B
orr v5.16B, v5.16B, v6.16B
mov x3, v5.D[1]
smlal v23.4S, \y1\().4H, z4
idct_col4_top \y1, \y2, \y3, \y4, 1, .4h
idct_col4_top \y1, \y2, \y3, \y4, 1, .4H
cmp x3, #0
b.eq \pass\()f
smull2 v7.4s, \y1\().8h, z4
smlal2 v17.4s, \y2\().8h, z5
smlsl2 v18.4s, \y2\().8h, z1
smull2 v16.4s, \y3\().8h, z2
smlal2 v5.4s, \y2\().8h, z7
add v19.4s, v19.4s, v7.4s
sub v20.4s, v20.4s, v7.4s
sub v21.4s, v21.4s, v7.4s
add v22.4s, v22.4s, v7.4s
smlal2 v6.4s, \y2\().8h, z3
smull2 v7.4s, \y3\().8h, z6
smlal2 v17.4s, \y4\().8h, z7
smlsl2 v18.4s, \y4\().8h, z5
smlal2 v5.4s, \y4\().8h, z3
smlsl2 v6.4s, \y4\().8h, z1
add v19.4s, v19.4s, v7.4s
sub v20.4s, v20.4s, v16.4s
add v21.4s, v21.4s, v16.4s
sub v22.4s, v22.4s, v7.4s
smull2 v7.4S, \y1\().8H, z4
smlal2 v17.4S, \y2\().8H, z5
smlsl2 v18.4S, \y2\().8H, z1
smull2 v16.4S, \y3\().8H, z2
smlal2 v5.4S, \y2\().8H, z7
add v19.4S, v19.4S, v7.4S
sub v20.4S, v20.4S, v7.4S
sub v21.4S, v21.4S, v7.4S
add v22.4S, v22.4S, v7.4S
smlal2 v6.4S, \y2\().8H, z3
smull2 v7.4S, \y3\().8H, z6
smlal2 v17.4S, \y4\().8H, z7
smlsl2 v18.4S, \y4\().8H, z5
smlal2 v5.4S, \y4\().8H, z3
smlsl2 v6.4S, \y4\().8H, z1
add v19.4S, v19.4S, v7.4S
sub v20.4S, v20.4S, v16.4S
add v21.4S, v21.4S, v16.4S
sub v22.4S, v22.4S, v7.4S
\pass: add \y3\().4S, v19.4S, v17.4S
add \y4\().4s, v20.4s, v18.4s
shrn \y1\().4h, \y3\().4s, #ROW_SHIFT
shrn \y2\().4h, \y4\().4s, #ROW_SHIFT
add v7.4s, v21.4s, v5.4s
add v16.4s, v22.4s, v6.4s
shrn \y3\().4h, v7.4s, #ROW_SHIFT
shrn \y4\().4h, v16.4s, #ROW_SHIFT
sub v22.4s, v22.4s, v6.4s
sub v19.4s, v19.4s, v17.4s
sub v21.4s, v21.4s, v5.4s
shrn2 \y1\().8h, v22.4s, #ROW_SHIFT
sub v20.4s, v20.4s, v18.4s
shrn2 \y2\().8h, v21.4s, #ROW_SHIFT
shrn2 \y3\().8h, v20.4s, #ROW_SHIFT
shrn2 \y4\().8h, v19.4s, #ROW_SHIFT
add \y4\().4S, v20.4S, v18.4S
shrn \y1\().4H, \y3\().4S, #ROW_SHIFT
shrn \y2\().4H, \y4\().4S, #ROW_SHIFT
add v7.4S, v21.4S, v5.4S
add v16.4S, v22.4S, v6.4S
shrn \y3\().4H, v7.4S, #ROW_SHIFT
shrn \y4\().4H, v16.4S, #ROW_SHIFT
sub v22.4S, v22.4S, v6.4S
sub v19.4S, v19.4S, v17.4S
sub v21.4S, v21.4S, v5.4S
shrn2 \y1\().8H, v22.4S, #ROW_SHIFT
sub v20.4S, v20.4S, v18.4S
shrn2 \y2\().8H, v21.4S, #ROW_SHIFT
shrn2 \y3\().8H, v20.4S, #ROW_SHIFT
shrn2 \y4\().8H, v19.4S, #ROW_SHIFT
trn1 v16.8h, \y1\().8h, \y2\().8h
trn2 v17.8h, \y1\().8h, \y2\().8h
trn1 v18.8h, \y3\().8h, \y4\().8h
trn2 v19.8h, \y3\().8h, \y4\().8h
trn1 \y1\().4s, v16.4s, v18.4s
trn1 \y2\().4s, v17.4s, v19.4s
trn2 \y3\().4s, v16.4s, v18.4s
trn2 \y4\().4s, v17.4s, v19.4s
trn1 v16.8H, \y1\().8H, \y2\().8H
trn2 v17.8H, \y1\().8H, \y2\().8H
trn1 v18.8H, \y3\().8H, \y4\().8H
trn2 v19.8H, \y3\().8H, \y4\().8H
trn1 \y1\().4S, v16.4S, v18.4S
trn1 \y2\().4S, v17.4S, v19.4S
trn2 \y3\().4S, v16.4S, v18.4S
trn2 \y4\().4S, v17.4S, v19.4S
.endm
.macro declare_idct_col4_neon i, l
function idct_col4_neon\i
dup v23.4h, z4c
dup v23.4H, z4c
.if \i == 1
add v23.4h, v23.4h, v24.4h
add v23.4H, v23.4H, v24.4H
.else
mov v5.d[0], v24.d[1]
add v23.4h, v23.4h, v5.4h
mov v5.D[0], v24.D[1]
add v23.4H, v23.4H, v5.4H
.endif
smull v23.4s, v23.4h, z4
smull v23.4S, v23.4H, z4
idct_col4_top v24, v25, v26, v27, \i, \l
mov x4, v28.d[\i - 1]
mov x5, v29.d[\i - 1]
mov x4, v28.D[\i - 1]
mov x5, v29.D[\i - 1]
cmp x4, #0
b.eq 1f
smull\i v7.4s, v28\l, z4
add v19.4s, v19.4s, v7.4s
sub v20.4s, v20.4s, v7.4s
sub v21.4s, v21.4s, v7.4s
add v22.4s, v22.4s, v7.4s
smull\i v7.4S, v28\l, z4
add v19.4S, v19.4S, v7.4S
sub v20.4S, v20.4S, v7.4S
sub v21.4S, v21.4S, v7.4S
add v22.4S, v22.4S, v7.4S
1: mov x4, v30.d[\i - 1]
1: mov x4, v30.D[\i - 1]
cmp x5, #0
b.eq 2f
smlal\i v17.4s, v29\l, z5
smlsl\i v18.4s, v29\l, z1
smlal\i v5.4s, v29\l, z7
smlal\i v6.4s, v29\l, z3
smlal\i v17.4S, v29\l, z5
smlsl\i v18.4S, v29\l, z1
smlal\i v5.4S, v29\l, z7
smlal\i v6.4S, v29\l, z3
2: mov x5, v31.d[\i - 1]
2: mov x5, v31.D[\i - 1]
cmp x4, #0
b.eq 3f
smull\i v7.4s, v30\l, z6
smull\i v16.4s, v30\l, z2
add v19.4s, v19.4s, v7.4s
sub v22.4s, v22.4s, v7.4s
sub v20.4s, v20.4s, v16.4s
add v21.4s, v21.4s, v16.4s
smull\i v7.4S, v30\l, z6
smull\i v16.4S, v30\l, z2
add v19.4S, v19.4S, v7.4S
sub v22.4S, v22.4S, v7.4S
sub v20.4S, v20.4S, v16.4S
add v21.4S, v21.4S, v16.4S
3: cmp x5, #0
b.eq 4f
smlal\i v17.4s, v31\l, z7
smlsl\i v18.4s, v31\l, z5
smlal\i v5.4s, v31\l, z3
smlsl\i v6.4s, v31\l, z1
smlal\i v17.4S, v31\l, z7
smlsl\i v18.4S, v31\l, z5
smlal\i v5.4S, v31\l, z3
smlsl\i v6.4S, v31\l, z1
4: addhn v7.4h, v19.4s, v17.4s
addhn2 v7.8h, v20.4s, v18.4s
subhn v18.4h, v20.4s, v18.4s
subhn2 v18.8h, v19.4s, v17.4s
4: addhn v7.4H, v19.4S, v17.4S
addhn2 v7.8H, v20.4S, v18.4S
subhn v18.4H, v20.4S, v18.4S
subhn2 v18.8H, v19.4S, v17.4S
addhn v16.4h, v21.4s, v5.4s
addhn2 v16.8h, v22.4s, v6.4s
subhn v17.4h, v22.4s, v6.4s
subhn2 v17.8h, v21.4s, v5.4s
addhn v16.4H, v21.4S, v5.4S
addhn2 v16.8H, v22.4S, v6.4S
subhn v17.4H, v22.4S, v6.4S
subhn2 v17.8H, v21.4S, v5.4S
ret
endfunc
@@ -229,33 +229,33 @@ function ff_simple_idct_put_neon, export=1
idct_row4_neon v28, v29, v30, v31, 2
bl idct_col4_neon1
sqshrun v1.8b, v7.8h, #COL_SHIFT-16
sqshrun2 v1.16b, v16.8h, #COL_SHIFT-16
sqshrun v3.8b, v17.8h, #COL_SHIFT-16
sqshrun2 v3.16b, v18.8h, #COL_SHIFT-16
sqshrun v1.8B, v7.8H, #COL_SHIFT-16
sqshrun2 v1.16B, v16.8H, #COL_SHIFT-16
sqshrun v3.8B, v17.8H, #COL_SHIFT-16
sqshrun2 v3.16B, v18.8H, #COL_SHIFT-16
bl idct_col4_neon2
sqshrun v2.8b, v7.8h, #COL_SHIFT-16
sqshrun2 v2.16b, v16.8h, #COL_SHIFT-16
sqshrun v4.8b, v17.8h, #COL_SHIFT-16
sqshrun2 v4.16b, v18.8h, #COL_SHIFT-16
sqshrun v2.8B, v7.8H, #COL_SHIFT-16
sqshrun2 v2.16B, v16.8H, #COL_SHIFT-16
sqshrun v4.8B, v17.8H, #COL_SHIFT-16
sqshrun2 v4.16B, v18.8H, #COL_SHIFT-16
zip1 v16.4s, v1.4s, v2.4s
zip2 v17.4s, v1.4s, v2.4s
zip1 v16.4S, v1.4S, v2.4S
zip2 v17.4S, v1.4S, v2.4S
st1 {v16.d}[0], [x0], x1
st1 {v16.d}[1], [x0], x1
st1 {v16.D}[0], [x0], x1
st1 {v16.D}[1], [x0], x1
zip1 v18.4s, v3.4s, v4.4s
zip2 v19.4s, v3.4s, v4.4s
zip1 v18.4S, v3.4S, v4.4S
zip2 v19.4S, v3.4S, v4.4S
st1 {v17.d}[0], [x0], x1
st1 {v17.d}[1], [x0], x1
st1 {v18.d}[0], [x0], x1
st1 {v18.d}[1], [x0], x1
st1 {v19.d}[0], [x0], x1
st1 {v19.d}[1], [x0], x1
st1 {v17.D}[0], [x0], x1
st1 {v17.D}[1], [x0], x1
st1 {v18.D}[0], [x0], x1
st1 {v18.D}[1], [x0], x1
st1 {v19.D}[0], [x0], x1
st1 {v19.D}[1], [x0], x1
idct_end
endfunc
@@ -267,59 +267,59 @@ function ff_simple_idct_add_neon, export=1
idct_row4_neon v28, v29, v30, v31, 2
bl idct_col4_neon1
sshr v1.8h, v7.8h, #COL_SHIFT-16
sshr v2.8h, v16.8h, #COL_SHIFT-16
sshr v3.8h, v17.8h, #COL_SHIFT-16
sshr v4.8h, v18.8h, #COL_SHIFT-16
sshr v1.8H, v7.8H, #COL_SHIFT-16
sshr v2.8H, v16.8H, #COL_SHIFT-16
sshr v3.8H, v17.8H, #COL_SHIFT-16
sshr v4.8H, v18.8H, #COL_SHIFT-16
bl idct_col4_neon2
sshr v7.8h, v7.8h, #COL_SHIFT-16
sshr v16.8h, v16.8h, #COL_SHIFT-16
sshr v17.8h, v17.8h, #COL_SHIFT-16
sshr v18.8h, v18.8h, #COL_SHIFT-16
sshr v7.8H, v7.8H, #COL_SHIFT-16
sshr v16.8H, v16.8H, #COL_SHIFT-16
sshr v17.8H, v17.8H, #COL_SHIFT-16
sshr v18.8H, v18.8H, #COL_SHIFT-16
mov x9, x0
ld1 {v19.d}[0], [x0], x1
zip1 v23.2d, v1.2d, v7.2d
zip2 v24.2d, v1.2d, v7.2d
ld1 {v19.d}[1], [x0], x1
zip1 v25.2d, v2.2d, v16.2d
zip2 v26.2d, v2.2d, v16.2d
ld1 {v20.d}[0], [x0], x1
zip1 v27.2d, v3.2d, v17.2d
zip2 v28.2d, v3.2d, v17.2d
ld1 {v20.d}[1], [x0], x1
zip1 v29.2d, v4.2d, v18.2d
zip2 v30.2d, v4.2d, v18.2d
ld1 {v21.d}[0], [x0], x1
uaddw v23.8h, v23.8h, v19.8b
uaddw2 v24.8h, v24.8h, v19.16b
ld1 {v21.d}[1], [x0], x1
sqxtun v23.8b, v23.8h
sqxtun2 v23.16b, v24.8h
ld1 {v22.d}[0], [x0], x1
uaddw v24.8h, v25.8h, v20.8b
uaddw2 v25.8h, v26.8h, v20.16b
ld1 {v22.d}[1], [x0], x1
sqxtun v24.8b, v24.8h
sqxtun2 v24.16b, v25.8h
st1 {v23.d}[0], [x9], x1
uaddw v25.8h, v27.8h, v21.8b
uaddw2 v26.8h, v28.8h, v21.16b
st1 {v23.d}[1], [x9], x1
sqxtun v25.8b, v25.8h
sqxtun2 v25.16b, v26.8h
st1 {v24.d}[0], [x9], x1
uaddw v26.8h, v29.8h, v22.8b
uaddw2 v27.8h, v30.8h, v22.16b
st1 {v24.d}[1], [x9], x1
sqxtun v26.8b, v26.8h
sqxtun2 v26.16b, v27.8h
st1 {v25.d}[0], [x9], x1
st1 {v25.d}[1], [x9], x1
st1 {v26.d}[0], [x9], x1
st1 {v26.d}[1], [x9], x1
ld1 {v19.D}[0], [x0], x1
zip1 v23.2D, v1.2D, v7.2D
zip2 v24.2D, v1.2D, v7.2D
ld1 {v19.D}[1], [x0], x1
zip1 v25.2D, v2.2D, v16.2D
zip2 v26.2D, v2.2D, v16.2D
ld1 {v20.D}[0], [x0], x1
zip1 v27.2D, v3.2D, v17.2D
zip2 v28.2D, v3.2D, v17.2D
ld1 {v20.D}[1], [x0], x1
zip1 v29.2D, v4.2D, v18.2D
zip2 v30.2D, v4.2D, v18.2D
ld1 {v21.D}[0], [x0], x1
uaddw v23.8H, v23.8H, v19.8B
uaddw2 v24.8H, v24.8H, v19.16B
ld1 {v21.D}[1], [x0], x1
sqxtun v23.8B, v23.8H
sqxtun2 v23.16B, v24.8H
ld1 {v22.D}[0], [x0], x1
uaddw v24.8H, v25.8H, v20.8B
uaddw2 v25.8H, v26.8H, v20.16B
ld1 {v22.D}[1], [x0], x1
sqxtun v24.8B, v24.8H
sqxtun2 v24.16B, v25.8H
st1 {v23.D}[0], [x9], x1
uaddw v25.8H, v27.8H, v21.8B
uaddw2 v26.8H, v28.8H, v21.16B
st1 {v23.D}[1], [x9], x1
sqxtun v25.8B, v25.8H
sqxtun2 v25.16B, v26.8H
st1 {v24.D}[0], [x9], x1
uaddw v26.8H, v29.8H, v22.8B
uaddw2 v27.8H, v30.8H, v22.16B
st1 {v24.D}[1], [x9], x1
sqxtun v26.8B, v26.8H
sqxtun2 v26.16B, v27.8H
st1 {v25.D}[0], [x9], x1
st1 {v25.D}[1], [x9], x1
st1 {v26.D}[0], [x9], x1
st1 {v26.D}[1], [x9], x1
idct_end
endfunc
@@ -333,30 +333,30 @@ function ff_simple_idct_neon, export=1
sub x2, x2, #128
bl idct_col4_neon1
sshr v1.8h, v7.8h, #COL_SHIFT-16
sshr v2.8h, v16.8h, #COL_SHIFT-16
sshr v3.8h, v17.8h, #COL_SHIFT-16
sshr v4.8h, v18.8h, #COL_SHIFT-16
sshr v1.8H, v7.8H, #COL_SHIFT-16
sshr v2.8H, v16.8H, #COL_SHIFT-16
sshr v3.8H, v17.8H, #COL_SHIFT-16
sshr v4.8H, v18.8H, #COL_SHIFT-16
bl idct_col4_neon2
sshr v7.8h, v7.8h, #COL_SHIFT-16
sshr v16.8h, v16.8h, #COL_SHIFT-16
sshr v17.8h, v17.8h, #COL_SHIFT-16
sshr v18.8h, v18.8h, #COL_SHIFT-16
sshr v7.8H, v7.8H, #COL_SHIFT-16
sshr v16.8H, v16.8H, #COL_SHIFT-16
sshr v17.8H, v17.8H, #COL_SHIFT-16
sshr v18.8H, v18.8H, #COL_SHIFT-16
zip1 v23.2d, v1.2d, v7.2d
zip2 v24.2d, v1.2d, v7.2d
st1 {v23.2d,v24.2d}, [x2], #32
zip1 v25.2d, v2.2d, v16.2d
zip2 v26.2d, v2.2d, v16.2d
st1 {v25.2d,v26.2d}, [x2], #32
zip1 v27.2d, v3.2d, v17.2d
zip2 v28.2d, v3.2d, v17.2d
st1 {v27.2d,v28.2d}, [x2], #32
zip1 v29.2d, v4.2d, v18.2d
zip2 v30.2d, v4.2d, v18.2d
st1 {v29.2d,v30.2d}, [x2], #32
zip1 v23.2D, v1.2D, v7.2D
zip2 v24.2D, v1.2D, v7.2D
st1 {v23.2D,v24.2D}, [x2], #32
zip1 v25.2D, v2.2D, v16.2D
zip2 v26.2D, v2.2D, v16.2D
st1 {v25.2D,v26.2D}, [x2], #32
zip1 v27.2D, v3.2D, v17.2D
zip2 v28.2D, v3.2D, v17.2D
st1 {v27.2D,v28.2D}, [x2], #32
zip1 v29.2D, v4.2D, v18.2D
zip2 v30.2D, v4.2D, v18.2D
st1 {v29.2D,v30.2D}, [x2], #32
idct_end
endfunc
+151 -151
View File
@@ -330,32 +330,32 @@ endfunc
// v17: hev
// convert to signed value:
eor v3.16b, v3.16b, v21.16b // PS0 = P0 ^ 0x80
eor v4.16b, v4.16b, v21.16b // QS0 = Q0 ^ 0x80
eor v3.16b, v3.16b, v21.16b // PS0 = P0 ^ 0x80
eor v4.16b, v4.16b, v21.16b // QS0 = Q0 ^ 0x80
movi v20.8h, #3
ssubl v18.8h, v4.8b, v3.8b // QS0 - PS0
ssubl2 v19.8h, v4.16b, v3.16b // (widened to 16bit)
eor v2.16b, v2.16b, v21.16b // PS1 = P1 ^ 0x80
eor v5.16b, v5.16b, v21.16b // QS1 = Q1 ^ 0x80
mul v18.8h, v18.8h, v20.8h // w = 3 * (QS0 - PS0)
mul v19.8h, v19.8h, v20.8h
movi v20.8h, #3
ssubl v18.8h, v4.8b, v3.8b // QS0 - PS0
ssubl2 v19.8h, v4.16b, v3.16b // (widened to 16bit)
eor v2.16b, v2.16b, v21.16b // PS1 = P1 ^ 0x80
eor v5.16b, v5.16b, v21.16b // QS1 = Q1 ^ 0x80
mul v18.8h, v18.8h, v20.8h // w = 3 * (QS0 - PS0)
mul v19.8h, v19.8h, v20.8h
sqsub v20.16b, v2.16b, v5.16b // clamp(PS1-QS1)
movi v22.16b, #4
movi v23.16b, #3
sqsub v20.16b, v2.16b, v5.16b // clamp(PS1-QS1)
movi v22.16b, #4
movi v23.16b, #3
.if \inner
and v20.16b, v20.16b, v17.16b // if(hev) w += clamp(PS1-QS1)
and v20.16b, v20.16b, v17.16b // if(hev) w += clamp(PS1-QS1)
.endif
saddw v18.8h, v18.8h, v20.8b // w += clamp(PS1-QS1)
saddw2 v19.8h, v19.8h, v20.16b
sqxtn v18.8b, v18.8h // narrow result back into v18
sqxtn2 v18.16b, v19.8h
saddw v18.8h, v18.8h, v20.8b // w += clamp(PS1-QS1)
saddw2 v19.8h, v19.8h, v20.16b
sqxtn v18.8b, v18.8h // narrow result back into v18
sqxtn2 v18.16b, v19.8h
.if !\inner && !\simple
eor v1.16b, v1.16b, v21.16b // PS2 = P2 ^ 0x80
eor v6.16b, v6.16b, v21.16b // QS2 = Q2 ^ 0x80
eor v1.16b, v1.16b, v21.16b // PS2 = P2 ^ 0x80
eor v6.16b, v6.16b, v21.16b // QS2 = Q2 ^ 0x80
.endif
and v18.16b, v18.16b, v16.16b // w &= normal_limit
and v18.16b, v18.16b, v16.16b // w &= normal_limit
// registers used at this point..
// v0 -> P3 (don't corrupt)
@@ -375,44 +375,44 @@ endfunc
// P0 = s2u(PS0 + c2);
.if \simple
sqadd v19.16b, v18.16b, v22.16b // c1 = clamp((w&hev)+4)
sqadd v20.16b, v18.16b, v23.16b // c2 = clamp((w&hev)+3)
sshr v19.16b, v19.16b, #3 // c1 >>= 3
sshr v20.16b, v20.16b, #3 // c2 >>= 3
sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1)
sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2)
eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80
eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80
eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80
eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80
sqadd v19.16b, v18.16b, v22.16b // c1 = clamp((w&hev)+4)
sqadd v20.16b, v18.16b, v23.16b // c2 = clamp((w&hev)+3)
sshr v19.16b, v19.16b, #3 // c1 >>= 3
sshr v20.16b, v20.16b, #3 // c2 >>= 3
sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1)
sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2)
eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80
eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80
eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80
eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80
.elseif \inner
// the !is4tap case of filter_common, only used for inner blocks
// c3 = ((c1&~hev) + 1) >> 1;
// Q1 = s2u(QS1 - c3);
// P1 = s2u(PS1 + c3);
sqadd v19.16b, v18.16b, v22.16b // c1 = clamp((w&hev)+4)
sqadd v20.16b, v18.16b, v23.16b // c2 = clamp((w&hev)+3)
sshr v19.16b, v19.16b, #3 // c1 >>= 3
sshr v20.16b, v20.16b, #3 // c2 >>= 3
sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1)
sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2)
bic v19.16b, v19.16b, v17.16b // c1 & ~hev
eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80
srshr v19.16b, v19.16b, #1 // c3 >>= 1
eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80
sqsub v5.16b, v5.16b, v19.16b // QS1 = clamp(QS1-c3)
sqadd v2.16b, v2.16b, v19.16b // PS1 = clamp(PS1+c3)
eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80
eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80
sqadd v19.16b, v18.16b, v22.16b // c1 = clamp((w&hev)+4)
sqadd v20.16b, v18.16b, v23.16b // c2 = clamp((w&hev)+3)
sshr v19.16b, v19.16b, #3 // c1 >>= 3
sshr v20.16b, v20.16b, #3 // c2 >>= 3
sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1)
sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2)
bic v19.16b, v19.16b, v17.16b // c1 & ~hev
eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80
srshr v19.16b, v19.16b, #1 // c3 >>= 1
eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80
sqsub v5.16b, v5.16b, v19.16b // QS1 = clamp(QS1-c3)
sqadd v2.16b, v2.16b, v19.16b // PS1 = clamp(PS1+c3)
eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80
eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80
.else
and v20.16b, v18.16b, v17.16b // w & hev
sqadd v19.16b, v20.16b, v22.16b // c1 = clamp((w&hev)+4)
sqadd v20.16b, v20.16b, v23.16b // c2 = clamp((w&hev)+3)
sshr v19.16b, v19.16b, #3 // c1 >>= 3
sshr v20.16b, v20.16b, #3 // c2 >>= 3
bic v18.16b, v18.16b, v17.16b // w &= ~hev
sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1)
sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2)
and v20.16b, v18.16b, v17.16b // w & hev
sqadd v19.16b, v20.16b, v22.16b // c1 = clamp((w&hev)+4)
sqadd v20.16b, v20.16b, v23.16b // c2 = clamp((w&hev)+3)
sshr v19.16b, v19.16b, #3 // c1 >>= 3
sshr v20.16b, v20.16b, #3 // c2 >>= 3
bic v18.16b, v18.16b, v17.16b // w &= ~hev
sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1)
sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2)
// filter_mbedge:
// a = clamp((27*w + 63) >> 7);
@@ -424,35 +424,35 @@ endfunc
// a = clamp((9*w + 63) >> 7);
// Q2 = s2u(QS2 - a);
// P2 = s2u(PS2 + a);
movi v17.8h, #63
sshll v22.8h, v18.8b, #3
sshll2 v23.8h, v18.16b, #3
saddw v22.8h, v22.8h, v18.8b
saddw2 v23.8h, v23.8h, v18.16b
add v16.8h, v17.8h, v22.8h
add v17.8h, v17.8h, v23.8h // 9*w + 63
add v19.8h, v16.8h, v22.8h
add v20.8h, v17.8h, v23.8h // 18*w + 63
add v22.8h, v19.8h, v22.8h
add v23.8h, v20.8h, v23.8h // 27*w + 63
sqshrn v16.8b, v16.8h, #7
sqshrn2 v16.16b, v17.8h, #7 // clamp(( 9*w + 63)>>7)
sqshrn v19.8b, v19.8h, #7
sqshrn2 v19.16b, v20.8h, #7 // clamp((18*w + 63)>>7)
sqshrn v22.8b, v22.8h, #7
sqshrn2 v22.16b, v23.8h, #7 // clamp((27*w + 63)>>7)
sqadd v1.16b, v1.16b, v16.16b // PS2 = clamp(PS2+a)
sqsub v6.16b, v6.16b, v16.16b // QS2 = clamp(QS2-a)
sqadd v2.16b, v2.16b, v19.16b // PS1 = clamp(PS1+a)
sqsub v5.16b, v5.16b, v19.16b // QS1 = clamp(QS1-a)
sqadd v3.16b, v3.16b, v22.16b // PS0 = clamp(PS0+a)
sqsub v4.16b, v4.16b, v22.16b // QS0 = clamp(QS0-a)
eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80
eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80
eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80
eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80
eor v1.16b, v1.16b, v21.16b // P2 = PS2 ^ 0x80
eor v6.16b, v6.16b, v21.16b // Q2 = QS2 ^ 0x80
movi v17.8h, #63
sshll v22.8h, v18.8b, #3
sshll2 v23.8h, v18.16b, #3
saddw v22.8h, v22.8h, v18.8b
saddw2 v23.8h, v23.8h, v18.16b
add v16.8h, v17.8h, v22.8h
add v17.8h, v17.8h, v23.8h // 9*w + 63
add v19.8h, v16.8h, v22.8h
add v20.8h, v17.8h, v23.8h // 18*w + 63
add v22.8h, v19.8h, v22.8h
add v23.8h, v20.8h, v23.8h // 27*w + 63
sqshrn v16.8b, v16.8h, #7
sqshrn2 v16.16b, v17.8h, #7 // clamp(( 9*w + 63)>>7)
sqshrn v19.8b, v19.8h, #7
sqshrn2 v19.16b, v20.8h, #7 // clamp((18*w + 63)>>7)
sqshrn v22.8b, v22.8h, #7
sqshrn2 v22.16b, v23.8h, #7 // clamp((27*w + 63)>>7)
sqadd v1.16b, v1.16b, v16.16b // PS2 = clamp(PS2+a)
sqsub v6.16b, v6.16b, v16.16b // QS2 = clamp(QS2-a)
sqadd v2.16b, v2.16b, v19.16b // PS1 = clamp(PS1+a)
sqsub v5.16b, v5.16b, v19.16b // QS1 = clamp(QS1-a)
sqadd v3.16b, v3.16b, v22.16b // PS0 = clamp(PS0+a)
sqsub v4.16b, v4.16b, v22.16b // QS0 = clamp(QS0-a)
eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80
eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80
eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80
eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80
eor v1.16b, v1.16b, v21.16b // P2 = PS2 ^ 0x80
eor v6.16b, v6.16b, v21.16b // Q2 = QS2 ^ 0x80
.endif
.endm
@@ -507,48 +507,48 @@ function ff_vp8_v_loop_filter8uv\name\()_neon, export=1
sub x0, x0, x2, lsl #2
sub x1, x1, x2, lsl #2
// Load pixels:
ld1 {v0.d}[0], [x0], x2 // P3
ld1 {v0.d}[1], [x1], x2 // P3
ld1 {v1.d}[0], [x0], x2 // P2
ld1 {v1.d}[1], [x1], x2 // P2
ld1 {v2.d}[0], [x0], x2 // P1
ld1 {v2.d}[1], [x1], x2 // P1
ld1 {v3.d}[0], [x0], x2 // P0
ld1 {v3.d}[1], [x1], x2 // P0
ld1 {v4.d}[0], [x0], x2 // Q0
ld1 {v4.d}[1], [x1], x2 // Q0
ld1 {v5.d}[0], [x0], x2 // Q1
ld1 {v5.d}[1], [x1], x2 // Q1
ld1 {v6.d}[0], [x0], x2 // Q2
ld1 {v6.d}[1], [x1], x2 // Q2
ld1 {v7.d}[0], [x0] // Q3
ld1 {v7.d}[1], [x1] // Q3
ld1 {v0.d}[0], [x0], x2 // P3
ld1 {v0.d}[1], [x1], x2 // P3
ld1 {v1.d}[0], [x0], x2 // P2
ld1 {v1.d}[1], [x1], x2 // P2
ld1 {v2.d}[0], [x0], x2 // P1
ld1 {v2.d}[1], [x1], x2 // P1
ld1 {v3.d}[0], [x0], x2 // P0
ld1 {v3.d}[1], [x1], x2 // P0
ld1 {v4.d}[0], [x0], x2 // Q0
ld1 {v4.d}[1], [x1], x2 // Q0
ld1 {v5.d}[0], [x0], x2 // Q1
ld1 {v5.d}[1], [x1], x2 // Q1
ld1 {v6.d}[0], [x0], x2 // Q2
ld1 {v6.d}[1], [x1], x2 // Q2
ld1 {v7.d}[0], [x0] // Q3
ld1 {v7.d}[1], [x1] // Q3
dup v22.16b, w3 // flim_E
dup v23.16b, w4 // flim_I
dup v22.16b, w3 // flim_E
dup v23.16b, w4 // flim_I
vp8_loop_filter inner=\inner, hev_thresh=w5
// back up to P2: u,v -= stride * 6
sub x0, x0, x2, lsl #2
sub x1, x1, x2, lsl #2
sub x0, x0, x2, lsl #1
sub x1, x1, x2, lsl #1
sub x0, x0, x2, lsl #2
sub x1, x1, x2, lsl #2
sub x0, x0, x2, lsl #1
sub x1, x1, x2, lsl #1
// Store pixels:
st1 {v1.d}[0], [x0], x2 // P2
st1 {v1.d}[1], [x1], x2 // P2
st1 {v2.d}[0], [x0], x2 // P1
st1 {v2.d}[1], [x1], x2 // P1
st1 {v3.d}[0], [x0], x2 // P0
st1 {v3.d}[1], [x1], x2 // P0
st1 {v4.d}[0], [x0], x2 // Q0
st1 {v4.d}[1], [x1], x2 // Q0
st1 {v5.d}[0], [x0], x2 // Q1
st1 {v5.d}[1], [x1], x2 // Q1
st1 {v6.d}[0], [x0] // Q2
st1 {v6.d}[1], [x1] // Q2
st1 {v1.d}[0], [x0], x2 // P2
st1 {v1.d}[1], [x1], x2 // P2
st1 {v2.d}[0], [x0], x2 // P1
st1 {v2.d}[1], [x1], x2 // P1
st1 {v3.d}[0], [x0], x2 // P0
st1 {v3.d}[1], [x1], x2 // P0
st1 {v4.d}[0], [x0], x2 // Q0
st1 {v4.d}[1], [x1], x2 // Q0
st1 {v5.d}[0], [x0], x2 // Q1
st1 {v5.d}[1], [x1], x2 // Q1
st1 {v6.d}[0], [x0] // Q2
st1 {v6.d}[1], [x1] // Q2
ret
endfunc
@@ -579,7 +579,7 @@ function ff_vp8_h_loop_filter16\name\()_neon, export=1
ld1 {v6.d}[1], [x0], x1
ld1 {v7.d}[1], [x0], x1
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
dup v22.16b, w2 // flim_E
.if !\simple
@@ -590,7 +590,7 @@ function ff_vp8_h_loop_filter16\name\()_neon, export=1
sub x0, x0, x1, lsl #4 // backup 16 rows
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
// Store pixels:
st1 {v0.d}[0], [x0], x1
@@ -624,24 +624,24 @@ function ff_vp8_h_loop_filter8uv\name\()_neon, export=1
sub x1, x1, #4
// Load pixels:
ld1 {v0.d}[0], [x0], x2 // load u
ld1 {v0.d}[1], [x1], x2 // load v
ld1 {v1.d}[0], [x0], x2
ld1 {v1.d}[1], [x1], x2
ld1 {v2.d}[0], [x0], x2
ld1 {v2.d}[1], [x1], x2
ld1 {v3.d}[0], [x0], x2
ld1 {v3.d}[1], [x1], x2
ld1 {v4.d}[0], [x0], x2
ld1 {v4.d}[1], [x1], x2
ld1 {v5.d}[0], [x0], x2
ld1 {v5.d}[1], [x1], x2
ld1 {v6.d}[0], [x0], x2
ld1 {v6.d}[1], [x1], x2
ld1 {v7.d}[0], [x0], x2
ld1 {v7.d}[1], [x1], x2
ld1 {v0.d}[0], [x0], x2 // load u
ld1 {v0.d}[1], [x1], x2 // load v
ld1 {v1.d}[0], [x0], x2
ld1 {v1.d}[1], [x1], x2
ld1 {v2.d}[0], [x0], x2
ld1 {v2.d}[1], [x1], x2
ld1 {v3.d}[0], [x0], x2
ld1 {v3.d}[1], [x1], x2
ld1 {v4.d}[0], [x0], x2
ld1 {v4.d}[1], [x1], x2
ld1 {v5.d}[0], [x0], x2
ld1 {v5.d}[1], [x1], x2
ld1 {v6.d}[0], [x0], x2
ld1 {v6.d}[1], [x1], x2
ld1 {v7.d}[0], [x0], x2
ld1 {v7.d}[1], [x1], x2
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
dup v22.16b, w3 // flim_E
dup v23.16b, w4 // flim_I
@@ -651,25 +651,25 @@ function ff_vp8_h_loop_filter8uv\name\()_neon, export=1
sub x0, x0, x2, lsl #3 // backup u 8 rows
sub x1, x1, x2, lsl #3 // backup v 8 rows
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
// Store pixels:
st1 {v0.d}[0], [x0], x2 // load u
st1 {v0.d}[1], [x1], x2 // load v
st1 {v1.d}[0], [x0], x2
st1 {v1.d}[1], [x1], x2
st1 {v2.d}[0], [x0], x2
st1 {v2.d}[1], [x1], x2
st1 {v3.d}[0], [x0], x2
st1 {v3.d}[1], [x1], x2
st1 {v4.d}[0], [x0], x2
st1 {v4.d}[1], [x1], x2
st1 {v5.d}[0], [x0], x2
st1 {v5.d}[1], [x1], x2
st1 {v6.d}[0], [x0], x2
st1 {v6.d}[1], [x1], x2
st1 {v7.d}[0], [x0]
st1 {v7.d}[1], [x1]
st1 {v0.d}[0], [x0], x2 // load u
st1 {v0.d}[1], [x1], x2 // load v
st1 {v1.d}[0], [x0], x2
st1 {v1.d}[1], [x1], x2
st1 {v2.d}[0], [x0], x2
st1 {v2.d}[1], [x1], x2
st1 {v3.d}[0], [x0], x2
st1 {v3.d}[1], [x1], x2
st1 {v4.d}[0], [x0], x2
st1 {v4.d}[1], [x1], x2
st1 {v5.d}[0], [x0], x2
st1 {v5.d}[1], [x1], x2
st1 {v6.d}[0], [x0], x2
st1 {v6.d}[1], [x1], x2
st1 {v7.d}[0], [x0]
st1 {v7.d}[1], [x1]
ret
+1 -9
View File
@@ -230,9 +230,6 @@ function \type\()_8tap_\size\()h_\idx1\idx2
// reduced dst stride
.if \size >= 16
sub x1, x1, x5
.elseif \size == 4
add x12, x2, #8
add x13, x7, #8
.endif
// size >= 16 loads two qwords and increments x2,
// for size 4/8 it's enough with one qword and no
@@ -251,14 +248,9 @@ function \type\()_8tap_\size\()h_\idx1\idx2
.if \size >= 16
ld1 {v4.8b, v5.8b, v6.8b}, [x2], #24
ld1 {v16.8b, v17.8b, v18.8b}, [x7], #24
.elseif \size == 8
.else
ld1 {v4.8b, v5.8b}, [x2]
ld1 {v16.8b, v17.8b}, [x7]
.else // \size == 4
ld1 {v4.8b}, [x2]
ld1 {v16.8b}, [x7]
ld1 {v5.s}[0], [x12], x3
ld1 {v17.s}[0], [x13], x3
.endif
uxtl v4.8h, v4.8b
uxtl v5.8h, v5.8b
+17 -17
View File
@@ -104,26 +104,26 @@ static int aasc_decode_frame(AVCodecContext *avctx,
ff_msrle_decode(avctx, s->frame, 8, &s->gb);
break;
case MKTAG('A', 'A', 'S', 'C'):
switch (compr) {
case 0:
stride = (avctx->width * psize + psize) & ~psize;
if (buf_size < stride * avctx->height)
return AVERROR_INVALIDDATA;
for (i = avctx->height - 1; i >= 0; i--) {
memcpy(s->frame->data[0] + i * s->frame->linesize[0], buf, avctx->width * psize);
buf += stride;
buf_size -= stride;
}
break;
case 1:
bytestream2_init(&s->gb, buf, buf_size);
ff_msrle_decode(avctx, s->frame, 8, &s->gb);
break;
default:
av_log(avctx, AV_LOG_ERROR, "Unknown compression type %d\n", compr);
switch (compr) {
case 0:
stride = (avctx->width * psize + psize) & ~psize;
if (buf_size < stride * avctx->height)
return AVERROR_INVALIDDATA;
for (i = avctx->height - 1; i >= 0; i--) {
memcpy(s->frame->data[0] + i * s->frame->linesize[0], buf, avctx->width * psize);
buf += stride;
buf_size -= stride;
}
break;
case 1:
bytestream2_init(&s->gb, buf, buf_size);
ff_msrle_decode(avctx, s->frame, 8, &s->gb);
break;
default:
av_log(avctx, AV_LOG_ERROR, "Unknown compression type %d\n", compr);
return AVERROR_INVALIDDATA;
}
break;
default:
av_log(avctx, AV_LOG_ERROR, "Unknown FourCC: %X\n", avctx->codec_tag);
return -1;
-2
View File
@@ -75,7 +75,6 @@
#define AC3_DYNAMIC_RANGE1 0
typedef int INTFLOAT;
typedef unsigned int UINTFLOAT;
typedef int16_t SHORTFLOAT;
#else /* USE_FIXED */
@@ -95,7 +94,6 @@ typedef int16_t SHORTFLOAT;
#define AC3_DYNAMIC_RANGE1 1.0f
typedef float INTFLOAT;
typedef float UINTFLOAT;
typedef float SHORTFLOAT;
#endif /* USE_FIXED */
+1 -3
View File
@@ -179,9 +179,7 @@ int av_ac3_parse_header(const uint8_t *buf, size_t size,
AC3HeaderInfo hdr;
int err;
err = init_get_bits8(&gb, buf, size);
if (err < 0)
return AVERROR_INVALIDDATA;
init_get_bits8(&gb, buf, size);
err = ff_ac3_parse_header(&gb, &hdr);
if (err < 0)
return AVERROR_INVALIDDATA;
+1 -1
View File
@@ -43,7 +43,7 @@ int AC3_NAME(allocate_sample_buffers)(AC3EncodeContext *s)
FF_ALLOC_OR_GOTO(s->avctx, s->windowed_samples, AC3_WINDOW_SIZE *
sizeof(*s->windowed_samples), alloc_fail);
FF_ALLOCZ_ARRAY_OR_GOTO(s->avctx, s->planar_samples, s->channels, sizeof(*s->planar_samples),
FF_ALLOC_ARRAY_OR_GOTO(s->avctx, s->planar_samples, s->channels, sizeof(*s->planar_samples),
alloc_fail);
for (ch = 0; ch < s->channels; ch++) {
FF_ALLOCZ_OR_GOTO(s->avctx, s->planar_samples[ch],
+2 -2
View File
@@ -423,8 +423,8 @@ static int decode_inter_plane(AGMContext *s, GetBitContext *gb, int size,
int map = s->map[x];
if (orig_mv_x >= -32) {
if (y * 8 + mv_y < 0 || y * 8 + mv_y + 8 > h ||
x * 8 + mv_x < 0 || x * 8 + mv_x + 8 > w)
if (y * 8 + mv_y < 0 || y * 8 + mv_y + 8 >= h ||
x * 8 + mv_x < 0 || x * 8 + mv_x + 8 >= w)
return AVERROR_INVALIDDATA;
copy_block8(frame->data[plane] + (s->blocks_h - 1 - y) * 8 * frame->linesize[plane] + x * 8,
+2 -1
View File
@@ -470,7 +470,8 @@ static av_cold int aic_decode_init(AVCodecContext *avctx)
}
}
ctx->slice_data = av_calloc(ctx->slice_width, AIC_BAND_COEFFS * sizeof(*ctx->slice_data));
ctx->slice_data = av_malloc_array(ctx->slice_width, AIC_BAND_COEFFS
* sizeof(*ctx->slice_data));
if (!ctx->slice_data) {
av_log(avctx, AV_LOG_ERROR, "Error allocating slice buffer\n");
-3
View File
@@ -302,9 +302,6 @@ static int decode_element(AVCodecContext *avctx, AVFrame *frame, int ch_index,
decorr_shift = get_bits(&alac->gb, 8);
decorr_left_weight = get_bits(&alac->gb, 8);
if (channels == 2 && decorr_left_weight && decorr_shift > 31)
return AVERROR_INVALIDDATA;
for (ch = 0; ch < channels; ch++) {
prediction_type[ch] = get_bits(&alac->gb, 4);
lpc_quant[ch] = get_bits(&alac->gb, 4);
+2 -2
View File
@@ -29,12 +29,12 @@ static void decorrelate_stereo(int32_t *buffer[2], int nb_samples,
int i;
for (i = 0; i < nb_samples; i++) {
uint32_t a, b;
int32_t a, b;
a = buffer[0][i];
b = buffer[1][i];
a -= (int)(b * decorr_left_weight) >> decorr_shift;
a -= (b * decorr_left_weight) >> decorr_shift;
b += a;
buffer[0][i] = b;
+3 -3
View File
@@ -679,7 +679,9 @@ extern AVCodec ff_xsub_decoder;
/* external libraries */
extern AVCodec ff_aac_at_encoder;
extern AVCodec ff_aac_at_decoder;
extern AVCodec ff_aac_mf_encoder;
extern AVCodec ff_ac3_at_decoder;
extern AVCodec ff_ac3_mf_encoder;
extern AVCodec ff_adpcm_ima_qt_at_decoder;
extern AVCodec ff_alac_at_encoder;
extern AVCodec ff_alac_at_decoder;
@@ -691,6 +693,7 @@ extern AVCodec ff_ilbc_at_decoder;
extern AVCodec ff_mp1_at_decoder;
extern AVCodec ff_mp2_at_decoder;
extern AVCodec ff_mp3_at_decoder;
extern AVCodec ff_mp3_mf_encoder;
extern AVCodec ff_pcm_alaw_at_encoder;
extern AVCodec ff_pcm_alaw_at_decoder;
extern AVCodec ff_pcm_mulaw_at_encoder;
@@ -754,8 +757,6 @@ extern AVCodec ff_idf_decoder;
/* external libraries, that shouldn't be used by default if one of the
* above is available */
extern AVCodec ff_aac_mf_encoder;
extern AVCodec ff_ac3_mf_encoder;
extern AVCodec ff_h263_v4l2m2m_encoder;
extern AVCodec ff_libaom_av1_decoder;
extern AVCodec ff_libopenh264_encoder;
@@ -788,7 +789,6 @@ extern AVCodec ff_mjpeg_cuvid_decoder;
extern AVCodec ff_mjpeg_qsv_encoder;
extern AVCodec ff_mjpeg_qsv_decoder;
extern AVCodec ff_mjpeg_vaapi_encoder;
extern AVCodec ff_mp3_mf_encoder;
extern AVCodec ff_mpeg1_cuvid_decoder;
extern AVCodec ff_mpeg2_cuvid_decoder;
extern AVCodec ff_mpeg2_qsv_encoder;
+6 -17
View File
@@ -762,7 +762,7 @@ static int read_var_block_data(ALSDecContext *ctx, ALSBlockData *bd)
}
for (k = 2; k < opt_order; k++)
quant_cof[k] = (quant_cof[k] * (1U << 14)) + (add_base << 13);
quant_cof[k] = (quant_cof[k] * (1 << 14)) + (add_base << 13);
}
}
@@ -1016,10 +1016,6 @@ static int read_block(ALSDecContext *ctx, ALSBlockData *bd)
ALSSpecificConfig *sconf = &ctx->sconf;
*bd->shift_lsbs = 0;
if (get_bits_left(gb) < 7)
return AVERROR_INVALIDDATA;
// read block type flag and read the samples accordingly
if (get_bits1(gb)) {
ret = read_var_block_data(ctx, bd);
@@ -1632,7 +1628,7 @@ static int read_frame_data(ALSDecContext *ctx, unsigned int ra_frame)
AVCodecContext *avctx = ctx->avctx;
GetBitContext *gb = &ctx->gb;
unsigned int div_blocks[32]; ///< block sizes.
int c;
unsigned int c;
unsigned int js_blocks[2];
uint32_t bs_info = 0;
int ret;
@@ -1810,17 +1806,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame_ptr,
else
ctx->cur_frame_length = sconf->frame_length;
ctx->highest_decoded_channel = -1;
ctx->highest_decoded_channel = 0;
// decode the frame data
if ((invalid_frame = read_frame_data(ctx, ra_frame)) < 0)
av_log(ctx->avctx, AV_LOG_WARNING,
"Reading frame data failed. Skipping RA unit.\n");
if (ctx->highest_decoded_channel == -1) {
av_log(ctx->avctx, AV_LOG_WARNING,
"No channel data decoded.\n");
if (ctx->highest_decoded_channel == 0)
return AVERROR_INVALIDDATA;
}
ctx->frame_id++;
@@ -2116,8 +2109,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
ctx->nbits = av_malloc_array(ctx->cur_frame_length, sizeof(*ctx->nbits));
ctx->mlz = av_mallocz(sizeof(*ctx->mlz));
if (!ctx->larray || !ctx->nbits || !ctx->mlz || !ctx->acf || !ctx->shift_value
|| !ctx->last_shift_value || !ctx->last_acf_mantissa || !ctx->raw_mantissa) {
if (!ctx->mlz || !ctx->acf || !ctx->shift_value || !ctx->last_shift_value
|| !ctx->last_acf_mantissa || !ctx->raw_mantissa) {
av_log(avctx, AV_LOG_ERROR, "Allocating buffer memory failed.\n");
ret = AVERROR(ENOMEM);
goto fail;
@@ -2128,10 +2121,6 @@ static av_cold int decode_init(AVCodecContext *avctx)
for (c = 0; c < avctx->channels; ++c) {
ctx->raw_mantissa[c] = av_mallocz_array(ctx->cur_frame_length, sizeof(**ctx->raw_mantissa));
if (!ctx->raw_mantissa[c]) {
av_log(avctx, AV_LOG_ERROR, "Allocating buffer memory failed.\n");
return AVERROR(ENOMEM);
}
}
}
+1 -8
View File
@@ -431,8 +431,7 @@ static int decode_frame(AVCodecContext *avctx,
s->args[s->nb_args] = FFMAX(s->args[s->nb_args], 0) * 10 + buf[0] - '0';
break;
case ';':
if (s->nb_args < MAX_NB_ARGS)
s->nb_args++;
s->nb_args++;
if (s->nb_args < MAX_NB_ARGS)
s->args[s->nb_args] = 0;
break;
@@ -475,11 +474,6 @@ static av_cold int decode_close(AVCodecContext *avctx)
return 0;
}
static const AVCodecDefault ansi_defaults[] = {
{ "max_pixels", "640*480" },
{ NULL },
};
AVCodec ff_ansi_decoder = {
.name = "ansi",
.long_name = NULL_IF_CONFIG_SMALL("ASCII/ANSI art"),
@@ -491,5 +485,4 @@ AVCodec ff_ansi_decoder = {
.decode = decode_frame,
.capabilities = AV_CODEC_CAP_DR1,
.caps_internal = FF_CODEC_CAP_INIT_THREADSAFE,
.defaults = ansi_defaults,
};
+21 -32
View File
@@ -102,7 +102,7 @@ typedef struct APEFilter {
int16_t *historybuffer; ///< filter memory
int16_t *delay; ///< filtered values
uint32_t avg;
int avg;
} APEFilter;
typedef struct APERice {
@@ -852,7 +852,7 @@ static av_always_inline int filter_fast_3320(APEPredictor *p,
}
predictionA = p->buf[delayA] * 2U - p->buf[delayA - 1];
p->lastA[filter] = decoded + (unsigned)((int32_t)(predictionA * p->coeffsA[filter][0]) >> 9);
p->lastA[filter] = decoded + ((int32_t)(predictionA * p->coeffsA[filter][0]) >> 9);
if ((decoded ^ predictionA) > 0)
p->coeffsA[filter][0]++;
@@ -882,8 +882,8 @@ static av_always_inline int filter_3800(APEPredictor *p,
return predictionA;
}
d2 = p->buf[delayA];
d1 = (p->buf[delayA] - (unsigned)p->buf[delayA - 1]) * 2;
d0 = p->buf[delayA] + ((p->buf[delayA - 2] - (unsigned)p->buf[delayA - 1]) * 8);
d1 = (p->buf[delayA] - p->buf[delayA - 1]) * 2U;
d0 = p->buf[delayA] + ((p->buf[delayA - 2] - p->buf[delayA - 1]) * 8U);
d3 = p->buf[delayB] * 2U - p->buf[delayB - 1];
d4 = p->buf[delayB];
@@ -903,7 +903,7 @@ static av_always_inline int filter_3800(APEPredictor *p,
p->coeffsB[filter][0] += (((d3 >> 29) & 4) - 2) * sign;
p->coeffsB[filter][1] -= (((d4 >> 30) & 2) - 1) * sign;
p->filterB[filter] = p->lastA[filter] + (unsigned)(predictionB >> shift);
p->filterB[filter] = p->lastA[filter] + (predictionB >> shift);
p->filterA[filter] = p->filterB[filter] + (unsigned)((int)(p->filterA[filter] * 31U) >> 5);
return p->filterA[filter];
@@ -928,7 +928,7 @@ static void long_filter_high_3800(int32_t *buffer, int order, int shift, int len
dotprod += delay[j] * (unsigned)coeffs[j];
coeffs[j] += ((delay[j] >> 31) | 1) * sign;
}
buffer[i] -= (unsigned)(dotprod >> shift);
buffer[i] -= dotprod >> shift;
for (j = 0; j < order - 1; j++)
delay[j] = delay[j + 1];
delay[order - 1] = buffer[i];
@@ -952,7 +952,7 @@ static void long_filter_ehigh_3830(int32_t *buffer, int length)
for (j = 7; j > 0; j--)
delay[j] = delay[j - 1];
delay[0] = buffer[i];
buffer[i] -= (unsigned)(dotprod >> 9);
buffer[i] -= dotprod >> 9;
}
}
@@ -1061,13 +1061,13 @@ static av_always_inline int predictor_update_3930(APEPredictor *p,
const int delayA)
{
int32_t predictionA, sign;
uint32_t d0, d1, d2, d3;
int32_t d0, d1, d2, d3;
p->buf[delayA] = p->lastA[filter];
d0 = p->buf[delayA ];
d1 = p->buf[delayA ] - (unsigned)p->buf[delayA - 1];
d2 = p->buf[delayA - 1] - (unsigned)p->buf[delayA - 2];
d3 = p->buf[delayA - 2] - (unsigned)p->buf[delayA - 3];
d1 = p->buf[delayA ] - p->buf[delayA - 1];
d2 = p->buf[delayA - 1] - p->buf[delayA - 2];
d3 = p->buf[delayA - 2] - p->buf[delayA - 3];
predictionA = d0 * p->coeffsA[filter][0] +
d1 * p->coeffsA[filter][1] +
@@ -1078,10 +1078,10 @@ static av_always_inline int predictor_update_3930(APEPredictor *p,
p->filterA[filter] = p->lastA[filter] + ((int)(p->filterA[filter] * 31U) >> 5);
sign = APESIGN(decoded);
p->coeffsA[filter][0] += (((int32_t)d0 < 0) * 2 - 1) * sign;
p->coeffsA[filter][1] += (((int32_t)d1 < 0) * 2 - 1) * sign;
p->coeffsA[filter][2] += (((int32_t)d2 < 0) * 2 - 1) * sign;
p->coeffsA[filter][3] += (((int32_t)d3 < 0) * 2 - 1) * sign;
p->coeffsA[filter][0] += ((d0 < 0) * 2 - 1) * sign;
p->coeffsA[filter][1] += ((d1 < 0) * 2 - 1) * sign;
p->coeffsA[filter][2] += ((d2 < 0) * 2 - 1) * sign;
p->coeffsA[filter][3] += ((d3 < 0) * 2 - 1) * sign;
return p->filterA[filter];
}
@@ -1309,7 +1309,7 @@ static void do_apply_filter(APEContext *ctx, int version, APEFilter *f,
absres = res < 0 ? -(unsigned)res : res;
if (absres)
*f->adaptcoeffs = APESIGN(res) *
(8 << ((absres > f->avg * 3LL) + (absres > (f->avg + f->avg / 3))));
(8 << ((absres > f->avg * 3) + (absres > f->avg * 4 / 3)));
/* equivalent to the following code
if (absres <= f->avg * 4 / 3)
*f->adaptcoeffs = APESIGN(res) * 8;
@@ -1559,7 +1559,7 @@ static int ape_decode_frame(AVCodecContext *avctx, void *data,
for (ch = 0; ch < s->channels; ch++) {
sample8 = (uint8_t *)frame->data[ch];
for (i = 0; i < blockstodecode; i++)
*sample8++ = (s->decoded[ch][i] + 0x80U) & 0xff;
*sample8++ = (s->decoded[ch][i] + 0x80) & 0xff;
}
break;
case 16:
@@ -1573,7 +1573,7 @@ static int ape_decode_frame(AVCodecContext *avctx, void *data,
for (ch = 0; ch < s->channels; ch++) {
sample24 = (int32_t *)frame->data[ch];
for (i = 0; i < blockstodecode; i++)
*sample24++ = s->decoded[ch][i] * 256U;
*sample24++ = s->decoded[ch][i] * 256;
}
break;
}
@@ -1581,24 +1581,13 @@ static int ape_decode_frame(AVCodecContext *avctx, void *data,
s->samples -= blockstodecode;
if (avctx->err_recognition & AV_EF_CRCCHECK &&
s->fileversion >= 3900) {
s->fileversion >= 3900 && s->bps < 24) {
uint32_t crc = s->CRC_state;
const AVCRC *crc_tab = av_crc_get_table(AV_CRC_32_IEEE_LE);
int stride = s->bps == 24 ? 4 : (s->bps>>3);
int offset = s->bps == 24;
int bytes = s->bps >> 3;
for (i = 0; i < blockstodecode; i++) {
for (ch = 0; ch < s->channels; ch++) {
#if HAVE_BIGENDIAN
uint8_t *smp_native = frame->data[ch] + i*stride;
uint8_t smp[4];
for(int j = 0; j<stride; j++)
smp[j] = smp_native[stride-j-1];
#else
uint8_t *smp = frame->data[ch] + i*stride;
#endif
crc = av_crc(crc_tab, crc, smp+offset, bytes);
uint8_t *smp = frame->data[ch] + (i*(s->bps >> 3));
crc = av_crc(crc_tab, crc, smp, s->bps >> 3);
}
}
+1
View File
@@ -48,3 +48,4 @@ function ff_scalarproduct_int16_neon, export=1
vmov.32 r0, d3[0]
bx lr
endfunc
+3 -3
View File
@@ -229,7 +229,7 @@ A .endif
.endif
// Begin loop
1:
01:
.if TOTAL_TAPS == 0
// Things simplify a lot in this case
// In fact this could be pipelined further if it's worth it...
@@ -241,7 +241,7 @@ A .endif
str ST0, [PST, #-4]!
str ST0, [PST, #4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)]
str ST0, [PSAMP], #4 * MAX_CHANNELS
bne 1b
bne 01b
.else
.if \fir_taps & 1
.set LOAD_REG, 1
@@ -333,7 +333,7 @@ T orr AC0, AC0, AC1
str ST3, [PST, #-4]!
str ST2, [PST, #4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)]
str ST3, [PSAMP], #4 * MAX_CHANNELS
bne 1b
bne 01b
.endif
b 99f
+3 -10
View File
@@ -279,13 +279,11 @@ function \type\()_8tap_\size\()h_\idx1\idx2
sub r1, r1, r5
.endif
@ size >= 16 loads two qwords and increments r2,
@ size 4 loads 1 d word, increments r2 and loads 1 32-bit lane
@ for size 8 it's enough with one qword and no postincrement
@ for size 4/8 it's enough with one qword and no
@ postincrement
.if \size >= 16
sub r3, r3, r5
sub r3, r3, #8
.elseif \size == 4
sub r3, r3, #8
.endif
@ Load the filter vector
vld1.16 {q0}, [r12,:128]
@@ -297,14 +295,9 @@ function \type\()_8tap_\size\()h_\idx1\idx2
.if \size >= 16
vld1.8 {d18, d19, d20}, [r2]!
vld1.8 {d24, d25, d26}, [r7]!
.elseif \size == 8
.else
vld1.8 {q9}, [r2]
vld1.8 {q12}, [r7]
.else @ size == 4
vld1.8 {d18}, [r2]!
vld1.8 {d24}, [r7]!
vld1.32 {d19[0]}, [r2]
vld1.32 {d25[0]}, [r7]
.endif
vmovl.u8 q8, d18
vmovl.u8 q9, d19
-4
View File
@@ -362,10 +362,6 @@ static av_cold int atrac1_decode_init(AVCodecContext *avctx)
ff_atrac_generate_tables();
q->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
if (!q->fdsp) {
atrac1_decode_end(avctx);
return AVERROR(ENOMEM);
}
q->bands[0] = q->low;
q->bands[1] = q->mid;
+29 -44
View File
@@ -45,10 +45,6 @@ static const enum AVPixelFormat pix_fmts_12bit[2][2] = {
{ AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV420P12 },
};
static const enum AVPixelFormat pix_fmts_rgb[3] = {
AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12,
};
static int av1_parser_parse(AVCodecParserContext *ctx,
AVCodecContext *avctx,
const uint8_t **out_data, int *out_size,
@@ -57,8 +53,6 @@ static int av1_parser_parse(AVCodecParserContext *ctx,
AV1ParseContext *s = ctx->priv_data;
CodedBitstreamFragment *td = &s->temporal_unit;
CodedBitstreamAV1Context *av1 = s->cbc->priv_data;
AV1RawSequenceHeader *seq;
AV1RawColorConfig *color;
int ret;
*out_data = data;
@@ -92,12 +86,11 @@ static int av1_parser_parse(AVCodecParserContext *ctx,
goto end;
}
seq = av1->sequence_header;
color = &seq->color_config;
for (int i = 0; i < td->nb_units; i++) {
CodedBitstreamUnit *unit = &td->units[i];
AV1RawOBU *obu = unit->content;
AV1RawSequenceHeader *seq = av1->sequence_header;
AV1RawColorConfig *color = &seq->color_config;
AV1RawFrameHeader *frame;
int frame_type;
@@ -134,6 +127,9 @@ static int av1_parser_parse(AVCodecParserContext *ctx,
ctx->key_frame = frame_type == AV1_FRAME_KEY;
}
avctx->profile = seq->seq_profile;
avctx->level = seq->seq_level_idx[0];
switch (frame_type) {
case AV1_FRAME_KEY:
case AV1_FRAME_INTRA_ONLY:
@@ -147,44 +143,33 @@ static int av1_parser_parse(AVCodecParserContext *ctx,
break;
}
ctx->picture_structure = AV_PICTURE_STRUCTURE_FRAME;
}
switch (av1->bit_depth) {
case 8:
ctx->format = color->mono_chrome ? AV_PIX_FMT_GRAY8
: pix_fmts_8bit [color->subsampling_x][color->subsampling_y];
break;
case 10:
ctx->format = color->mono_chrome ? AV_PIX_FMT_GRAY10
: pix_fmts_10bit[color->subsampling_x][color->subsampling_y];
break;
case 12:
ctx->format = color->mono_chrome ? AV_PIX_FMT_GRAY12
: pix_fmts_12bit[color->subsampling_x][color->subsampling_y];
break;
}
av_assert2(ctx->format != AV_PIX_FMT_NONE);
switch (av1->bit_depth) {
case 8:
ctx->format = color->mono_chrome ? AV_PIX_FMT_GRAY8
: pix_fmts_8bit [color->subsampling_x][color->subsampling_y];
break;
case 10:
ctx->format = color->mono_chrome ? AV_PIX_FMT_GRAY10
: pix_fmts_10bit[color->subsampling_x][color->subsampling_y];
break;
case 12:
ctx->format = color->mono_chrome ? AV_PIX_FMT_GRAY12
: pix_fmts_12bit[color->subsampling_x][color->subsampling_y];
break;
}
av_assert2(ctx->format != AV_PIX_FMT_NONE);
if (!color->subsampling_x && !color->subsampling_y &&
color->matrix_coefficients == AVCOL_SPC_RGB &&
color->color_primaries == AVCOL_PRI_BT709 &&
color->transfer_characteristics == AVCOL_TRC_IEC61966_2_1)
ctx->format = pix_fmts_rgb[color->high_bitdepth + color->twelve_bit];
avctx->colorspace = (enum AVColorSpace) color->matrix_coefficients;
avctx->color_primaries = (enum AVColorPrimaries) color->color_primaries;
avctx->color_trc = (enum AVColorTransferCharacteristic) color->transfer_characteristics;
avctx->color_range = color->color_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
avctx->pix_fmt = ctx->format;
avctx->profile = seq->seq_profile;
avctx->level = seq->seq_level_idx[0];
avctx->colorspace = (enum AVColorSpace) color->matrix_coefficients;
avctx->color_primaries = (enum AVColorPrimaries) color->color_primaries;
avctx->color_trc = (enum AVColorTransferCharacteristic) color->transfer_characteristics;
avctx->color_range = color->color_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
if (ctx->width != avctx->width || ctx->height != avctx->height) {
ret = ff_set_dimensions(avctx, ctx->width, ctx->height);
if (ret < 0)
goto end;
if (ctx->width != avctx->width || ctx->height != avctx->height) {
ret = ff_set_dimensions(avctx, ctx->width, ctx->height);
if (ret < 0)
goto end;
}
}
if (avctx->framerate.num)
-4
View File
@@ -1294,10 +1294,6 @@ typedef struct AVCodecContext {
* this callback and filled with the extra buffers if there are more
* buffers than buf[] can hold. extended_buf will be freed in
* av_frame_unref().
* Decoders will generally initialize the whole buffer before it is output
* but it can in rare error conditions happen that uninitialized data is passed
* through. \important The buffers returned by get_buffer* should thus not contain sensitive
* data.
*
* If AV_CODEC_CAP_DR1 is not set then get_buffer2() must call
* avcodec_default_get_buffer2() instead of providing buffers allocated by
+1 -3
View File
@@ -54,8 +54,6 @@ static av_cold int init(AVCodecContext *avctx)
}
a->mjpeg_avctx = avcodec_alloc_context3(codec);
if (!a->mjpeg_avctx)
return AVERROR(ENOMEM);
av_dict_set(&thread_opt, "threads", "1", 0); // Is this needed ?
a->mjpeg_avctx->refcounted_frames = 1;
@@ -171,5 +169,5 @@ AVCodec ff_avrn_decoder = {
.close = end,
.decode = decode_frame,
.max_lowres = 3,
.caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
.caps_internal = FF_CODEC_CAP_INIT_THREADSAFE,
};
+8 -7
View File
@@ -867,7 +867,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
binkb_init_bundles(c);
ref_start = frame->data[plane_idx];
ref_end = frame->data[plane_idx] + ((bh - 1) * frame->linesize[plane_idx] + bw - 1) * 8;
ref_end = frame->data[plane_idx] + (bh * frame->linesize[plane_idx] + bw) * 8;
for (i = 0; i < 64; i++)
coordmap[i] = (i & 7) + (i >> 3) * stride;
@@ -923,7 +923,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
xoff = binkb_get_value(c, BINKB_SRC_X_OFF);
yoff = binkb_get_value(c, BINKB_SRC_Y_OFF) + ybias;
ref = dst + xoff + yoff * stride;
if (ref < ref_start || ref > ref_end) {
if (ref < ref_start || ref + 8*stride > ref_end) {
av_log(c->avctx, AV_LOG_WARNING, "Reference block is out of bounds\n");
} else if (ref + 8*stride < dst || ref >= dst + 8*stride) {
c->hdsp.put_pixels_tab[1][0](dst, ref, stride, 8);
@@ -939,7 +939,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
xoff = binkb_get_value(c, BINKB_SRC_X_OFF);
yoff = binkb_get_value(c, BINKB_SRC_Y_OFF) + ybias;
ref = dst + xoff + yoff * stride;
if (ref < ref_start || ref > ref_end) {
if (ref < ref_start || ref + 8 * stride > ref_end) {
av_log(c->avctx, AV_LOG_WARNING, "Reference block is out of bounds\n");
} else if (ref + 8*stride < dst || ref >= dst + 8*stride) {
c->hdsp.put_pixels_tab[1][0](dst, ref, stride, 8);
@@ -971,7 +971,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
xoff = binkb_get_value(c, BINKB_SRC_X_OFF);
yoff = binkb_get_value(c, BINKB_SRC_Y_OFF) + ybias;
ref = dst + xoff + yoff * stride;
if (ref < ref_start || ref > ref_end) {
if (ref < ref_start || ref + 8 * stride > ref_end) {
av_log(c->avctx, AV_LOG_WARNING, "Reference block is out of bounds\n");
} else if (ref + 8*stride < dst || ref >= dst + 8*stride) {
c->hdsp.put_pixels_tab[1][0](dst, ref, stride, 8);
@@ -1084,7 +1084,7 @@ static int bink_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
for (bx = 0; bx < bw; bx++, dst += 8, prev += 8) {
blk = get_value(c, BINK_SRC_BLOCK_TYPES);
// 16x16 block type on odd line means part of the already decoded block, so skip it
if (((by & 1) || (bx & 1)) && blk == SCALED_BLOCK) {
if ((by & 1) && blk == SCALED_BLOCK) {
bx++;
dst += 8;
prev += 8;
@@ -1381,8 +1381,10 @@ static av_cold int decode_init(AVCodecContext *avctx)
ff_hpeldsp_init(&c->hdsp, avctx->flags);
ff_binkdsp_init(&c->binkdsp);
if ((ret = init_bundles(c)) < 0)
if ((ret = init_bundles(c)) < 0) {
free_bundles(c);
return ret;
}
if (c->version == 'b') {
if (!binkb_initialised) {
@@ -1422,5 +1424,4 @@ AVCodec ff_bink_decoder = {
.decode = decode_frame,
.flush = flush,
.capabilities = AV_CODEC_CAP_DR1,
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
};
+4 -3
View File
@@ -40,6 +40,8 @@
#include "rdft.h"
#include "wma_freqs.h"
static float quant_table[96];
#define MAX_CHANNELS 2
#define BINK_BLOCK_MAX_SIZE (MAX_CHANNELS << 11)
@@ -56,7 +58,6 @@ typedef struct BinkAudioContext {
float root;
DECLARE_ALIGNED(32, FFTSample, coeffs)[BINK_BLOCK_MAX_SIZE];
float previous[MAX_CHANNELS][BINK_BLOCK_MAX_SIZE / 16]; ///< coeffs from previous audio block
float quant_table[96];
AVPacket *pkt;
union {
RDFTContext rdft;
@@ -115,7 +116,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
s->root = s->frame_len / (sqrt(s->frame_len) * 32768.0);
for (i = 0; i < 96; i++) {
/* constant is result of 0.066399999/log10(M_E) */
s->quant_table[i] = expf(i * 0.15289164787221953823f) * s->root;
quant_table[i] = expf(i * 0.15289164787221953823f) * s->root;
}
/* calculate number of bands */
@@ -196,7 +197,7 @@ static int decode_block(BinkAudioContext *s, float **out, int use_dct)
return AVERROR_INVALIDDATA;
for (i = 0; i < s->num_bands; i++) {
int value = get_bits(gb, 8);
quant[i] = s->quant_table[FFMIN(value, 95)];
quant[i] = quant_table[FFMIN(value, 95)];
}
k = 0;
+2 -2
View File
@@ -129,7 +129,7 @@ static int alloc_table(VLC *vlc, int size, int use_static)
typedef struct VLCcode {
uint8_t bits;
VLC_TYPE symbol;
uint16_t symbol;
/** codeword, with the first bit-to-be-read in the msb
* (even if intended for a little-endian bitstream reader) */
uint32_t code;
@@ -162,9 +162,9 @@ static int build_table(VLC *vlc, int table_nb_bits, int nb_codes,
uint32_t code;
volatile VLC_TYPE (* volatile table)[2]; // the double volatile is needed to prevent an internal compiler error in gcc 4.2
table_size = 1 << table_nb_bits;
if (table_nb_bits > 30)
return AVERROR(EINVAL);
table_size = 1 << table_nb_bits;
table_index = alloc_table(vlc, table_size, flags & INIT_VLC_USE_NEW_STATIC);
ff_dlog(NULL, "new table index=%d size=%d\n", table_index, table_size);
if (table_index < 0)
+2 -2
View File
@@ -693,11 +693,11 @@ static int cbs_insert_unit(CodedBitstreamContext *ctx,
memmove(units + position + 1, units + position,
(frag->nb_units - position) * sizeof(*units));
} else {
units = av_malloc_array(frag->nb_units*2 + 1, sizeof(*units));
units = av_malloc_array(frag->nb_units + 1, sizeof(*units));
if (!units)
return AVERROR(ENOMEM);
frag->nb_units_allocated = 2*frag->nb_units_allocated + 1;
++frag->nb_units_allocated;
if (position > 0)
memcpy(units, frag->units, position * sizeof(*units));
+6 -19
View File
@@ -36,7 +36,7 @@ static int cbs_av1_read_uvlc(CodedBitstreamContext *ctx, GetBitContext *gbc,
position = get_bits_count(gbc);
zeroes = 0;
while (zeroes < 32) {
while (1) {
if (get_bits_left(gbc) < 1) {
av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid uvlc code at "
"%s: bitstream ended.\n", name);
@@ -49,18 +49,7 @@ static int cbs_av1_read_uvlc(CodedBitstreamContext *ctx, GetBitContext *gbc,
}
if (zeroes >= 32) {
// The spec allows at least thirty-two zero bits followed by a
// one to mean 2^32-1, with no constraint on the number of
// zeroes. The libaom reference decoder does not match this,
// instead reading thirty-two zeroes but not the following one
// to mean 2^32-1. These two interpretations are incompatible
// and other implementations may follow one or the other.
// Therefore we reject thirty-two zeroes because the intended
// behaviour is not clear.
av_log(ctx->log_ctx, AV_LOG_ERROR, "Thirty-two zero bits in "
"%s uvlc code: considered invalid due to conflicting "
"standard and reference decoder behaviour.\n", name);
return AVERROR_INVALIDDATA;
value = MAX_UINT_BITS(32);
} else {
if (get_bits_left(gbc) < zeroes) {
av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid uvlc code at "
@@ -136,9 +125,8 @@ static int cbs_av1_write_uvlc(CodedBitstreamContext *ctx, PutBitContext *pbc,
put_bits(pbc, 1, 1);
} else {
zeroes = av_log2(value + 1);
v = value - (1U << zeroes) + 1;
put_bits(pbc, zeroes, 0);
put_bits(pbc, 1, 1);
v = value - (1 << zeroes) + 1;
put_bits(pbc, zeroes + 1, 1);
put_bits(pbc, zeroes, v);
}
@@ -394,7 +382,7 @@ static int cbs_av1_write_increment(CodedBitstreamContext *ctx, PutBitContext *pb
}
if (len > 0)
put_bits(pbc, len, (1U << len) - 1 - (value != range_max));
put_bits(pbc, len, (1 << len) - 1 - (value != range_max));
return 0;
}
@@ -723,11 +711,10 @@ static size_t cbs_av1_get_payload_bytes_left(GetBitContext *gbc)
#define infer(name, value) do { \
if (current->name != (value)) { \
av_log(ctx->log_ctx, AV_LOG_ERROR, \
av_log(ctx->log_ctx, AV_LOG_WARNING, "Warning: " \
"%s does not match inferred value: " \
"%"PRId64", but should be %"PRId64".\n", \
#name, (int64_t)current->name, (int64_t)(value)); \
return AVERROR_INVALIDDATA; \
} \
} while (0)
+2 -3
View File
@@ -158,8 +158,8 @@ typedef struct AV1RawFrameHeader {
uint8_t use_superres;
uint8_t coded_denom;
uint8_t render_and_frame_size_different;
uint16_t render_width_minus_1;
uint16_t render_height_minus_1;
uint8_t render_width_minus_1;
uint8_t render_height_minus_1;
uint8_t found_ref[AV1_REFS_PER_FRAME];
@@ -429,7 +429,6 @@ typedef struct CodedBitstreamAV1Context {
int operating_point_idc;
int bit_depth;
int order_hint;
int frame_width;
int frame_height;
int upscaled_width;
+19 -33
View File
@@ -366,7 +366,7 @@ static int FUNC(set_frame_refs)(CodedBitstreamContext *ctx, RWContext *rw,
for (i = 0; i < AV1_NUM_REF_FRAMES; i++)
shifted_order_hints[i] = cur_frame_hint +
cbs_av1_get_relative_dist(seq, priv->ref[i].order_hint,
priv->order_hint);
current->order_hint);
latest_order_hint = shifted_order_hints[current->last_frame_idx];
earliest_order_hint = shifted_order_hints[current->golden_frame_idx];
@@ -541,7 +541,7 @@ static int FUNC(frame_size_with_refs)(CodedBitstreamContext *ctx, RWContext *rw,
}
priv->upscaled_width = ref->upscaled_width;
priv->frame_width = priv->upscaled_width;
priv->frame_width = ref->frame_width;
priv->frame_height = ref->frame_height;
priv->render_width = ref->render_width;
priv->render_height = ref->render_height;
@@ -993,7 +993,7 @@ static int FUNC(skip_mode_params)(CodedBitstreamContext *ctx, RWContext *rw,
for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
ref_hint = priv->ref[current->ref_frame_idx[i]].order_hint;
dist = cbs_av1_get_relative_dist(seq, ref_hint,
priv->order_hint);
current->order_hint);
if (dist < 0) {
if (forward_idx < 0 ||
cbs_av1_get_relative_dist(seq, ref_hint,
@@ -1261,10 +1261,10 @@ static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
flag(show_existing_frame);
if (current->show_existing_frame) {
AV1ReferenceFrameState *ref;
AV1ReferenceFrameState *frame;
fb(3, frame_to_show_map_idx);
ref = &priv->ref[current->frame_to_show_map_idx];
frame = &priv->ref[current->frame_to_show_map_idx];
if (seq->decoder_model_info_present_flag &&
!seq->timing_info.equal_picture_interval) {
@@ -1275,24 +1275,12 @@ static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
if (seq->frame_id_numbers_present_flag)
fb(id_len, display_frame_id);
infer(frame_type, ref->frame_type);
if (current->frame_type == AV1_FRAME_KEY) {
if (frame->frame_type == AV1_FRAME_KEY)
infer(refresh_frame_flags, all_frames);
// Section 7.21
infer(current_frame_id, ref->frame_id);
priv->upscaled_width = ref->upscaled_width;
priv->frame_width = ref->frame_width;
priv->frame_height = ref->frame_height;
priv->render_width = ref->render_width;
priv->render_height = ref->render_height;
priv->bit_depth = ref->bit_depth;
priv->order_hint = ref->order_hint;
} else
else
infer(refresh_frame_flags, 0);
// Section 7.20
goto update_refs;
return 0;
}
fb(2, frame_type);
@@ -1378,7 +1366,6 @@ static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
fb(order_hint_bits, order_hint);
else
infer(order_hint, 0);
priv->order_hint = current->order_hint;
if (frame_is_intra || current->error_resilient_mode)
infer(primary_ref_frame, AV1_PRIMARY_REF_NONE);
@@ -1394,7 +1381,7 @@ static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
int in_temporal_layer = (op_pt_idc >> priv->temporal_id ) & 1;
int in_spatial_layer = (op_pt_idc >> (priv->spatial_id + 8)) & 1;
if (seq->operating_point_idc[i] == 0 ||
(in_temporal_layer && in_spatial_layer)) {
in_temporal_layer || in_spatial_layer) {
fbs(seq->decoder_model_info.buffer_removal_time_length_minus_1 + 1,
buffer_removal_time[i], 1, i);
}
@@ -1554,16 +1541,6 @@ static int FUNC(uncompressed_header)(CodedBitstreamContext *ctx, RWContext *rw,
CHECK(FUNC(film_grain_params)(ctx, rw, current));
av_log(ctx->log_ctx, AV_LOG_DEBUG, "Frame %d: size %dx%d "
"upscaled %d render %dx%d subsample %dx%d "
"bitdepth %d tiles %dx%d.\n", priv->order_hint,
priv->frame_width, priv->frame_height, priv->upscaled_width,
priv->render_width, priv->render_height,
seq->color_config.subsampling_x + 1,
seq->color_config.subsampling_y + 1, priv->bit_depth,
priv->tile_rows, priv->tile_cols);
update_refs:
for (i = 0; i < AV1_NUM_REF_FRAMES; i++) {
if (current->refresh_frame_flags & (1 << i)) {
priv->ref[i] = (AV1ReferenceFrameState) {
@@ -1578,11 +1555,20 @@ update_refs:
.subsampling_x = seq->color_config.subsampling_x,
.subsampling_y = seq->color_config.subsampling_y,
.bit_depth = priv->bit_depth,
.order_hint = priv->order_hint,
.order_hint = current->order_hint,
};
}
}
av_log(ctx->log_ctx, AV_LOG_DEBUG, "Frame %d: size %dx%d "
"upscaled %d render %dx%d subsample %dx%d "
"bitdepth %d tiles %dx%d.\n", current->order_hint,
priv->frame_width, priv->frame_height, priv->upscaled_width,
priv->render_width, priv->render_height,
seq->color_config.subsampling_x + 1,
seq->color_config.subsampling_y + 1, priv->bit_depth,
priv->tile_rows, priv->tile_cols);
return 0;
}
+1 -2
View File
@@ -408,11 +408,10 @@ static int cbs_h2645_read_more_rbsp_data(GetBitContext *gbc)
#define infer(name, value) do { \
if (current->name != (value)) { \
av_log(ctx->log_ctx, AV_LOG_ERROR, \
av_log(ctx->log_ctx, AV_LOG_WARNING, "Warning: " \
"%s does not match inferred value: " \
"%"PRId64", but should be %"PRId64".\n", \
#name, (int64_t)current->name, (int64_t)(value)); \
return AVERROR_INVALIDDATA; \
} \
} while (0)
+1 -29
View File
@@ -728,7 +728,7 @@ static int FUNC(sps_scc_extension)(CodedBitstreamContext *ctx, RWContext *rw,
flag(sps_palette_predictor_initializer_present_flag);
if (current->sps_palette_predictor_initializer_present_flag) {
ue(sps_num_palette_predictor_initializer_minus1, 0, 127);
ue(sps_num_palette_predictor_initializer_minus1, 0, 128);
for (comp = 0; comp < (current->chroma_format_idc ? 3 : 1); comp++) {
int bit_depth = comp == 0 ? current->bit_depth_luma_minus8 + 8
: current->bit_depth_chroma_minus8 + 8;
@@ -744,32 +744,6 @@ static int FUNC(sps_scc_extension)(CodedBitstreamContext *ctx, RWContext *rw,
return 0;
}
static int FUNC(vui_parameters_default)(CodedBitstreamContext *ctx,
RWContext *rw, H265RawVUI *current,
H265RawSPS *sps)
{
infer(aspect_ratio_idc, 0);
infer(video_format, 5);
infer(video_full_range_flag, 0);
infer(colour_primaries, 2);
infer(transfer_characteristics, 2);
infer(matrix_coefficients, 2);
infer(chroma_sample_loc_type_top_field, 0);
infer(chroma_sample_loc_type_bottom_field, 0);
infer(tiles_fixed_structure_flag, 0);
infer(motion_vectors_over_pic_boundaries_flag, 1);
infer(min_spatial_segmentation_idc, 0);
infer(max_bytes_per_pic_denom, 2);
infer(max_bits_per_min_cu_denom, 1);
infer(log2_max_mv_length_horizontal, 15);
infer(log2_max_mv_length_vertical, 15);
return 0;
}
static int FUNC(sps)(CodedBitstreamContext *ctx, RWContext *rw,
H265RawSPS *current)
{
@@ -934,8 +908,6 @@ static int FUNC(sps)(CodedBitstreamContext *ctx, RWContext *rw,
flag(vui_parameters_present_flag);
if (current->vui_parameters_present_flag)
CHECK(FUNC(vui_parameters)(ctx, rw, &current->vui, current));
else
CHECK(FUNC(vui_parameters_default)(ctx, rw, &current->vui, current));
flag(sps_extension_present_flag);
if (current->sps_extension_present_flag) {
+2 -3
View File
@@ -149,7 +149,6 @@ static int cbs_jpeg_split_fragment(CodedBitstreamContext *ctx,
break;
} else if (marker == JPEG_MARKER_SOS) {
next_marker = -1;
end = start;
for (i = start; i + 1 < frag->data_size; i++) {
if (frag->data[i] != 0xff)
continue;
@@ -166,13 +165,13 @@ static int cbs_jpeg_split_fragment(CodedBitstreamContext *ctx,
}
} else {
i = start;
if (i > frag->data_size - 2) {
if (i + 2 > frag->data_size) {
av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid JPEG image: "
"truncated at %02x marker.\n", marker);
return AVERROR_INVALIDDATA;
}
length = AV_RB16(frag->data + i);
if (length > frag->data_size - i) {
if (i + length > frag->data_size) {
av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid JPEG image: "
"truncated at %02x marker segment.\n", marker);
return AVERROR_INVALIDDATA;
+1 -1
View File
@@ -422,7 +422,7 @@ static int cbs_vp9_split_fragment(CodedBitstreamContext *ctx,
superframe_header = frag->data[frag->data_size - 1];
if ((superframe_header & 0xe0) == 0xc0) {
VP9RawSuperframeIndex sfi = {0};
VP9RawSuperframeIndex sfi;
GetBitContext gbc;
size_t index_size, pos;
int i;
+1 -1
View File
@@ -239,7 +239,7 @@ static void cdg_scroll(CDGraphicsContext *cc, uint8_t *data,
for (y = FFMAX(0, vinc); y < FFMIN(CDG_FULL_HEIGHT + vinc, CDG_FULL_HEIGHT); y++)
memcpy(out + FFMAX(0, hinc) + stride * y,
in + FFMAX(0, hinc) - hinc + (y - vinc) * stride,
FFABS(stride) - FFABS(hinc));
FFMIN(stride + hinc, stride));
if (vinc > 0)
cdg_fill_wrapper(0, 0, out,
+3 -3
View File
@@ -65,11 +65,11 @@ int ff_celp_lp_synthesis_filter(int16_t *out, const int16_t *filter_coeffs,
int i,n;
for (n = 0; n < buffer_length; n++) {
int sum = rounder, sum1;
int sum = -rounder, sum1;
for (i = 1; i <= filter_length; i++)
sum -= (unsigned)(filter_coeffs[i-1] * out[n-i]);
sum += (unsigned)(filter_coeffs[i-1] * out[n-i]);
sum1 = ((sum >> 12) + in[n]) >> shift;
sum1 = ((-sum >> 12) + in[n]) >> shift;
sum = av_clip_int16(sum1);
if (stop_on_overflow && sum != sum1)
+1 -1
View File
@@ -78,7 +78,7 @@ int64_t ff_dot_product(const int16_t *a, const int16_t *b, int length);
*
* @return value << offset, if offset>=0; value >> -offset - otherwise
*/
static inline unsigned bidir_sal(unsigned value, int offset)
static inline int bidir_sal(int value, int offset)
{
if(offset < 0) return value >> -offset;
else return value << offset;
-10
View File
@@ -503,10 +503,6 @@ static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
avpriv_report_missing_feature(avctx, "Transform type of %"PRIu16, data);
ret = AVERROR_PATCHWELCOME;
break;
} else if (data == 1) {
av_log(avctx, AV_LOG_ERROR, "unsupported transform type\n");
ret = AVERROR_PATCHWELCOME;
break;
}
av_log(avctx, AV_LOG_DEBUG, "Transform-type? %"PRIu16"\n", data);
} else if (abstag >= 0x4000 && abstag <= 0x40ff) {
@@ -611,12 +607,6 @@ static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
s->peak.level = 0;
} else if (tag == -74 && s->peak.offset) {
s->peak.level = data;
if (s->peak.offset < 4 - bytestream2_tell(&s->peak.base) ||
s->peak.offset > 4 + bytestream2_get_bytes_left(&s->peak.base)
) {
ret = AVERROR_INVALIDDATA;
goto end;
}
bytestream2_seek(&s->peak.base, s->peak.offset - 4, SEEK_CUR);
} else
av_log(avctx, AV_LOG_DEBUG, "Unknown tag %i data %x\n", tag, data);
+2 -2
View File
@@ -665,8 +665,8 @@ static av_cold int clv_decode_init(AVCodecContext *avctx)
}
c->tile_shift = av_log2(c->tile_size);
if (1U << c->tile_shift != c->tile_size || c->tile_shift < 1 || c->tile_shift > 30) {
av_log(avctx, AV_LOG_ERROR, "Tile size: %d, is not power of 2 > 1 and < 2^31\n", c->tile_size);
if (1U << c->tile_shift != c->tile_size) {
av_log(avctx, AV_LOG_ERROR, "Tile size: %d, is not power of 2.\n", c->tile_size);
return AVERROR_INVALIDDATA;
}
+1
View File
@@ -91,3 +91,4 @@ AVCodec ff_cljr_decoder = {
.decode = decode_frame,
.capabilities = AV_CODEC_CAP_DR1,
};
+4 -4
View File
@@ -1084,10 +1084,6 @@ static av_cold int cook_decode_init(AVCodecContext *avctx)
ff_audiodsp_init(&q->adsp);
while (bytestream2_get_bytes_left(&gb)) {
if (s >= FFMIN(MAX_SUBPACKETS, avctx->block_align)) {
avpriv_request_sample(avctx, "subpackets > %d", FFMIN(MAX_SUBPACKETS, avctx->block_align));
return AVERROR_PATCHWELCOME;
}
/* 8 for mono, 16 for stereo, ? for multichannel
Swap to right endianness so we don't need to care later on. */
q->subpacket[s].cookversion = bytestream2_get_be32(&gb);
@@ -1219,6 +1215,10 @@ static av_cold int cook_decode_init(AVCodecContext *avctx)
q->num_subpackets++;
s++;
if (s > FFMIN(MAX_SUBPACKETS, avctx->block_align)) {
avpriv_request_sample(avctx, "subpackets > %d", FFMIN(MAX_SUBPACKETS, avctx->block_align));
return AVERROR_PATCHWELCOME;
}
}
/* Try to catch some obviously faulty streams, otherwise it might be exploitable */
-1
View File
@@ -111,7 +111,6 @@ static int cpia_decode_frame(AVCodecContext *avctx,
// Read line length, two byte little endian
linelength = AV_RL16(src);
src += 2;
src_size -= 2;
if (src_size < linelength) {
frame->decode_error_flags = FF_DECODE_ERROR_INVALID_BITSTREAM;
+2 -5
View File
@@ -71,9 +71,6 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
int buf_size = avpkt->size;
CamStudioContext *c = avctx->priv_data;
int ret;
int bpp = avctx->bits_per_coded_sample / 8;
int bugdelta = FFALIGN(avctx->width * bpp, 4) * avctx->height
- (avctx->width & ~3) * bpp * avctx->height;
if (buf_size < 2) {
av_log(avctx, AV_LOG_ERROR, "coded frame too small\n");
@@ -87,7 +84,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
switch ((buf[0] >> 1) & 7) {
case 0: { // lzo compression
int outlen = c->decomp_size, inlen = buf_size - 2;
if (av_lzo1x_decode(c->decomp_buf, &outlen, &buf[2], &inlen) || (outlen && outlen != bugdelta)) {
if (av_lzo1x_decode(c->decomp_buf, &outlen, &buf[2], &inlen) || outlen) {
av_log(avctx, AV_LOG_ERROR, "error during lzo decompression\n");
return AVERROR_INVALIDDATA;
}
@@ -96,7 +93,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
case 1: { // zlib compression
#if CONFIG_ZLIB
unsigned long dlen = c->decomp_size;
if (uncompress(c->decomp_buf, &dlen, &buf[2], buf_size - 2) != Z_OK || (dlen != c->decomp_size && dlen != c->decomp_size - bugdelta)) {
if (uncompress(c->decomp_buf, &dlen, &buf[2], buf_size - 2) != Z_OK) {
av_log(avctx, AV_LOG_ERROR, "error during zlib decompression\n");
return AVERROR_INVALIDDATA;
}
+17 -26
View File
@@ -88,7 +88,7 @@ typedef struct CuvidContext
CUVIDDECODECAPS caps8, caps10, caps12;
CUVIDPARSERPARAMS cuparseinfo;
CUVIDEOFORMATEX *cuparse_ext;
CUVIDEOFORMATEX cuparse_ext;
CudaFunctions *cudl;
CuvidFunctions *cvdl;
@@ -684,7 +684,6 @@ static av_cold int cuvid_decode_end(AVCodecContext *avctx)
av_buffer_unref(&ctx->hwdevice);
av_freep(&ctx->key_frame);
av_freep(&ctx->cuparse_ext);
cuvid_free_functions(&ctx->cvdl);
@@ -794,8 +793,6 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
CUVIDSOURCEDATAPACKET seq_pkt;
CUcontext cuda_ctx = NULL;
CUcontext dummy;
uint8_t *extradata;
int extradata_size;
int ret = 0;
enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
@@ -892,8 +889,11 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
ctx->cudl = device_hwctx->internal->cuda_dl;
memset(&ctx->cuparseinfo, 0, sizeof(ctx->cuparseinfo));
memset(&ctx->cuparse_ext, 0, sizeof(ctx->cuparse_ext));
memset(&seq_pkt, 0, sizeof(seq_pkt));
ctx->cuparseinfo.pExtVideoInfo = &ctx->cuparse_ext;
switch (avctx->codec->id) {
#if CONFIG_H264_CUVID_DECODER
case AV_CODEC_ID_H264:
@@ -947,26 +947,17 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
if (avctx->codec->bsfs) {
const AVCodecParameters *par = avctx->internal->bsf->par_out;
extradata = par->extradata;
extradata_size = par->extradata_size;
} else {
extradata = avctx->extradata;
extradata_size = avctx->extradata_size;
ctx->cuparse_ext.format.seqhdr_data_length = par->extradata_size;
memcpy(ctx->cuparse_ext.raw_seqhdr_data,
par->extradata,
FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), par->extradata_size));
} else if (avctx->extradata_size > 0) {
ctx->cuparse_ext.format.seqhdr_data_length = avctx->extradata_size;
memcpy(ctx->cuparse_ext.raw_seqhdr_data,
avctx->extradata,
FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), avctx->extradata_size));
}
ctx->cuparse_ext = av_mallocz(sizeof(*ctx->cuparse_ext)
+ FFMAX(extradata_size - (int)sizeof(ctx->cuparse_ext->raw_seqhdr_data), 0));
if (!ctx->cuparse_ext) {
ret = AVERROR(ENOMEM);
goto error;
}
if (extradata_size > 0)
memcpy(ctx->cuparse_ext->raw_seqhdr_data, extradata, extradata_size);
ctx->cuparse_ext->format.seqhdr_data_length = extradata_size;
ctx->cuparseinfo.pExtVideoInfo = ctx->cuparse_ext;
ctx->key_frame = av_mallocz(ctx->nb_surfaces * sizeof(int));
if (!ctx->key_frame) {
ret = AVERROR(ENOMEM);
@@ -995,8 +986,8 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
if (ret < 0)
goto error;
seq_pkt.payload = ctx->cuparse_ext->raw_seqhdr_data;
seq_pkt.payload_size = ctx->cuparse_ext->format.seqhdr_data_length;
seq_pkt.payload = ctx->cuparse_ext.raw_seqhdr_data;
seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;
if (seq_pkt.payload && seq_pkt.payload_size) {
ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &seq_pkt));
@@ -1055,8 +1046,8 @@ static void cuvid_flush(AVCodecContext *avctx)
if (ret < 0)
goto error;
seq_pkt.payload = ctx->cuparse_ext->raw_seqhdr_data;
seq_pkt.payload_size = ctx->cuparse_ext->format.seqhdr_data_length;
seq_pkt.payload = ctx->cuparse_ext.raw_seqhdr_data;
seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;
if (seq_pkt.payload && seq_pkt.payload_size) {
ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &seq_pkt));
+1 -1
View File
@@ -328,7 +328,7 @@ static void dmix_add_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t le
int i;
for (i = 0; i < len; i++)
dst[i] += (unsigned)mul15(src[i], coeff);
dst[i] += mul15(src[i], coeff);
}
static void dmix_scale_c(int32_t *dst, int scale, ptrdiff_t len)
+1 -2
View File
@@ -1858,8 +1858,7 @@ int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
int ret;
if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
if ((unsigned)avctx->width > INT_MAX - STRIDE_ALIGN ||
(ret = av_image_check_size2(FFALIGN(avctx->width, STRIDE_ALIGN), avctx->height, avctx->max_pixels, AV_PIX_FMT_NONE, 0, avctx)) < 0 || avctx->pix_fmt<0) {
if ((ret = av_image_check_size2(FFALIGN(avctx->width, STRIDE_ALIGN), avctx->height, avctx->max_pixels, AV_PIX_FMT_NONE, 0, avctx)) < 0 || avctx->pix_fmt<0) {
av_log(avctx, AV_LOG_ERROR, "video_get_buffer: image parameters invalid\n");
ret = AVERROR(EINVAL);
goto fail;
+1 -1
View File
@@ -215,7 +215,7 @@ static int dirac_combine_frame(AVCodecParserContext *s, AVCodecContext *avctx,
int64_t pts = AV_RB32(cur_pu + 13);
if (s->last_pts == 0 && s->last_dts == 0)
s->dts = pts - 1;
else if (s->last_dts != AV_NOPTS_VALUE)
else
s->dts = s->last_dts + 1;
s->pts = pts;
if (!avctx->has_b_frames && (cur_pu[4] & 0x03))
+2 -2
View File
@@ -1431,8 +1431,8 @@ static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
int *c = s->globalmc[ref].perspective;
int64_t m = (1<<ep) - (c[0]*(int64_t)x + c[1]*(int64_t)y);
int64_t mx = m * (uint64_t)((A[0][0] * (int64_t)x + A[0][1]*(int64_t)y) + (1LL<<ez) * b[0]);
int64_t my = m * (uint64_t)((A[1][0] * (int64_t)x + A[1][1]*(int64_t)y) + (1LL<<ez) * b[1]);
int64_t mx = m * (int64_t)((A[0][0] * (int64_t)x + A[0][1]*(int64_t)y) + (1LL<<ez) * b[0]);
int64_t my = m * (int64_t)((A[1][0] * (int64_t)x + A[1][1]*(int64_t)y) + (1LL<<ez) * b[1]);
block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);

Some files were not shown because too many files have changed in this diff Show More