Compare commits
210 Commits
n4.3.7
...
release/4.3
| Author | SHA1 | Date | |
|---|---|---|---|
| d91b3a16b5 | |||
| 257ab5a6ac | |||
| 58882aa298 | |||
| be1665dbec | |||
| a7f6ee19a8 | |||
| 6dc71760d2 | |||
| 01d5c40143 | |||
| 0ef8ce13b8 | |||
| 2d32a6611a | |||
| 415ed8bb09 | |||
| a3acba8949 | |||
| 55774a0f19 | |||
| c1a2e08c73 | |||
| 93d70c41e3 | |||
| a778fe8fe4 | |||
| 37403e0aee | |||
| 02e9b60b66 | |||
| b6bf959e12 | |||
| e1f5d0ff91 | |||
| fe01189b0a | |||
| db25eb352a | |||
| 69a6974116 | |||
| 5a020f8690 | |||
| ffbb1058b7 | |||
| 71274326b1 | |||
| 486e46719e | |||
| c32e89653a | |||
| 7acd31d09a | |||
| 23afb77b8e | |||
| 80ebfdb7ca | |||
| e134cf5798 | |||
| 3c61273c8f | |||
| fb358672d9 | |||
| e91a7c9ca6 | |||
| d2c9b04388 | |||
| 08d94cbe2b | |||
| af336762bd | |||
| 2f80158781 | |||
| 3a12fc4309 | |||
| 48118f2fae | |||
| 9e7278e832 | |||
| 1ca62c1718 | |||
| 43b8852feb | |||
| f5d8b82891 | |||
| 719ebc4adb | |||
| 34eb233f07 | |||
| dda63e60a0 | |||
| 72c82bbb9f | |||
| afc97bddb6 | |||
| a8ea6f8465 | |||
| ef2e5030a9 | |||
| 9762ef37a5 | |||
| efe13fd99c | |||
| 1c4667297c | |||
| a0ec70455c | |||
| 6ad3e87a87 | |||
| daa0d94cf9 | |||
| 421207a390 | |||
| 3ce439f5fe | |||
| 9fbcecf435 | |||
| d8778cbccd | |||
| ac8fda6566 | |||
| 33a45199aa | |||
| 503a3e4c38 | |||
| affeb1dde1 | |||
| 334d24b539 | |||
| b612d3634c | |||
| 6c14d8e338 | |||
| 9bef02015e | |||
| d1ea5ab824 | |||
| e958e49b31 | |||
| a24784d504 | |||
| 71885a72b6 | |||
| 736791b4e2 | |||
| c02c96b93d | |||
| 50a5c2f75d | |||
| 95b2b8393d | |||
| 3c005e89b2 | |||
| ab42cdfd0e | |||
| 8d3f10011d | |||
| a51d618e8b | |||
| 8bc1b2ee5d | |||
| 9912935062 | |||
| 795cec23b2 | |||
| f0442346e6 | |||
| dfdb353fff | |||
| d0a7da4874 | |||
| 5a6bfec33d | |||
| 4c9978124d | |||
| a0e85cda1d | |||
| c84b693703 | |||
| c5b3a3f969 | |||
| 985ee23665 | |||
| 9b7d4ad167 | |||
| a3431f279e | |||
| c9a2d48c2e | |||
| 4eceda7528 | |||
| 0e4eaa307f | |||
| e102dfd331 | |||
| e6a6343976 | |||
| cb57b8352d | |||
| 5b82c7c04c | |||
| a24f1e1f7a | |||
| 072d22d402 | |||
| 156ed79d28 | |||
| 418c7e221d | |||
| b583dd4e44 | |||
| aea5aadc20 | |||
| 3d9afae908 | |||
| 6cd0bdf3b0 | |||
| 131bd9436c | |||
| 69107495c5 | |||
| c5a083a50b | |||
| 4af46cabf6 | |||
| d67155005d | |||
| 9d64371b0e | |||
| 7c1337ccb0 | |||
| 610ca2cd52 | |||
| b2294bf784 | |||
| efb211a449 | |||
| b9a187b164 | |||
| 5ed2358b12 | |||
| 2ece1f83e4 | |||
| 64426fd699 | |||
| d7256bf014 | |||
| d5a411f34f | |||
| 205732d468 | |||
| 09f16093e6 | |||
| a8975a3b0d | |||
| 5785193655 | |||
| 0f75b041dc | |||
| e346c7424b | |||
| 999720f8fd | |||
| 23af4eac80 | |||
| 17a8081d2f | |||
| fcef4eb0e7 | |||
| 3cc8b4b495 | |||
| c31d0108b1 | |||
| 7c6f9c872f | |||
| 72bca563d5 | |||
| e3f7976c44 | |||
| aa3cdc085d | |||
| a1524dbc93 | |||
| 1c1f537e06 | |||
| f21ba27cd6 | |||
| 02e6d29c33 | |||
| 000b9de913 | |||
| adabe1aa9e | |||
| a7c43833f2 | |||
| f96036e410 | |||
| 5364d0de5c | |||
| 8860326a1a | |||
| 0ca3416176 | |||
| 75de958c0f | |||
| 3cfd197bea | |||
| 4a04c96f70 | |||
| da2240ccbe | |||
| 4430a979bc | |||
| 2c7d846959 | |||
| 0e2bff1a88 | |||
| 35be459c27 | |||
| 8e35cb8c22 | |||
| 113960bb02 | |||
| 359893d829 | |||
| 37cb93097a | |||
| 2bdb01c847 | |||
| 1c349d968c | |||
| c8b26518fc | |||
| 271b7ce6f5 | |||
| 1736bfa088 | |||
| 2115efc337 | |||
| 1fa6ad5474 | |||
| dff8c05a14 | |||
| 55f4161005 | |||
| 55cce2ab4a | |||
| cf564cb826 | |||
| ce84532589 | |||
| ec75ce6ede | |||
| 6f185c6ee9 | |||
| cc5694cc02 | |||
| 9893e36786 | |||
| 39ba817a49 | |||
| 3174e73cc5 | |||
| 20aba0c184 | |||
| b5129a08fd | |||
| 747a3f6999 | |||
| 13807f204b | |||
| bf071ff5f1 | |||
| 917c2b02c4 | |||
| 81fbeedd1b | |||
| 93c16626b5 | |||
| bad037ba50 | |||
| 86dfa07e76 | |||
| 0d1f773904 | |||
| 58213b5970 | |||
| 70fd924f31 | |||
| d6e212d312 | |||
| 1d553fc50e | |||
| d8eaf39a72 | |||
| cc8b888905 | |||
| c803c8ef60 | |||
| 3eada55c21 | |||
| 8b56b03028 | |||
| 02af994743 | |||
| dfb6bd6b08 | |||
| 0258d8302d | |||
| da915126bf | |||
| 4c539a8162 | |||
| 744b0f80ac | |||
| bd1e6d2af3 |
@@ -0,0 +1,23 @@
|
||||
exclude: ^tests/ref/
|
||||
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v5.0.0
|
||||
hooks:
|
||||
- id: check-case-conflict
|
||||
- id: check-executables-have-shebangs
|
||||
- id: check-illegal-windows-names
|
||||
- id: check-shebang-scripts-are-executable
|
||||
- id: check-yaml
|
||||
- id: end-of-file-fixer
|
||||
- id: fix-byte-order-marker
|
||||
- id: mixed-line-ending
|
||||
- id: trailing-whitespace
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: aarch64-asm-indent
|
||||
name: fix aarch64 assembly indentation
|
||||
files: ^.*/aarch64/.*\.S$
|
||||
language: script
|
||||
entry: ./tools/check_arm_indent.sh --apply
|
||||
pass_filenames: false
|
||||
@@ -0,0 +1,29 @@
|
||||
name: Lint
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- release/4.3
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
name: Pre-Commit
|
||||
runs-on: utilities
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Install pre-commit CI
|
||||
id: install
|
||||
run: |
|
||||
python3 -m venv ~/pre-commit
|
||||
~/pre-commit/bin/pip install --upgrade pip setuptools
|
||||
~/pre-commit/bin/pip install pre-commit
|
||||
echo "envhash=$({ python3 --version && cat .forgejo/pre-commit/config.yaml; } | sha256sum | cut -d' ' -f1)" >> $FORGEJO_OUTPUT
|
||||
- name: Cache
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cache/pre-commit
|
||||
key: pre-commit-${{ steps.install.outputs.envhash }}
|
||||
- name: Run pre-commit CI
|
||||
run: ~/pre-commit/bin/pre-commit run -c .forgejo/pre-commit/config.yaml --show-diff-on-failure --color=always --all-files
|
||||
@@ -0,0 +1,80 @@
|
||||
name: Test
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- release/4.3
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
run_fate:
|
||||
name: Fate (${{ matrix.runner }}, ${{ matrix.shared }}, ${{ matrix.bits }} bit)
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
runner: [linux-aarch64]
|
||||
shared: ['static']
|
||||
bits: ['64']
|
||||
include:
|
||||
- runner: linux-amd64
|
||||
shared: 'static'
|
||||
bits: '32'
|
||||
- runner: linux-amd64
|
||||
shared: 'shared'
|
||||
bits: '64'
|
||||
runs-on: ${{ matrix.runner }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Configure
|
||||
run: |
|
||||
./configure --enable-gpl --enable-nonfree --enable-memory-poisoning --assert-level=2 \
|
||||
$([ "${{ matrix.bits }}" != "32" ] || echo --arch=x86_32 --extra-cflags=-m32 --extra-cxxflags=-m32 --extra-ldflags=-m32) \
|
||||
$([ "${{ matrix.shared }}" != "shared" ] || echo --enable-shared --disable-static) \
|
||||
|| CFGRES=$? && CFGRES=$?
|
||||
cat ffbuild/config.log
|
||||
exit $CFGRES
|
||||
- name: Build
|
||||
run: make -j$(nproc)
|
||||
- name: Restore Cached Fate-Suite
|
||||
id: cache
|
||||
uses: actions/cache/restore@v4
|
||||
with:
|
||||
path: fate-suite
|
||||
key: fate-suite
|
||||
restore-keys: |
|
||||
fate-suite-
|
||||
- name: Sync Fate-Suite
|
||||
id: fate
|
||||
run: |
|
||||
make fate-rsync SAMPLES=$PWD/fate-suite
|
||||
echo "hash=$(find fate-suite -type f -printf "%P %s %T@\n" | sort | sha256sum | cut -d' ' -f1)" >> $FORGEJO_OUTPUT
|
||||
- name: Cache Fate-Suite
|
||||
uses: actions/cache/save@v4
|
||||
if: ${{ format('fate-suite-{0}', steps.fate.outputs.hash) != steps.cache.outputs.cache-matched-key }}
|
||||
with:
|
||||
path: fate-suite
|
||||
key: fate-suite-${{ steps.fate.outputs.hash }}
|
||||
- name: Run Fate
|
||||
run: LD_LIBRARY_PATH="$(printf "%s:" "$PWD"/lib*)$PWD" make fate fate-build SAMPLES=$PWD/fate-suite -j$(nproc)
|
||||
compile_only:
|
||||
name: Fate (Win64, Build-Only)
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
image: ["ghcr.io/btbn/ffmpeg-builds/win64-gpl-4.3:latest"]
|
||||
runs-on: linux-amd64
|
||||
container: ${{ matrix.image }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Configure
|
||||
run: |
|
||||
./configure --pkg-config-flags="--static" $FFBUILD_TARGET_FLAGS $FF_CONFIGURE \
|
||||
--cc="$CC" --cxx="$CXX" --ar="$AR" --ranlib="$RANLIB" --nm="$NM" \
|
||||
--extra-cflags="$FF_CFLAGS" --extra-cxxflags="$FF_CXXFLAGS" \
|
||||
--extra-libs="$FF_LIBS" --extra-ldflags="$FF_LDFLAGS" --extra-ldexeflags="$FF_LDEXEFLAGS"
|
||||
- name: Build
|
||||
run: make -j$(nproc)
|
||||
- name: Run Fate
|
||||
run: make -j$(nproc) fate-build
|
||||
+9
-9
@@ -55,7 +55,7 @@ modified by someone else and passed on, the recipients should know
|
||||
that what they have is not the original version, so that the original
|
||||
author's reputation will not be affected by problems that might be
|
||||
introduced by others.
|
||||
|
||||
|
||||
Finally, software patents pose a constant threat to the existence of
|
||||
any free program. We wish to make sure that a company cannot
|
||||
effectively restrict the users of a free program by obtaining a
|
||||
@@ -111,7 +111,7 @@ modification follow. Pay close attention to the difference between a
|
||||
"work based on the library" and a "work that uses the library". The
|
||||
former contains code derived from the library, whereas the latter must
|
||||
be combined with the library in order to run.
|
||||
|
||||
|
||||
GNU LESSER GENERAL PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
@@ -158,7 +158,7 @@ Library.
|
||||
You may charge a fee for the physical act of transferring a copy,
|
||||
and you may at your option offer warranty protection in exchange for a
|
||||
fee.
|
||||
|
||||
|
||||
2. You may modify your copy or copies of the Library or any portion
|
||||
of it, thus forming a work based on the Library, and copy and
|
||||
distribute such modifications or work under the terms of Section 1
|
||||
@@ -216,7 +216,7 @@ instead of to this License. (If a newer version than version 2 of the
|
||||
ordinary GNU General Public License has appeared, then you can specify
|
||||
that version instead if you wish.) Do not make any other change in
|
||||
these notices.
|
||||
|
||||
|
||||
Once this change is made in a given copy, it is irreversible for
|
||||
that copy, so the ordinary GNU General Public License applies to all
|
||||
subsequent copies and derivative works made from that copy.
|
||||
@@ -267,7 +267,7 @@ Library will still fall under Section 6.)
|
||||
distribute the object code for the work under the terms of Section 6.
|
||||
Any executables containing that work also fall under Section 6,
|
||||
whether or not they are linked directly with the Library itself.
|
||||
|
||||
|
||||
6. As an exception to the Sections above, you may also combine or
|
||||
link a "work that uses the Library" with the Library to produce a
|
||||
work containing portions of the Library, and distribute that work
|
||||
@@ -329,7 +329,7 @@ restrictions of other proprietary libraries that do not normally
|
||||
accompany the operating system. Such a contradiction means you cannot
|
||||
use both them and the Library together in an executable that you
|
||||
distribute.
|
||||
|
||||
|
||||
7. You may place library facilities that are a work based on the
|
||||
Library side-by-side in a single library together with other library
|
||||
facilities not covered by this License, and distribute such a combined
|
||||
@@ -370,7 +370,7 @@ subject to these terms and conditions. You may not impose any further
|
||||
restrictions on the recipients' exercise of the rights granted herein.
|
||||
You are not responsible for enforcing compliance by third parties with
|
||||
this License.
|
||||
|
||||
|
||||
11. If, as a consequence of a court judgment or allegation of patent
|
||||
infringement or for any other reason (not limited to patent issues),
|
||||
conditions are imposed on you (whether by court order, agreement or
|
||||
@@ -422,7 +422,7 @@ conditions either of that version or of any later version published by
|
||||
the Free Software Foundation. If the Library does not specify a
|
||||
license version number, you may choose any version ever published by
|
||||
the Free Software Foundation.
|
||||
|
||||
|
||||
14. If you wish to incorporate parts of the Library into other free
|
||||
programs whose distribution conditions are incompatible with these,
|
||||
write to the author to ask for permission. For software which is
|
||||
@@ -456,7 +456,7 @@ SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
||||
DAMAGES.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
|
||||
How to Apply These Terms to Your New Libraries
|
||||
|
||||
If you develop a new library, and you want it to be of the greatest
|
||||
|
||||
@@ -2,6 +2,201 @@ Entries are sorted chronologically from oldest to youngest within each release,
|
||||
releases are sorted from youngest to oldest.
|
||||
|
||||
|
||||
version 4.3.9:
|
||||
configure: update copyright year
|
||||
avformat/hls: Partially revert "reduce default max reload to 3"
|
||||
avformat/hls: Fix twitter
|
||||
libavformat/hls: Be more restrictive on mpegts extensions
|
||||
avformat/hls: .ts is always ok even if its a mov/mp4
|
||||
avformat/hls: Print input format in error message
|
||||
avformat/hls: Be more picky on extensions
|
||||
avformat: add ff_match_url_ext()
|
||||
avfilter/bwdif: account for chroma sub-sampling in min size calculation
|
||||
avformat/iff: Check that we have a stream in read_dst_frame()
|
||||
avformat/mlvdec: fix size checks
|
||||
avformat/mxfdec: Check edit unit for overflow in mxf_set_current_edit_unit()
|
||||
avcodec/h263dec: Check against previous dimensions instead of coded
|
||||
avformat/mxfdec: Check avio_read() success in mxf_decrypt_triplet()
|
||||
avcodec/huffyuvdec: Initialize whole output for decode_gray_bitstream()
|
||||
avformat/ipmovie: Check signature_buffer read
|
||||
avformat/wtvdec: Initialize buf
|
||||
avcodec/cbs_vp9: Initialize VP9RawSuperframeIndex
|
||||
avformat/vqf: Propagate errors from add_metadata()
|
||||
avformat/vqf: Check avio_read() in add_metadata()
|
||||
avformat/dashdec: Check whitelist
|
||||
avutil/avstring: dont mess with NULL pointers in av_match_list()
|
||||
avcodec/mpegvideo_enc: Check FLV1 resolution limits
|
||||
avcodec/ffv1enc: Fix handling of 32bit unsigned symbols
|
||||
avcodec/vc1dec: Clear block_index in vc1_decode_reset()
|
||||
avcodec/aacsbr_template: Clear n_q on error
|
||||
swscale/output: Fix undefined overflow in yuv2rgba64_full_X_c_template()
|
||||
avfilter/af_pan: Fix sscanf() use
|
||||
avfilter/vf_addroi: Add missing NULL termination to addroi_var_names[]()
|
||||
avformat/rmdec: check that buf if completely filled
|
||||
avcodec/hapdec: Clear tex buffer
|
||||
avformat/mxfdec: Check that key was read sucessfull
|
||||
avformat/rpl: Fix check for negative values
|
||||
avformat/mlvdec: Check avio_read()
|
||||
avcodec/utils: Fix block align overflow for ADPCM_IMA_WAV
|
||||
avformat/matroskadec: Check pre_ns for overflow
|
||||
avcodec/webp: Check ref_x/y
|
||||
avcodec/ilbcdec: Initialize tempbuff2
|
||||
avformat/dxa: check bpc
|
||||
swscale/slice: clear allocated memory in alloc_lines()
|
||||
avformat/icodec: fix integer overflow with nb_pal
|
||||
doc/developer: Document relationship between git accounts and MAINTAINERS
|
||||
avformat/vividas: Check avio_read() for failure
|
||||
avformat/ilbc: Check avio_read() for failure
|
||||
avformat/nistspheredec: Clear buffer
|
||||
INSTALL: explain the circular dependency issue and solution
|
||||
avformat/mpegts: Initialize predefined_SLConfigDescriptor_seen
|
||||
avformat/mxfdec: Fix overflow in midpoint computation
|
||||
swscale/output: used unsigned for bit accumulation
|
||||
avcodec/rangecoder: only perform renorm check/loop for callers that need it
|
||||
avcodec/ffv1dec: Fix end computation with ec=2
|
||||
avcodec/ffv1enc: Prevent generation of files with broken slices
|
||||
avformat/matroskadec: Check desc_bytes so bits fit in 64bit
|
||||
avcodec/ffv1enc: Correct error message about unsupported version
|
||||
avcodec/ffv1enc: Slice combination is unsupported
|
||||
avcodec/ffv1enc: 2Pass mode is not possible with golomb coding
|
||||
avcodec/ffv1enc: Fix >8bit context size
|
||||
avcodec/xan: Add basic input size check
|
||||
avcodec/svq3: Check for minimum size input
|
||||
avcodec/eacmv: Check input size for intra frames
|
||||
avcodec/jfdctint_template: use unsigned z* in row_fdct()
|
||||
avformat/mxfdec: Check timecode for overflow
|
||||
avformat/mxfdec: More offset_temp checks
|
||||
swscale/output: Fix undefined integer overflow in yuv2rgba64_2_c_template()
|
||||
swscale/swscale: Use unsigned operation to avoid undefined behavior
|
||||
avcodec/vc2enc: basic sanity check on slice_max_bytes
|
||||
avformat/mvdec: Check if name was fully read
|
||||
avcodec/wmavoice: Do not use uninitialized pitch[0]
|
||||
avcodec/notchlc: Check bytes left before reading
|
||||
avcodec/vc1_block: propagate error codes
|
||||
avformat/apetag: Check APETAGEX
|
||||
avcodec/avcodec: Warn about data returned from get_buffer*()
|
||||
avcodec/aic: Clear slice_data
|
||||
avcodec/vc1dec: Clear mb_type_base and ttblk_base
|
||||
avcodec/shorten: clear padding
|
||||
avformat/mpeg: Check an avio_read() for failure
|
||||
avcodec/mvha: Clear remaining space after inflate()
|
||||
avformat/segafilm: Set keyframe
|
||||
avcodec/dxva2: initialize hr in ff_dxva2_common_end_frame()
|
||||
avcodec/dxva2: initialize validate
|
||||
avcodec/dxva2: Initialize ConfigBitstreamRaw
|
||||
avcodec/dxva2: Initialize dxva_size and check it
|
||||
avfilter/vf_xfade: Compute w2, h2 with float
|
||||
avfilter/vf_v360: Assert that vf was initialized
|
||||
avfilter/vf_tonemap_opencl: Dereference after NULL check
|
||||
avfilter/vf_xfade_opencl: Check ff_inlink_consume_frame() for failure
|
||||
avformat/lmlm4: Eliminate some AVERROR(EIO)
|
||||
avformat/wtvdec: Check length of read mpeg2_descriptor
|
||||
avformat/wtvdec: clear sectors
|
||||
vp9: recon: Use emulated edge to prevent buffer overflows
|
||||
arm: vp9mc: Load only 12 pixels in the 4 pixel wide horizontal filter
|
||||
aarch64: vp9mc: Load only 12 pixels in the 4 pixel wide horizontal filter
|
||||
avformat/libzmq: fix check for zmq protocol prefix
|
||||
configure: improve check for POSIX ioctl
|
||||
configure: restore autodetection of v4l2 and fbdev
|
||||
configure: use just the pkg-config for sndio
|
||||
configure: enable ffnvcodec, nvenc, nvdec for FreeBSD
|
||||
avutil/ppc/cpu: Also use the machdep.altivec sysctl on NetBSD
|
||||
avutil/ppc/cpu: Use proper header for OpenBSD PPC CPU detection
|
||||
lavd/v4l2: Use proper field type for second parameter of ioctl() with BSD's
|
||||
configure: use pkg-config for sndio
|
||||
libavcodec/arm/mlpdsp_armv5te: fix label format to work with binutils 2.43
|
||||
|
||||
|
||||
version 4.3.8:
|
||||
avcodec/parser: ensure input padding is zeroed
|
||||
avformat/img2dec: Clear padding data after EOF
|
||||
avformat/wavdec: Check if there are 16 bytes before testing them
|
||||
avcodec/snow: Fix off by 1 error in run_buffer
|
||||
avcodec/utils: apply the same alignment to YUV410 as we do to YUV420 for snow
|
||||
update for 4.3.8
|
||||
avcodec/diracdsp: Remove unused variable
|
||||
avcodec/vaapi_encode: Check hwctx
|
||||
avcodec/proresdec: Consider negative bits left
|
||||
avcodec/hevc/hevcdec: Do not allow slices to depend on failed slices
|
||||
avutil/slicethread: Check pthread_*_init() for failure
|
||||
avutil/frame: Check log2_crop_align
|
||||
avutil/buffer: Check ff_mutex_init() for failure
|
||||
avformat/xmv: Check this_packet_size
|
||||
avformat/ty: rec_size seems to only need 32bit
|
||||
avformat/tty: Check avio_size()
|
||||
avformat/siff: Basic pkt_size check
|
||||
avformat/sauce: Check avio_size() for failure
|
||||
avformat/sapdec: Check ffurl_get_file_handle() for error
|
||||
avformat/nsvdec: Check asize for PCM
|
||||
avformat/mp3dec: Check header_filesize
|
||||
avformat/mp3dec; Check for avio_size() failure
|
||||
avformat/mov: Use 64bit for str_size
|
||||
avformat/mm: Check length
|
||||
avformat/hnm: Check *chunk_size
|
||||
avformat/hlsenc: Check ret
|
||||
avformat/bintext: Check avio_size() return
|
||||
avformat/asfdec_o: Check size of index object
|
||||
avfilter/scale_eval: Use 64bit, check values in ff_scale_adjust_dimensions()
|
||||
avfilter/vf_lut3d: Check av_scanf()
|
||||
avfilter/vf_deshake_opencl: Ensure that the first iteration initializes the best variables
|
||||
swscale/output: Fix integer overflows in yuv2rgba64_X_c_template
|
||||
avformat/mxfdec: Reorder elements of expression in bisect loop
|
||||
avcodec/pnmdec: Use 64bit for input size check
|
||||
avcodec/utvideoenc: Use unsigned shift to build flags
|
||||
avcodec/vc2enc: Fix overflows with storing large values
|
||||
avcodec/mpegvideo_enc: Do not duplicate pictures on shifting
|
||||
avcodec/tiff: Check value on positive signed targets
|
||||
avfilter/vf_bm3d: Dont round MSE2SSE to an integer
|
||||
avdevice/dshow: Check device_filter_unique_name before use
|
||||
avdevice/dshow_filter: Use wcscpy_s()
|
||||
avcodec/flac_parser: Assert that we do not overrun the link_penalty array
|
||||
avcodec/pixlet: Simplify pfx computation
|
||||
avcodec/motion_est: Fix score squaring overflow
|
||||
avcodec/loco: Check loco_get_rice() for failure
|
||||
avcodec/loco: check get_ur_golomb_jpegls() for failure
|
||||
avcodec/imm4: check cbphi for error
|
||||
avcodec/iff: Use signed count
|
||||
avcodec/golomb: Assert that k is in the supported range for get_ur/sr_golomb()
|
||||
avcodec/golomb: Document return for get_ur_golomb_jpegls() and get_sr_golomb_flac()
|
||||
avcodec/dxv: Fix type in get_opcodes()
|
||||
avcodec/xsubdec: Check parse_timecode()
|
||||
avutil/imgutils: av_image_check_size2() ensure width and height fit in 32bit
|
||||
avcodec/proresenc_kostya: use unsigned alpha for rotation
|
||||
avformat/rtmppkt: Simplify and deobfuscate amf_tag_skip() slightly
|
||||
avformat/rmdec: use 64bit for audio_framesize checks
|
||||
avutil/hwcontext_d3d11va: correct sizeof IDirect3DSurface9
|
||||
avutil/hwcontext_d3d11va: correct sizeof AVD3D11FrameDescriptor
|
||||
avformat/tls_schannel: Initialize ret
|
||||
avformat/subfile: Assert that whence is a known case
|
||||
avformat/subfile: Merge if into switch()
|
||||
avformat/rtsp: Check that lower transport is handled in one of the if()
|
||||
avformat/rtsp: initialize reply1
|
||||
avformat/rtsp: use < 0 for error check
|
||||
avformat/rtpenc_vc2hq: Check sizes
|
||||
avfilter/af_aderivative: Free out on error
|
||||
avfilter/af_pan: check nb_output_channels before use
|
||||
cbs_av1: Reject thirty-two zero bits in uvlc code
|
||||
tools/coverity: Phase 1 study of anti-halicogenic for coverity av_rescale()
|
||||
avfilter/vf_avgblur: Check plane instead of AVFrame
|
||||
avformat/rdt: Check pkt_len
|
||||
avformat/mpeg: Check len in mpegps_probe()
|
||||
avdevice/dshow: Check ICaptureGraphBuilder2_SetFiltergraph() for failure
|
||||
avcodec/mfenc: check IMFSample_ConvertToContiguousBuffer() for failure
|
||||
avcodec/vc1_loopfilter: Factor duplicate code in vc1_b_h_intfi_loop_filter()
|
||||
avformat/img2dec: assert no pipe on ts_from_file
|
||||
avcodec/cbs_jpeg: Try to move the read entity to one side in a test
|
||||
avformat/mov: Check edit list for overflow
|
||||
fftools/ffmpeg: Check read() for failure
|
||||
swscale/output: Avoid undefined overflow in yuv2rgb_write_full()
|
||||
swscale/output: alpha can become negative after scaling, use multiply
|
||||
avcodec/targaenc: Allocate space for the palette
|
||||
avcodec/r210enc: Use av_rescale for bitrate
|
||||
avcodec/jfdctint_template: Fewer integer anomalies
|
||||
avcodec/snowenc: MV limits due to mv_penalty table size
|
||||
avformat/mxfdec: Check container_ul->desc before use
|
||||
MAINTAINERS: Update the entries for the release maintainer for FFmpeg
|
||||
|
||||
|
||||
version 4.3.7:
|
||||
avfilter/vf_rotate: Check ff_draw_init2() return value
|
||||
avformat/matroskadec: Assert that num_levels is non negative
|
||||
|
||||
@@ -15,3 +15,11 @@ NOTICE
|
||||
------
|
||||
|
||||
- Non system dependencies (e.g. libx264, libvpx) are disabled by default.
|
||||
|
||||
NOTICE for Package Maintainers
|
||||
------------------------------
|
||||
|
||||
- It is recommended to build FFmpeg twice, first with minimal external dependencies so
|
||||
that 3rd party packages, which depend on FFmpegs libavutil/libavfilter/libavcodec/libavformat
|
||||
can then be built. And last build FFmpeg with full dependancies (which may in turn depend on
|
||||
some of these 3rd party packages). This avoids circular dependencies during build.
|
||||
|
||||
+5
-3
@@ -577,10 +577,12 @@ wm4
|
||||
Releases
|
||||
========
|
||||
|
||||
7.0 Michael Niedermayer
|
||||
6.1 Michael Niedermayer
|
||||
5.1 Michael Niedermayer
|
||||
4.4 Michael Niedermayer
|
||||
3.4 Michael Niedermayer
|
||||
2.8 Michael Niedermayer
|
||||
2.7 Michael Niedermayer
|
||||
2.6 Michael Niedermayer
|
||||
2.5 Michael Niedermayer
|
||||
|
||||
If you want to maintain an older release, please contact us
|
||||
|
||||
|
||||
@@ -2330,6 +2330,7 @@ HAVE_LIST="
|
||||
opencl_vaapi_intel_media
|
||||
perl
|
||||
pod2man
|
||||
posix_ioctl
|
||||
texi2html
|
||||
"
|
||||
|
||||
@@ -6541,11 +6542,13 @@ perl -v > /dev/null 2>&1 && enable perl || disable perl
|
||||
pod2man --help > /dev/null 2>&1 && enable pod2man || disable pod2man
|
||||
rsync --help 2> /dev/null | grep -q 'contimeout' && enable rsync_contimeout || disable rsync_contimeout
|
||||
|
||||
check_headers linux/fb.h
|
||||
check_headers linux/videodev2.h
|
||||
test_code cc linux/videodev2.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_sanitized struct_v4l2_frmivalenum_discrete
|
||||
test_code cc sys/ioctl.h "int ioctl(int, int, ...)" && enable posix_ioctl
|
||||
|
||||
# check V4L2 codecs available in the API
|
||||
if enabled v4l2_m2m; then
|
||||
check_headers linux/fb.h
|
||||
check_headers linux/videodev2.h
|
||||
test_code cc linux/videodev2.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_sanitized struct_v4l2_frmivalenum_discrete
|
||||
check_cc v4l2_m2m linux/videodev2.h "int i = V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_VIDEO_M2M | V4L2_BUF_FLAG_LAST;"
|
||||
check_cc vc1_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_VC1_ANNEX_G;"
|
||||
check_cc mpeg1_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_MPEG1;"
|
||||
@@ -6590,7 +6593,7 @@ enabled alsa && { check_pkg_config alsa alsa "alsa/asoundlib.h" snd_pcm_htimesta
|
||||
enabled libjack &&
|
||||
require_pkg_config libjack jack jack/jack.h jack_port_get_latency_range
|
||||
|
||||
enabled sndio && check_lib sndio sndio.h sio_open -lsndio
|
||||
enabled sndio && check_pkg_config sndio sndio sndio.h sio_open
|
||||
|
||||
if enabled libcdio; then
|
||||
check_pkg_config libcdio libcdio_paranoia "cdio/cdda.h cdio/paranoia.h" cdio_cddap_open ||
|
||||
@@ -6687,7 +6690,7 @@ enabled vulkan &&
|
||||
|
||||
if enabled x86; then
|
||||
case $target_os in
|
||||
mingw32*|mingw64*|win32|win64|linux|cygwin*)
|
||||
freebsd|mingw32*|mingw64*|win32|win64|linux|cygwin*)
|
||||
;;
|
||||
*)
|
||||
disable ffnvcodec cuvid nvdec nvenc
|
||||
@@ -7515,7 +7518,7 @@ cat > $TMPH <<EOF
|
||||
#define FFMPEG_CONFIG_H
|
||||
#define FFMPEG_CONFIGURATION "$(c_escape $FFMPEG_CONFIGURATION)"
|
||||
#define FFMPEG_LICENSE "$(c_escape $license)"
|
||||
#define CONFIG_THIS_YEAR 2024
|
||||
#define CONFIG_THIS_YEAR 2025
|
||||
#define FFMPEG_DATADIR "$(eval c_escape $datadir)"
|
||||
#define AVCONV_DATADIR "$(eval c_escape $datadir)"
|
||||
#define CC_IDENT "$(c_escape ${cc_ident:-Unknown compiler})"
|
||||
|
||||
+1
-1
@@ -38,7 +38,7 @@ PROJECT_NAME = FFmpeg
|
||||
# could be handy for archiving the generated documentation or if some version
|
||||
# control system is used.
|
||||
|
||||
PROJECT_NUMBER = 4.3.7
|
||||
PROJECT_NUMBER = 4.3.9
|
||||
|
||||
# Using the PROJECT_BRIEF tag one can provide an optional one line description
|
||||
# for a project that appears at the top of each page and should give viewer a
|
||||
|
||||
@@ -63,4 +63,3 @@ make -j<num>
|
||||
make -k
|
||||
Continue build in case of errors, this is useful for the regression tests
|
||||
sometimes but note that it will still not run all reg tests.
|
||||
|
||||
|
||||
@@ -327,6 +327,13 @@ segment index to start live streams at (negative values are from the end).
|
||||
@item allowed_extensions
|
||||
',' separated list of file extensions that hls is allowed to access.
|
||||
|
||||
@item extension_picky
|
||||
This blocks disallowed extensions from probing
|
||||
It also requires all available segments to have matching extensions to the format
|
||||
except mpegts, which is always allowed.
|
||||
It is recommended to set the whitelists correctly instead of depending on extensions
|
||||
Enabled by default.
|
||||
|
||||
@item max_reload
|
||||
Maximum number of times a insufficient list is attempted to be reloaded.
|
||||
Default value is 1000.
|
||||
|
||||
@@ -762,6 +762,25 @@ In case you need finer control over how valgrind is invoked, use the
|
||||
@code{--target-exec='valgrind <your_custom_valgrind_options>} option in
|
||||
your configure line instead.
|
||||
|
||||
@anchor{Maintenance}
|
||||
@chapter Maintenance process
|
||||
|
||||
@anchor{MAINTAINERS}
|
||||
@section MAINTAINERS
|
||||
|
||||
The developers maintaining each part of the codebase are listed in @file{MAINTAINERS}.
|
||||
Being listed in @file{MAINTAINERS}, gives one the right to have git write access to
|
||||
the specific repository.
|
||||
|
||||
@anchor{Becoming a maintainer}
|
||||
@section Becoming a maintainer
|
||||
|
||||
People add themselves to @file{MAINTAINERS} by sending a patch like any other code
|
||||
change. These get reviewed by the community like any other patch. It is expected
|
||||
that, if someone has an objection to a new maintainer, she is willing to object
|
||||
in public with her full name and is willing to take over maintainership for the area.
|
||||
|
||||
|
||||
@anchor{Release process}
|
||||
@chapter Release process
|
||||
|
||||
|
||||
@@ -157,4 +157,3 @@ PFD[32] would for example be signed 32 bit little-endian IEEE float
|
||||
@item XVID @tab non-compliant MPEG-4 generated by old Xvid
|
||||
@item XVIX @tab non-compliant MPEG-4 generated by old Xvid with interlacing bug
|
||||
@end multitable
|
||||
|
||||
|
||||
Regular → Executable
Regular → Executable
@@ -44,4 +44,3 @@ a+b*c;
|
||||
here the reader knows that a,b,c are meant to be signed integers but for C
|
||||
standard compliance / to avoid undefined behavior they are stored in unsigned
|
||||
ints.
|
||||
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
#!/bin/sh
|
||||
|
||||
toupper(){
|
||||
echo "$@" | tr abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
||||
}
|
||||
|
||||
+3
-2
@@ -468,8 +468,9 @@ static int read_key(void)
|
||||
}
|
||||
//Read it
|
||||
if(nchars != 0) {
|
||||
read(0, &ch, 1);
|
||||
return ch;
|
||||
if (read(0, &ch, 1) == 1)
|
||||
return ch;
|
||||
return 0;
|
||||
}else{
|
||||
return -1;
|
||||
}
|
||||
|
||||
+1
-1
@@ -1174,7 +1174,7 @@ SKIPHEADERS-$(CONFIG_QSV) += qsv.h qsv_internal.h
|
||||
SKIPHEADERS-$(CONFIG_QSVDEC) += qsvdec.h
|
||||
SKIPHEADERS-$(CONFIG_QSVENC) += qsvenc.h
|
||||
SKIPHEADERS-$(CONFIG_XVMC) += xvmc.h
|
||||
SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h vaapi_encode.h
|
||||
SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h vaapi_hevc.h vaapi_encode.h
|
||||
SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h vdpau_internal.h
|
||||
SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.h vt_internal.h
|
||||
SKIPHEADERS-$(CONFIG_V4L2_M2M) += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
|
||||
|
||||
+27
-13
@@ -173,6 +173,7 @@ void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
|
||||
sce->ics.window_sequence[0] == LONG_START_SEQUENCE ? 0 : 2;
|
||||
const int sfb_len = sfb_end - sfb_start;
|
||||
const int coef_len = sce->ics.swb_offset[sfb_end] - sce->ics.swb_offset[sfb_start];
|
||||
const int n_filt = is8 ? 1 : order != TNS_MAX_ORDER ? 2 : 3;
|
||||
|
||||
if (coef_len <= 0 || sfb_len <= 0) {
|
||||
sce->tns.present = 0;
|
||||
@@ -180,16 +181,30 @@ void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
|
||||
}
|
||||
|
||||
for (w = 0; w < sce->ics.num_windows; w++) {
|
||||
float en[2] = {0.0f, 0.0f};
|
||||
int oc_start = 0, os_start = 0;
|
||||
float en[4] = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
int oc_start = 0;
|
||||
int coef_start = sce->ics.swb_offset[sfb_start];
|
||||
|
||||
for (g = sfb_start; g < sce->ics.num_swb && g <= sfb_end; g++) {
|
||||
FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[w*16+g];
|
||||
if (g > sfb_start + (sfb_len/2))
|
||||
en[1] += band->energy;
|
||||
else
|
||||
en[0] += band->energy;
|
||||
if (n_filt == 2) {
|
||||
for (g = sfb_start; g < sce->ics.num_swb && g <= sfb_end; g++) {
|
||||
FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[w*16+g];
|
||||
if (g > sfb_start + (sfb_len/2))
|
||||
en[1] += band->energy; /* End */
|
||||
else
|
||||
en[0] += band->energy; /* Start */
|
||||
}
|
||||
en[2] = en[0];
|
||||
} else {
|
||||
for (g = sfb_start; g < sce->ics.num_swb && g <= sfb_end; g++) {
|
||||
FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[w*16+g];
|
||||
if (g > sfb_start + (sfb_len/2) + (sfb_len/4))
|
||||
en[2] += band->energy; /* End */
|
||||
else if (g > sfb_start + (sfb_len/2) - (sfb_len/4))
|
||||
en[1] += band->energy; /* Middle */
|
||||
else
|
||||
en[0] += band->energy; /* Start */
|
||||
}
|
||||
en[3] = en[0];
|
||||
}
|
||||
|
||||
/* LPC */
|
||||
@@ -199,15 +214,14 @@ void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
|
||||
if (!order || !isfinite(gain) || gain < TNS_GAIN_THRESHOLD_LOW || gain > TNS_GAIN_THRESHOLD_HIGH)
|
||||
continue;
|
||||
|
||||
tns->n_filt[w] = is8 ? 1 : order != TNS_MAX_ORDER ? 2 : 3;
|
||||
tns->n_filt[w] = n_filt;
|
||||
for (g = 0; g < tns->n_filt[w]; g++) {
|
||||
tns->direction[w][g] = slant != 2 ? slant : en[g] < en[!g];
|
||||
tns->order[w][g] = g < tns->n_filt[w] ? order/tns->n_filt[w] : order - oc_start;
|
||||
tns->length[w][g] = g < tns->n_filt[w] ? sfb_len/tns->n_filt[w] : sfb_len - os_start;
|
||||
tns->direction[w][g] = slant != 2 ? slant : en[g] < en[g + 1];
|
||||
tns->order[w][g] = order/tns->n_filt[w];
|
||||
tns->length[w][g] = sfb_len/tns->n_filt[w];
|
||||
quantize_coefs(&coefs[oc_start], tns->coef_idx[w][g], tns->coef[w][g],
|
||||
tns->order[w][g], c_bits);
|
||||
oc_start += tns->order[w][g];
|
||||
os_start += tns->length[w][g];
|
||||
}
|
||||
count++;
|
||||
}
|
||||
|
||||
@@ -592,6 +592,7 @@ static int sbr_make_f_derived(AACContext *ac, SpectralBandReplication *sbr)
|
||||
|
||||
if (sbr->n_q > 5) {
|
||||
av_log(ac->avctx, AV_LOG_ERROR, "Too many noise floor scale factors: %d\n", sbr->n_q);
|
||||
sbr->n_q = 1;
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
+109
-109
@@ -19,130 +19,130 @@
|
||||
#include "libavutil/aarch64/asm.S"
|
||||
|
||||
function ff_ps_add_squares_neon, export=1
|
||||
1: ld1 {v0.4S,v1.4S}, [x1], #32
|
||||
fmul v0.4S, v0.4S, v0.4S
|
||||
fmul v1.4S, v1.4S, v1.4S
|
||||
faddp v2.4S, v0.4S, v1.4S
|
||||
ld1 {v3.4S}, [x0]
|
||||
fadd v3.4S, v3.4S, v2.4S
|
||||
st1 {v3.4S}, [x0], #16
|
||||
subs w2, w2, #4
|
||||
b.gt 1b
|
||||
1: ld1 {v0.4s,v1.4s}, [x1], #32
|
||||
fmul v0.4s, v0.4s, v0.4s
|
||||
fmul v1.4s, v1.4s, v1.4s
|
||||
faddp v2.4s, v0.4s, v1.4s
|
||||
ld1 {v3.4s}, [x0]
|
||||
fadd v3.4s, v3.4s, v2.4s
|
||||
st1 {v3.4s}, [x0], #16
|
||||
subs w2, w2, #4
|
||||
b.gt 1b
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_ps_mul_pair_single_neon, export=1
|
||||
1: ld1 {v0.4S,v1.4S}, [x1], #32
|
||||
ld1 {v2.4S}, [x2], #16
|
||||
zip1 v3.4S, v2.4S, v2.4S
|
||||
zip2 v4.4S, v2.4S, v2.4S
|
||||
fmul v0.4S, v0.4S, v3.4S
|
||||
fmul v1.4S, v1.4S, v4.4S
|
||||
st1 {v0.4S,v1.4S}, [x0], #32
|
||||
subs w3, w3, #4
|
||||
b.gt 1b
|
||||
1: ld1 {v0.4s,v1.4s}, [x1], #32
|
||||
ld1 {v2.4s}, [x2], #16
|
||||
zip1 v3.4s, v2.4s, v2.4s
|
||||
zip2 v4.4s, v2.4s, v2.4s
|
||||
fmul v0.4s, v0.4s, v3.4s
|
||||
fmul v1.4s, v1.4s, v4.4s
|
||||
st1 {v0.4s,v1.4s}, [x0], #32
|
||||
subs w3, w3, #4
|
||||
b.gt 1b
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_ps_stereo_interpolate_neon, export=1
|
||||
ld1 {v0.4S}, [x2]
|
||||
ld1 {v1.4S}, [x3]
|
||||
zip1 v4.4S, v0.4S, v0.4S
|
||||
zip2 v5.4S, v0.4S, v0.4S
|
||||
zip1 v6.4S, v1.4S, v1.4S
|
||||
zip2 v7.4S, v1.4S, v1.4S
|
||||
1: ld1 {v2.2S}, [x0]
|
||||
ld1 {v3.2S}, [x1]
|
||||
fadd v4.4S, v4.4S, v6.4S
|
||||
fadd v5.4S, v5.4S, v7.4S
|
||||
mov v2.D[1], v2.D[0]
|
||||
mov v3.D[1], v3.D[0]
|
||||
fmul v2.4S, v2.4S, v4.4S
|
||||
fmla v2.4S, v3.4S, v5.4S
|
||||
st1 {v2.D}[0], [x0], #8
|
||||
st1 {v2.D}[1], [x1], #8
|
||||
subs w4, w4, #1
|
||||
b.gt 1b
|
||||
ld1 {v0.4s}, [x2]
|
||||
ld1 {v1.4s}, [x3]
|
||||
zip1 v4.4s, v0.4s, v0.4s
|
||||
zip2 v5.4s, v0.4s, v0.4s
|
||||
zip1 v6.4s, v1.4s, v1.4s
|
||||
zip2 v7.4s, v1.4s, v1.4s
|
||||
1: ld1 {v2.2s}, [x0]
|
||||
ld1 {v3.2s}, [x1]
|
||||
fadd v4.4s, v4.4s, v6.4s
|
||||
fadd v5.4s, v5.4s, v7.4s
|
||||
mov v2.d[1], v2.d[0]
|
||||
mov v3.d[1], v3.d[0]
|
||||
fmul v2.4s, v2.4s, v4.4s
|
||||
fmla v2.4s, v3.4s, v5.4s
|
||||
st1 {v2.d}[0], [x0], #8
|
||||
st1 {v2.d}[1], [x1], #8
|
||||
subs w4, w4, #1
|
||||
b.gt 1b
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_ps_stereo_interpolate_ipdopd_neon, export=1
|
||||
ld1 {v0.4S,v1.4S}, [x2]
|
||||
ld1 {v6.4S,v7.4S}, [x3]
|
||||
fneg v2.4S, v1.4S
|
||||
fneg v3.4S, v7.4S
|
||||
zip1 v16.4S, v0.4S, v0.4S
|
||||
zip2 v17.4S, v0.4S, v0.4S
|
||||
zip1 v18.4S, v2.4S, v1.4S
|
||||
zip2 v19.4S, v2.4S, v1.4S
|
||||
zip1 v20.4S, v6.4S, v6.4S
|
||||
zip2 v21.4S, v6.4S, v6.4S
|
||||
zip1 v22.4S, v3.4S, v7.4S
|
||||
zip2 v23.4S, v3.4S, v7.4S
|
||||
1: ld1 {v2.2S}, [x0]
|
||||
ld1 {v3.2S}, [x1]
|
||||
fadd v16.4S, v16.4S, v20.4S
|
||||
fadd v17.4S, v17.4S, v21.4S
|
||||
mov v2.D[1], v2.D[0]
|
||||
mov v3.D[1], v3.D[0]
|
||||
fmul v4.4S, v2.4S, v16.4S
|
||||
fmla v4.4S, v3.4S, v17.4S
|
||||
fadd v18.4S, v18.4S, v22.4S
|
||||
fadd v19.4S, v19.4S, v23.4S
|
||||
ext v2.16B, v2.16B, v2.16B, #4
|
||||
ext v3.16B, v3.16B, v3.16B, #4
|
||||
fmla v4.4S, v2.4S, v18.4S
|
||||
fmla v4.4S, v3.4S, v19.4S
|
||||
st1 {v4.D}[0], [x0], #8
|
||||
st1 {v4.D}[1], [x1], #8
|
||||
subs w4, w4, #1
|
||||
b.gt 1b
|
||||
ld1 {v0.4s,v1.4s}, [x2]
|
||||
ld1 {v6.4s,v7.4s}, [x3]
|
||||
fneg v2.4s, v1.4s
|
||||
fneg v3.4s, v7.4s
|
||||
zip1 v16.4s, v0.4s, v0.4s
|
||||
zip2 v17.4s, v0.4s, v0.4s
|
||||
zip1 v18.4s, v2.4s, v1.4s
|
||||
zip2 v19.4s, v2.4s, v1.4s
|
||||
zip1 v20.4s, v6.4s, v6.4s
|
||||
zip2 v21.4s, v6.4s, v6.4s
|
||||
zip1 v22.4s, v3.4s, v7.4s
|
||||
zip2 v23.4s, v3.4s, v7.4s
|
||||
1: ld1 {v2.2s}, [x0]
|
||||
ld1 {v3.2s}, [x1]
|
||||
fadd v16.4s, v16.4s, v20.4s
|
||||
fadd v17.4s, v17.4s, v21.4s
|
||||
mov v2.d[1], v2.d[0]
|
||||
mov v3.d[1], v3.d[0]
|
||||
fmul v4.4s, v2.4s, v16.4s
|
||||
fmla v4.4s, v3.4s, v17.4s
|
||||
fadd v18.4s, v18.4s, v22.4s
|
||||
fadd v19.4s, v19.4s, v23.4s
|
||||
ext v2.16b, v2.16b, v2.16b, #4
|
||||
ext v3.16b, v3.16b, v3.16b, #4
|
||||
fmla v4.4s, v2.4s, v18.4s
|
||||
fmla v4.4s, v3.4s, v19.4s
|
||||
st1 {v4.d}[0], [x0], #8
|
||||
st1 {v4.d}[1], [x1], #8
|
||||
subs w4, w4, #1
|
||||
b.gt 1b
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_ps_hybrid_analysis_neon, export=1
|
||||
lsl x3, x3, #3
|
||||
ld2 {v0.4S,v1.4S}, [x1], #32
|
||||
ld2 {v2.2S,v3.2S}, [x1], #16
|
||||
ld1 {v24.2S}, [x1], #8
|
||||
ld2 {v4.2S,v5.2S}, [x1], #16
|
||||
ld2 {v6.4S,v7.4S}, [x1]
|
||||
rev64 v6.4S, v6.4S
|
||||
rev64 v7.4S, v7.4S
|
||||
ext v6.16B, v6.16B, v6.16B, #8
|
||||
ext v7.16B, v7.16B, v7.16B, #8
|
||||
rev64 v4.2S, v4.2S
|
||||
rev64 v5.2S, v5.2S
|
||||
mov v2.D[1], v3.D[0]
|
||||
mov v4.D[1], v5.D[0]
|
||||
mov v5.D[1], v2.D[0]
|
||||
mov v3.D[1], v4.D[0]
|
||||
fadd v16.4S, v0.4S, v6.4S
|
||||
fadd v17.4S, v1.4S, v7.4S
|
||||
fsub v18.4S, v1.4S, v7.4S
|
||||
fsub v19.4S, v0.4S, v6.4S
|
||||
fadd v22.4S, v2.4S, v4.4S
|
||||
fsub v23.4S, v5.4S, v3.4S
|
||||
trn1 v20.2D, v22.2D, v23.2D // {re4+re8, re5+re7, im8-im4, im7-im5}
|
||||
trn2 v21.2D, v22.2D, v23.2D // {im4+im8, im5+im7, re4-re8, re5-re7}
|
||||
1: ld2 {v2.4S,v3.4S}, [x2], #32
|
||||
ld2 {v4.2S,v5.2S}, [x2], #16
|
||||
ld1 {v6.2S}, [x2], #8
|
||||
add x2, x2, #8
|
||||
mov v4.D[1], v5.D[0]
|
||||
mov v6.S[1], v6.S[0]
|
||||
fmul v6.2S, v6.2S, v24.2S
|
||||
fmul v0.4S, v2.4S, v16.4S
|
||||
fmul v1.4S, v2.4S, v17.4S
|
||||
fmls v0.4S, v3.4S, v18.4S
|
||||
fmla v1.4S, v3.4S, v19.4S
|
||||
fmla v0.4S, v4.4S, v20.4S
|
||||
fmla v1.4S, v4.4S, v21.4S
|
||||
faddp v0.4S, v0.4S, v1.4S
|
||||
faddp v0.4S, v0.4S, v0.4S
|
||||
fadd v0.2S, v0.2S, v6.2S
|
||||
st1 {v0.2S}, [x0], x3
|
||||
subs w4, w4, #1
|
||||
b.gt 1b
|
||||
lsl x3, x3, #3
|
||||
ld2 {v0.4s,v1.4s}, [x1], #32
|
||||
ld2 {v2.2s,v3.2s}, [x1], #16
|
||||
ld1 {v24.2s}, [x1], #8
|
||||
ld2 {v4.2s,v5.2s}, [x1], #16
|
||||
ld2 {v6.4s,v7.4s}, [x1]
|
||||
rev64 v6.4s, v6.4s
|
||||
rev64 v7.4s, v7.4s
|
||||
ext v6.16b, v6.16b, v6.16b, #8
|
||||
ext v7.16b, v7.16b, v7.16b, #8
|
||||
rev64 v4.2s, v4.2s
|
||||
rev64 v5.2s, v5.2s
|
||||
mov v2.d[1], v3.d[0]
|
||||
mov v4.d[1], v5.d[0]
|
||||
mov v5.d[1], v2.d[0]
|
||||
mov v3.d[1], v4.d[0]
|
||||
fadd v16.4s, v0.4s, v6.4s
|
||||
fadd v17.4s, v1.4s, v7.4s
|
||||
fsub v18.4s, v1.4s, v7.4s
|
||||
fsub v19.4s, v0.4s, v6.4s
|
||||
fadd v22.4s, v2.4s, v4.4s
|
||||
fsub v23.4s, v5.4s, v3.4s
|
||||
trn1 v20.2d, v22.2d, v23.2d // {re4+re8, re5+re7, im8-im4, im7-im5}
|
||||
trn2 v21.2d, v22.2d, v23.2d // {im4+im8, im5+im7, re4-re8, re5-re7}
|
||||
1: ld2 {v2.4s,v3.4s}, [x2], #32
|
||||
ld2 {v4.2s,v5.2s}, [x2], #16
|
||||
ld1 {v6.2s}, [x2], #8
|
||||
add x2, x2, #8
|
||||
mov v4.d[1], v5.d[0]
|
||||
mov v6.s[1], v6.s[0]
|
||||
fmul v6.2s, v6.2s, v24.2s
|
||||
fmul v0.4s, v2.4s, v16.4s
|
||||
fmul v1.4s, v2.4s, v17.4s
|
||||
fmls v0.4s, v3.4s, v18.4s
|
||||
fmla v1.4s, v3.4s, v19.4s
|
||||
fmla v0.4s, v4.4s, v20.4s
|
||||
fmla v1.4s, v4.4s, v21.4s
|
||||
faddp v0.4s, v0.4s, v1.4s
|
||||
faddp v0.4s, v0.4s, v0.4s
|
||||
fadd v0.2s, v0.2s, v6.2s
|
||||
st1 {v0.2s}, [x0], x3
|
||||
subs w4, w4, #1
|
||||
b.gt 1b
|
||||
ret
|
||||
endfunc
|
||||
|
||||
@@ -353,18 +353,18 @@ function fft\n\()_neon, align=6
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
def_fft 32, 16, 8
|
||||
def_fft 64, 32, 16
|
||||
def_fft 128, 64, 32
|
||||
def_fft 256, 128, 64
|
||||
def_fft 512, 256, 128
|
||||
def_fft 1024, 512, 256
|
||||
def_fft 2048, 1024, 512
|
||||
def_fft 4096, 2048, 1024
|
||||
def_fft 8192, 4096, 2048
|
||||
def_fft 16384, 8192, 4096
|
||||
def_fft 32768, 16384, 8192
|
||||
def_fft 65536, 32768, 16384
|
||||
def_fft 32, 16, 8
|
||||
def_fft 64, 32, 16
|
||||
def_fft 128, 64, 32
|
||||
def_fft 256, 128, 64
|
||||
def_fft 512, 256, 128
|
||||
def_fft 1024, 512, 256
|
||||
def_fft 2048, 1024, 512
|
||||
def_fft 4096, 2048, 1024
|
||||
def_fft 8192, 4096, 2048
|
||||
def_fft 16384, 8192, 4096
|
||||
def_fft 32768, 16384, 8192
|
||||
def_fft 65536, 32768, 16384
|
||||
|
||||
function ff_fft_calc_neon, export=1
|
||||
prfm pldl1keep, [x1]
|
||||
|
||||
+205
-205
@@ -36,11 +36,11 @@ function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
|
||||
lsl w9, w9, #3
|
||||
lsl w10, w10, #1
|
||||
add w9, w9, w10
|
||||
add x6, x6, w9, UXTW
|
||||
ld1r {v22.8H}, [x6]
|
||||
add x6, x6, w9, uxtw
|
||||
ld1r {v22.8h}, [x6]
|
||||
.endif
|
||||
.ifc \codec,vc1
|
||||
movi v22.8H, #28
|
||||
movi v22.8h, #28
|
||||
.endif
|
||||
mul w7, w4, w5
|
||||
lsl w14, w5, #3
|
||||
@@ -53,139 +53,139 @@ function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
|
||||
add w4, w4, #64
|
||||
b.eq 2f
|
||||
|
||||
dup v0.8B, w4
|
||||
dup v1.8B, w12
|
||||
ld1 {v4.8B, v5.8B}, [x1], x2
|
||||
dup v2.8B, w6
|
||||
dup v3.8B, w7
|
||||
ext v5.8B, v4.8B, v5.8B, #1
|
||||
1: ld1 {v6.8B, v7.8B}, [x1], x2
|
||||
umull v16.8H, v4.8B, v0.8B
|
||||
umlal v16.8H, v5.8B, v1.8B
|
||||
ext v7.8B, v6.8B, v7.8B, #1
|
||||
ld1 {v4.8B, v5.8B}, [x1], x2
|
||||
umlal v16.8H, v6.8B, v2.8B
|
||||
dup v0.8b, w4
|
||||
dup v1.8b, w12
|
||||
ld1 {v4.8b, v5.8b}, [x1], x2
|
||||
dup v2.8b, w6
|
||||
dup v3.8b, w7
|
||||
ext v5.8b, v4.8b, v5.8b, #1
|
||||
1: ld1 {v6.8b, v7.8b}, [x1], x2
|
||||
umull v16.8h, v4.8b, v0.8b
|
||||
umlal v16.8h, v5.8b, v1.8b
|
||||
ext v7.8b, v6.8b, v7.8b, #1
|
||||
ld1 {v4.8b, v5.8b}, [x1], x2
|
||||
umlal v16.8h, v6.8b, v2.8b
|
||||
prfm pldl1strm, [x1]
|
||||
ext v5.8B, v4.8B, v5.8B, #1
|
||||
umlal v16.8H, v7.8B, v3.8B
|
||||
umull v17.8H, v6.8B, v0.8B
|
||||
ext v5.8b, v4.8b, v5.8b, #1
|
||||
umlal v16.8h, v7.8b, v3.8b
|
||||
umull v17.8h, v6.8b, v0.8b
|
||||
subs w3, w3, #2
|
||||
umlal v17.8H, v7.8B, v1.8B
|
||||
umlal v17.8H, v4.8B, v2.8B
|
||||
umlal v17.8H, v5.8B, v3.8B
|
||||
umlal v17.8h, v7.8b, v1.8b
|
||||
umlal v17.8h, v4.8b, v2.8b
|
||||
umlal v17.8h, v5.8b, v3.8b
|
||||
prfm pldl1strm, [x1, x2]
|
||||
.ifc \codec,h264
|
||||
rshrn v16.8B, v16.8H, #6
|
||||
rshrn v17.8B, v17.8H, #6
|
||||
rshrn v16.8b, v16.8h, #6
|
||||
rshrn v17.8b, v17.8h, #6
|
||||
.else
|
||||
add v16.8H, v16.8H, v22.8H
|
||||
add v17.8H, v17.8H, v22.8H
|
||||
shrn v16.8B, v16.8H, #6
|
||||
shrn v17.8B, v17.8H, #6
|
||||
add v16.8h, v16.8h, v22.8h
|
||||
add v17.8h, v17.8h, v22.8h
|
||||
shrn v16.8b, v16.8h, #6
|
||||
shrn v17.8b, v17.8h, #6
|
||||
.endif
|
||||
.ifc \type,avg
|
||||
ld1 {v20.8B}, [x8], x2
|
||||
ld1 {v21.8B}, [x8], x2
|
||||
urhadd v16.8B, v16.8B, v20.8B
|
||||
urhadd v17.8B, v17.8B, v21.8B
|
||||
ld1 {v20.8b}, [x8], x2
|
||||
ld1 {v21.8b}, [x8], x2
|
||||
urhadd v16.8b, v16.8b, v20.8b
|
||||
urhadd v17.8b, v17.8b, v21.8b
|
||||
.endif
|
||||
st1 {v16.8B}, [x0], x2
|
||||
st1 {v17.8B}, [x0], x2
|
||||
st1 {v16.8b}, [x0], x2
|
||||
st1 {v17.8b}, [x0], x2
|
||||
b.gt 1b
|
||||
ret
|
||||
|
||||
2: adds w12, w12, w6
|
||||
dup v0.8B, w4
|
||||
dup v0.8b, w4
|
||||
b.eq 5f
|
||||
tst w6, w6
|
||||
dup v1.8B, w12
|
||||
dup v1.8b, w12
|
||||
b.eq 4f
|
||||
|
||||
ld1 {v4.8B}, [x1], x2
|
||||
3: ld1 {v6.8B}, [x1], x2
|
||||
umull v16.8H, v4.8B, v0.8B
|
||||
umlal v16.8H, v6.8B, v1.8B
|
||||
ld1 {v4.8B}, [x1], x2
|
||||
umull v17.8H, v6.8B, v0.8B
|
||||
umlal v17.8H, v4.8B, v1.8B
|
||||
ld1 {v4.8b}, [x1], x2
|
||||
3: ld1 {v6.8b}, [x1], x2
|
||||
umull v16.8h, v4.8b, v0.8b
|
||||
umlal v16.8h, v6.8b, v1.8b
|
||||
ld1 {v4.8b}, [x1], x2
|
||||
umull v17.8h, v6.8b, v0.8b
|
||||
umlal v17.8h, v4.8b, v1.8b
|
||||
prfm pldl1strm, [x1]
|
||||
.ifc \codec,h264
|
||||
rshrn v16.8B, v16.8H, #6
|
||||
rshrn v17.8B, v17.8H, #6
|
||||
rshrn v16.8b, v16.8h, #6
|
||||
rshrn v17.8b, v17.8h, #6
|
||||
.else
|
||||
add v16.8H, v16.8H, v22.8H
|
||||
add v17.8H, v17.8H, v22.8H
|
||||
shrn v16.8B, v16.8H, #6
|
||||
shrn v17.8B, v17.8H, #6
|
||||
add v16.8h, v16.8h, v22.8h
|
||||
add v17.8h, v17.8h, v22.8h
|
||||
shrn v16.8b, v16.8h, #6
|
||||
shrn v17.8b, v17.8h, #6
|
||||
.endif
|
||||
prfm pldl1strm, [x1, x2]
|
||||
.ifc \type,avg
|
||||
ld1 {v20.8B}, [x8], x2
|
||||
ld1 {v21.8B}, [x8], x2
|
||||
urhadd v16.8B, v16.8B, v20.8B
|
||||
urhadd v17.8B, v17.8B, v21.8B
|
||||
ld1 {v20.8b}, [x8], x2
|
||||
ld1 {v21.8b}, [x8], x2
|
||||
urhadd v16.8b, v16.8b, v20.8b
|
||||
urhadd v17.8b, v17.8b, v21.8b
|
||||
.endif
|
||||
subs w3, w3, #2
|
||||
st1 {v16.8B}, [x0], x2
|
||||
st1 {v17.8B}, [x0], x2
|
||||
st1 {v16.8b}, [x0], x2
|
||||
st1 {v17.8b}, [x0], x2
|
||||
b.gt 3b
|
||||
ret
|
||||
|
||||
4: ld1 {v4.8B, v5.8B}, [x1], x2
|
||||
ld1 {v6.8B, v7.8B}, [x1], x2
|
||||
ext v5.8B, v4.8B, v5.8B, #1
|
||||
ext v7.8B, v6.8B, v7.8B, #1
|
||||
4: ld1 {v4.8b, v5.8b}, [x1], x2
|
||||
ld1 {v6.8b, v7.8b}, [x1], x2
|
||||
ext v5.8b, v4.8b, v5.8b, #1
|
||||
ext v7.8b, v6.8b, v7.8b, #1
|
||||
prfm pldl1strm, [x1]
|
||||
subs w3, w3, #2
|
||||
umull v16.8H, v4.8B, v0.8B
|
||||
umlal v16.8H, v5.8B, v1.8B
|
||||
umull v17.8H, v6.8B, v0.8B
|
||||
umlal v17.8H, v7.8B, v1.8B
|
||||
umull v16.8h, v4.8b, v0.8b
|
||||
umlal v16.8h, v5.8b, v1.8b
|
||||
umull v17.8h, v6.8b, v0.8b
|
||||
umlal v17.8h, v7.8b, v1.8b
|
||||
prfm pldl1strm, [x1, x2]
|
||||
.ifc \codec,h264
|
||||
rshrn v16.8B, v16.8H, #6
|
||||
rshrn v17.8B, v17.8H, #6
|
||||
rshrn v16.8b, v16.8h, #6
|
||||
rshrn v17.8b, v17.8h, #6
|
||||
.else
|
||||
add v16.8H, v16.8H, v22.8H
|
||||
add v17.8H, v17.8H, v22.8H
|
||||
shrn v16.8B, v16.8H, #6
|
||||
shrn v17.8B, v17.8H, #6
|
||||
add v16.8h, v16.8h, v22.8h
|
||||
add v17.8h, v17.8h, v22.8h
|
||||
shrn v16.8b, v16.8h, #6
|
||||
shrn v17.8b, v17.8h, #6
|
||||
.endif
|
||||
.ifc \type,avg
|
||||
ld1 {v20.8B}, [x8], x2
|
||||
ld1 {v21.8B}, [x8], x2
|
||||
urhadd v16.8B, v16.8B, v20.8B
|
||||
urhadd v17.8B, v17.8B, v21.8B
|
||||
ld1 {v20.8b}, [x8], x2
|
||||
ld1 {v21.8b}, [x8], x2
|
||||
urhadd v16.8b, v16.8b, v20.8b
|
||||
urhadd v17.8b, v17.8b, v21.8b
|
||||
.endif
|
||||
st1 {v16.8B}, [x0], x2
|
||||
st1 {v17.8B}, [x0], x2
|
||||
st1 {v16.8b}, [x0], x2
|
||||
st1 {v17.8b}, [x0], x2
|
||||
b.gt 4b
|
||||
ret
|
||||
|
||||
5: ld1 {v4.8B}, [x1], x2
|
||||
ld1 {v5.8B}, [x1], x2
|
||||
5: ld1 {v4.8b}, [x1], x2
|
||||
ld1 {v5.8b}, [x1], x2
|
||||
prfm pldl1strm, [x1]
|
||||
subs w3, w3, #2
|
||||
umull v16.8H, v4.8B, v0.8B
|
||||
umull v17.8H, v5.8B, v0.8B
|
||||
umull v16.8h, v4.8b, v0.8b
|
||||
umull v17.8h, v5.8b, v0.8b
|
||||
prfm pldl1strm, [x1, x2]
|
||||
.ifc \codec,h264
|
||||
rshrn v16.8B, v16.8H, #6
|
||||
rshrn v17.8B, v17.8H, #6
|
||||
rshrn v16.8b, v16.8h, #6
|
||||
rshrn v17.8b, v17.8h, #6
|
||||
.else
|
||||
add v16.8H, v16.8H, v22.8H
|
||||
add v17.8H, v17.8H, v22.8H
|
||||
shrn v16.8B, v16.8H, #6
|
||||
shrn v17.8B, v17.8H, #6
|
||||
add v16.8h, v16.8h, v22.8h
|
||||
add v17.8h, v17.8h, v22.8h
|
||||
shrn v16.8b, v16.8h, #6
|
||||
shrn v17.8b, v17.8h, #6
|
||||
.endif
|
||||
.ifc \type,avg
|
||||
ld1 {v20.8B}, [x8], x2
|
||||
ld1 {v21.8B}, [x8], x2
|
||||
urhadd v16.8B, v16.8B, v20.8B
|
||||
urhadd v17.8B, v17.8B, v21.8B
|
||||
ld1 {v20.8b}, [x8], x2
|
||||
ld1 {v21.8b}, [x8], x2
|
||||
urhadd v16.8b, v16.8b, v20.8b
|
||||
urhadd v17.8b, v17.8b, v21.8b
|
||||
.endif
|
||||
st1 {v16.8B}, [x0], x2
|
||||
st1 {v17.8B}, [x0], x2
|
||||
st1 {v16.8b}, [x0], x2
|
||||
st1 {v17.8b}, [x0], x2
|
||||
b.gt 5b
|
||||
ret
|
||||
endfunc
|
||||
@@ -206,11 +206,11 @@ function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
|
||||
lsl w9, w9, #3
|
||||
lsl w10, w10, #1
|
||||
add w9, w9, w10
|
||||
add x6, x6, w9, UXTW
|
||||
ld1r {v22.8H}, [x6]
|
||||
add x6, x6, w9, uxtw
|
||||
ld1r {v22.8h}, [x6]
|
||||
.endif
|
||||
.ifc \codec,vc1
|
||||
movi v22.8H, #28
|
||||
movi v22.8h, #28
|
||||
.endif
|
||||
mul w7, w4, w5
|
||||
lsl w14, w5, #3
|
||||
@@ -223,133 +223,133 @@ function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
|
||||
add w4, w4, #64
|
||||
b.eq 2f
|
||||
|
||||
dup v24.8B, w4
|
||||
dup v25.8B, w12
|
||||
ld1 {v4.8B}, [x1], x2
|
||||
dup v26.8B, w6
|
||||
dup v27.8B, w7
|
||||
ext v5.8B, v4.8B, v5.8B, #1
|
||||
trn1 v0.2S, v24.2S, v25.2S
|
||||
trn1 v2.2S, v26.2S, v27.2S
|
||||
trn1 v4.2S, v4.2S, v5.2S
|
||||
1: ld1 {v6.8B}, [x1], x2
|
||||
ext v7.8B, v6.8B, v7.8B, #1
|
||||
trn1 v6.2S, v6.2S, v7.2S
|
||||
umull v18.8H, v4.8B, v0.8B
|
||||
umlal v18.8H, v6.8B, v2.8B
|
||||
ld1 {v4.8B}, [x1], x2
|
||||
ext v5.8B, v4.8B, v5.8B, #1
|
||||
trn1 v4.2S, v4.2S, v5.2S
|
||||
dup v24.8b, w4
|
||||
dup v25.8b, w12
|
||||
ld1 {v4.8b}, [x1], x2
|
||||
dup v26.8b, w6
|
||||
dup v27.8b, w7
|
||||
ext v5.8b, v4.8b, v5.8b, #1
|
||||
trn1 v0.2s, v24.2s, v25.2s
|
||||
trn1 v2.2s, v26.2s, v27.2s
|
||||
trn1 v4.2s, v4.2s, v5.2s
|
||||
1: ld1 {v6.8b}, [x1], x2
|
||||
ext v7.8b, v6.8b, v7.8b, #1
|
||||
trn1 v6.2s, v6.2s, v7.2s
|
||||
umull v18.8h, v4.8b, v0.8b
|
||||
umlal v18.8h, v6.8b, v2.8b
|
||||
ld1 {v4.8b}, [x1], x2
|
||||
ext v5.8b, v4.8b, v5.8b, #1
|
||||
trn1 v4.2s, v4.2s, v5.2s
|
||||
prfm pldl1strm, [x1]
|
||||
umull v19.8H, v6.8B, v0.8B
|
||||
umlal v19.8H, v4.8B, v2.8B
|
||||
trn1 v30.2D, v18.2D, v19.2D
|
||||
trn2 v31.2D, v18.2D, v19.2D
|
||||
add v18.8H, v30.8H, v31.8H
|
||||
umull v19.8h, v6.8b, v0.8b
|
||||
umlal v19.8h, v4.8b, v2.8b
|
||||
trn1 v30.2d, v18.2d, v19.2d
|
||||
trn2 v31.2d, v18.2d, v19.2d
|
||||
add v18.8h, v30.8h, v31.8h
|
||||
.ifc \codec,h264
|
||||
rshrn v16.8B, v18.8H, #6
|
||||
rshrn v16.8b, v18.8h, #6
|
||||
.else
|
||||
add v18.8H, v18.8H, v22.8H
|
||||
shrn v16.8B, v18.8H, #6
|
||||
add v18.8h, v18.8h, v22.8h
|
||||
shrn v16.8b, v18.8h, #6
|
||||
.endif
|
||||
subs w3, w3, #2
|
||||
prfm pldl1strm, [x1, x2]
|
||||
.ifc \type,avg
|
||||
ld1 {v20.S}[0], [x8], x2
|
||||
ld1 {v20.S}[1], [x8], x2
|
||||
urhadd v16.8B, v16.8B, v20.8B
|
||||
ld1 {v20.s}[0], [x8], x2
|
||||
ld1 {v20.s}[1], [x8], x2
|
||||
urhadd v16.8b, v16.8b, v20.8b
|
||||
.endif
|
||||
st1 {v16.S}[0], [x0], x2
|
||||
st1 {v16.S}[1], [x0], x2
|
||||
st1 {v16.s}[0], [x0], x2
|
||||
st1 {v16.s}[1], [x0], x2
|
||||
b.gt 1b
|
||||
ret
|
||||
|
||||
2: adds w12, w12, w6
|
||||
dup v30.8B, w4
|
||||
dup v30.8b, w4
|
||||
b.eq 5f
|
||||
tst w6, w6
|
||||
dup v31.8B, w12
|
||||
trn1 v0.2S, v30.2S, v31.2S
|
||||
trn2 v1.2S, v30.2S, v31.2S
|
||||
dup v31.8b, w12
|
||||
trn1 v0.2s, v30.2s, v31.2s
|
||||
trn2 v1.2s, v30.2s, v31.2s
|
||||
b.eq 4f
|
||||
|
||||
ext v1.8B, v0.8B, v1.8B, #4
|
||||
ld1 {v4.S}[0], [x1], x2
|
||||
3: ld1 {v4.S}[1], [x1], x2
|
||||
umull v18.8H, v4.8B, v0.8B
|
||||
ld1 {v4.S}[0], [x1], x2
|
||||
umull v19.8H, v4.8B, v1.8B
|
||||
trn1 v30.2D, v18.2D, v19.2D
|
||||
trn2 v31.2D, v18.2D, v19.2D
|
||||
add v18.8H, v30.8H, v31.8H
|
||||
ext v1.8b, v0.8b, v1.8b, #4
|
||||
ld1 {v4.s}[0], [x1], x2
|
||||
3: ld1 {v4.s}[1], [x1], x2
|
||||
umull v18.8h, v4.8b, v0.8b
|
||||
ld1 {v4.s}[0], [x1], x2
|
||||
umull v19.8h, v4.8b, v1.8b
|
||||
trn1 v30.2d, v18.2d, v19.2d
|
||||
trn2 v31.2d, v18.2d, v19.2d
|
||||
add v18.8h, v30.8h, v31.8h
|
||||
prfm pldl1strm, [x1]
|
||||
.ifc \codec,h264
|
||||
rshrn v16.8B, v18.8H, #6
|
||||
rshrn v16.8b, v18.8h, #6
|
||||
.else
|
||||
add v18.8H, v18.8H, v22.8H
|
||||
shrn v16.8B, v18.8H, #6
|
||||
add v18.8h, v18.8h, v22.8h
|
||||
shrn v16.8b, v18.8h, #6
|
||||
.endif
|
||||
.ifc \type,avg
|
||||
ld1 {v20.S}[0], [x8], x2
|
||||
ld1 {v20.S}[1], [x8], x2
|
||||
urhadd v16.8B, v16.8B, v20.8B
|
||||
ld1 {v20.s}[0], [x8], x2
|
||||
ld1 {v20.s}[1], [x8], x2
|
||||
urhadd v16.8b, v16.8b, v20.8b
|
||||
.endif
|
||||
subs w3, w3, #2
|
||||
prfm pldl1strm, [x1, x2]
|
||||
st1 {v16.S}[0], [x0], x2
|
||||
st1 {v16.S}[1], [x0], x2
|
||||
st1 {v16.s}[0], [x0], x2
|
||||
st1 {v16.s}[1], [x0], x2
|
||||
b.gt 3b
|
||||
ret
|
||||
|
||||
4: ld1 {v4.8B}, [x1], x2
|
||||
ld1 {v6.8B}, [x1], x2
|
||||
ext v5.8B, v4.8B, v5.8B, #1
|
||||
ext v7.8B, v6.8B, v7.8B, #1
|
||||
trn1 v4.2S, v4.2S, v5.2S
|
||||
trn1 v6.2S, v6.2S, v7.2S
|
||||
umull v18.8H, v4.8B, v0.8B
|
||||
umull v19.8H, v6.8B, v0.8B
|
||||
4: ld1 {v4.8b}, [x1], x2
|
||||
ld1 {v6.8b}, [x1], x2
|
||||
ext v5.8b, v4.8b, v5.8b, #1
|
||||
ext v7.8b, v6.8b, v7.8b, #1
|
||||
trn1 v4.2s, v4.2s, v5.2s
|
||||
trn1 v6.2s, v6.2s, v7.2s
|
||||
umull v18.8h, v4.8b, v0.8b
|
||||
umull v19.8h, v6.8b, v0.8b
|
||||
subs w3, w3, #2
|
||||
trn1 v30.2D, v18.2D, v19.2D
|
||||
trn2 v31.2D, v18.2D, v19.2D
|
||||
add v18.8H, v30.8H, v31.8H
|
||||
trn1 v30.2d, v18.2d, v19.2d
|
||||
trn2 v31.2d, v18.2d, v19.2d
|
||||
add v18.8h, v30.8h, v31.8h
|
||||
prfm pldl1strm, [x1]
|
||||
.ifc \codec,h264
|
||||
rshrn v16.8B, v18.8H, #6
|
||||
rshrn v16.8b, v18.8h, #6
|
||||
.else
|
||||
add v18.8H, v18.8H, v22.8H
|
||||
shrn v16.8B, v18.8H, #6
|
||||
add v18.8h, v18.8h, v22.8h
|
||||
shrn v16.8b, v18.8h, #6
|
||||
.endif
|
||||
.ifc \type,avg
|
||||
ld1 {v20.S}[0], [x8], x2
|
||||
ld1 {v20.S}[1], [x8], x2
|
||||
urhadd v16.8B, v16.8B, v20.8B
|
||||
ld1 {v20.s}[0], [x8], x2
|
||||
ld1 {v20.s}[1], [x8], x2
|
||||
urhadd v16.8b, v16.8b, v20.8b
|
||||
.endif
|
||||
prfm pldl1strm, [x1]
|
||||
st1 {v16.S}[0], [x0], x2
|
||||
st1 {v16.S}[1], [x0], x2
|
||||
st1 {v16.s}[0], [x0], x2
|
||||
st1 {v16.s}[1], [x0], x2
|
||||
b.gt 4b
|
||||
ret
|
||||
|
||||
5: ld1 {v4.S}[0], [x1], x2
|
||||
ld1 {v4.S}[1], [x1], x2
|
||||
umull v18.8H, v4.8B, v30.8B
|
||||
5: ld1 {v4.s}[0], [x1], x2
|
||||
ld1 {v4.s}[1], [x1], x2
|
||||
umull v18.8h, v4.8b, v30.8b
|
||||
subs w3, w3, #2
|
||||
prfm pldl1strm, [x1]
|
||||
.ifc \codec,h264
|
||||
rshrn v16.8B, v18.8H, #6
|
||||
rshrn v16.8b, v18.8h, #6
|
||||
.else
|
||||
add v18.8H, v18.8H, v22.8H
|
||||
shrn v16.8B, v18.8H, #6
|
||||
add v18.8h, v18.8h, v22.8h
|
||||
shrn v16.8b, v18.8h, #6
|
||||
.endif
|
||||
.ifc \type,avg
|
||||
ld1 {v20.S}[0], [x8], x2
|
||||
ld1 {v20.S}[1], [x8], x2
|
||||
urhadd v16.8B, v16.8B, v20.8B
|
||||
ld1 {v20.s}[0], [x8], x2
|
||||
ld1 {v20.s}[1], [x8], x2
|
||||
urhadd v16.8b, v16.8b, v20.8b
|
||||
.endif
|
||||
prfm pldl1strm, [x1]
|
||||
st1 {v16.S}[0], [x0], x2
|
||||
st1 {v16.S}[1], [x0], x2
|
||||
st1 {v16.s}[0], [x0], x2
|
||||
st1 {v16.s}[1], [x0], x2
|
||||
b.gt 5b
|
||||
ret
|
||||
endfunc
|
||||
@@ -370,51 +370,51 @@ function ff_\type\()_h264_chroma_mc2_neon, export=1
|
||||
sub w4, w7, w13
|
||||
sub w4, w4, w14
|
||||
add w4, w4, #64
|
||||
dup v0.8B, w4
|
||||
dup v2.8B, w12
|
||||
dup v1.8B, w6
|
||||
dup v3.8B, w7
|
||||
trn1 v0.4H, v0.4H, v2.4H
|
||||
trn1 v1.4H, v1.4H, v3.4H
|
||||
dup v0.8b, w4
|
||||
dup v2.8b, w12
|
||||
dup v1.8b, w6
|
||||
dup v3.8b, w7
|
||||
trn1 v0.4h, v0.4h, v2.4h
|
||||
trn1 v1.4h, v1.4h, v3.4h
|
||||
1:
|
||||
ld1 {v4.S}[0], [x1], x2
|
||||
ld1 {v4.S}[1], [x1], x2
|
||||
rev64 v5.2S, v4.2S
|
||||
ld1 {v5.S}[1], [x1]
|
||||
ext v6.8B, v4.8B, v5.8B, #1
|
||||
ext v7.8B, v5.8B, v4.8B, #1
|
||||
trn1 v4.4H, v4.4H, v6.4H
|
||||
trn1 v5.4H, v5.4H, v7.4H
|
||||
umull v16.8H, v4.8B, v0.8B
|
||||
umlal v16.8H, v5.8B, v1.8B
|
||||
ld1 {v4.s}[0], [x1], x2
|
||||
ld1 {v4.s}[1], [x1], x2
|
||||
rev64 v5.2s, v4.2s
|
||||
ld1 {v5.s}[1], [x1]
|
||||
ext v6.8b, v4.8b, v5.8b, #1
|
||||
ext v7.8b, v5.8b, v4.8b, #1
|
||||
trn1 v4.4h, v4.4h, v6.4h
|
||||
trn1 v5.4h, v5.4h, v7.4h
|
||||
umull v16.8h, v4.8b, v0.8b
|
||||
umlal v16.8h, v5.8b, v1.8b
|
||||
.ifc \type,avg
|
||||
ld1 {v18.H}[0], [x0], x2
|
||||
ld1 {v18.H}[2], [x0]
|
||||
ld1 {v18.h}[0], [x0], x2
|
||||
ld1 {v18.h}[2], [x0]
|
||||
sub x0, x0, x2
|
||||
.endif
|
||||
rev64 v17.4S, v16.4S
|
||||
add v16.8H, v16.8H, v17.8H
|
||||
rshrn v16.8B, v16.8H, #6
|
||||
rev64 v17.4s, v16.4s
|
||||
add v16.8h, v16.8h, v17.8h
|
||||
rshrn v16.8b, v16.8h, #6
|
||||
.ifc \type,avg
|
||||
urhadd v16.8B, v16.8B, v18.8B
|
||||
urhadd v16.8b, v16.8b, v18.8b
|
||||
.endif
|
||||
st1 {v16.H}[0], [x0], x2
|
||||
st1 {v16.H}[2], [x0], x2
|
||||
st1 {v16.h}[0], [x0], x2
|
||||
st1 {v16.h}[2], [x0], x2
|
||||
subs w3, w3, #2
|
||||
b.gt 1b
|
||||
ret
|
||||
|
||||
2:
|
||||
ld1 {v16.H}[0], [x1], x2
|
||||
ld1 {v16.H}[1], [x1], x2
|
||||
ld1 {v16.h}[0], [x1], x2
|
||||
ld1 {v16.h}[1], [x1], x2
|
||||
.ifc \type,avg
|
||||
ld1 {v18.H}[0], [x0], x2
|
||||
ld1 {v18.H}[1], [x0]
|
||||
ld1 {v18.h}[0], [x0], x2
|
||||
ld1 {v18.h}[1], [x0]
|
||||
sub x0, x0, x2
|
||||
urhadd v16.8B, v16.8B, v18.8B
|
||||
urhadd v16.8b, v16.8b, v18.8b
|
||||
.endif
|
||||
st1 {v16.H}[0], [x0], x2
|
||||
st1 {v16.H}[1], [x0], x2
|
||||
st1 {v16.h}[0], [x0], x2
|
||||
st1 {v16.h}[1], [x0], x2
|
||||
subs w3, w3, #2
|
||||
b.gt 2b
|
||||
ret
|
||||
|
||||
+531
-531
File diff suppressed because it is too large
Load Diff
+272
-272
@@ -27,114 +27,114 @@
|
||||
.macro lowpass_const r
|
||||
movz \r, #20, lsl #16
|
||||
movk \r, #5
|
||||
mov v6.S[0], \r
|
||||
mov v6.s[0], \r
|
||||
.endm
|
||||
|
||||
//trashes v0-v5
|
||||
.macro lowpass_8 r0, r1, r2, r3, d0, d1, narrow=1
|
||||
ext v2.8B, \r0\().8B, \r1\().8B, #2
|
||||
ext v3.8B, \r0\().8B, \r1\().8B, #3
|
||||
uaddl v2.8H, v2.8B, v3.8B
|
||||
ext v4.8B, \r0\().8B, \r1\().8B, #1
|
||||
ext v5.8B, \r0\().8B, \r1\().8B, #4
|
||||
uaddl v4.8H, v4.8B, v5.8B
|
||||
ext v1.8B, \r0\().8B, \r1\().8B, #5
|
||||
uaddl \d0\().8H, \r0\().8B, v1.8B
|
||||
ext v0.8B, \r2\().8B, \r3\().8B, #2
|
||||
mla \d0\().8H, v2.8H, v6.H[1]
|
||||
ext v1.8B, \r2\().8B, \r3\().8B, #3
|
||||
uaddl v0.8H, v0.8B, v1.8B
|
||||
ext v1.8B, \r2\().8B, \r3\().8B, #1
|
||||
mls \d0\().8H, v4.8H, v6.H[0]
|
||||
ext v3.8B, \r2\().8B, \r3\().8B, #4
|
||||
uaddl v1.8H, v1.8B, v3.8B
|
||||
ext v2.8B, \r2\().8B, \r3\().8B, #5
|
||||
uaddl \d1\().8H, \r2\().8B, v2.8B
|
||||
mla \d1\().8H, v0.8H, v6.H[1]
|
||||
mls \d1\().8H, v1.8H, v6.H[0]
|
||||
ext v2.8b, \r0\().8b, \r1\().8b, #2
|
||||
ext v3.8b, \r0\().8b, \r1\().8b, #3
|
||||
uaddl v2.8h, v2.8b, v3.8b
|
||||
ext v4.8b, \r0\().8b, \r1\().8b, #1
|
||||
ext v5.8b, \r0\().8b, \r1\().8b, #4
|
||||
uaddl v4.8h, v4.8b, v5.8b
|
||||
ext v1.8b, \r0\().8b, \r1\().8b, #5
|
||||
uaddl \d0\().8h, \r0\().8b, v1.8b
|
||||
ext v0.8b, \r2\().8b, \r3\().8b, #2
|
||||
mla \d0\().8h, v2.8h, v6.h[1]
|
||||
ext v1.8b, \r2\().8b, \r3\().8b, #3
|
||||
uaddl v0.8h, v0.8b, v1.8b
|
||||
ext v1.8b, \r2\().8b, \r3\().8b, #1
|
||||
mls \d0\().8h, v4.8h, v6.h[0]
|
||||
ext v3.8b, \r2\().8b, \r3\().8b, #4
|
||||
uaddl v1.8h, v1.8b, v3.8b
|
||||
ext v2.8b, \r2\().8b, \r3\().8b, #5
|
||||
uaddl \d1\().8h, \r2\().8b, v2.8b
|
||||
mla \d1\().8h, v0.8h, v6.h[1]
|
||||
mls \d1\().8h, v1.8h, v6.h[0]
|
||||
.if \narrow
|
||||
sqrshrun \d0\().8B, \d0\().8H, #5
|
||||
sqrshrun \d1\().8B, \d1\().8H, #5
|
||||
sqrshrun \d0\().8b, \d0\().8h, #5
|
||||
sqrshrun \d1\().8b, \d1\().8h, #5
|
||||
.endif
|
||||
.endm
|
||||
|
||||
//trashes v0-v5, v7, v30-v31
|
||||
.macro lowpass_8H r0, r1
|
||||
ext v0.16B, \r0\().16B, \r0\().16B, #2
|
||||
ext v1.16B, \r0\().16B, \r0\().16B, #3
|
||||
uaddl v0.8H, v0.8B, v1.8B
|
||||
ext v2.16B, \r0\().16B, \r0\().16B, #1
|
||||
ext v3.16B, \r0\().16B, \r0\().16B, #4
|
||||
uaddl v2.8H, v2.8B, v3.8B
|
||||
ext v30.16B, \r0\().16B, \r0\().16B, #5
|
||||
uaddl \r0\().8H, \r0\().8B, v30.8B
|
||||
ext v4.16B, \r1\().16B, \r1\().16B, #2
|
||||
mla \r0\().8H, v0.8H, v6.H[1]
|
||||
ext v5.16B, \r1\().16B, \r1\().16B, #3
|
||||
uaddl v4.8H, v4.8B, v5.8B
|
||||
ext v7.16B, \r1\().16B, \r1\().16B, #1
|
||||
mls \r0\().8H, v2.8H, v6.H[0]
|
||||
ext v0.16B, \r1\().16B, \r1\().16B, #4
|
||||
uaddl v7.8H, v7.8B, v0.8B
|
||||
ext v31.16B, \r1\().16B, \r1\().16B, #5
|
||||
uaddl \r1\().8H, \r1\().8B, v31.8B
|
||||
mla \r1\().8H, v4.8H, v6.H[1]
|
||||
mls \r1\().8H, v7.8H, v6.H[0]
|
||||
ext v0.16b, \r0\().16b, \r0\().16b, #2
|
||||
ext v1.16b, \r0\().16b, \r0\().16b, #3
|
||||
uaddl v0.8h, v0.8b, v1.8b
|
||||
ext v2.16b, \r0\().16b, \r0\().16b, #1
|
||||
ext v3.16b, \r0\().16b, \r0\().16b, #4
|
||||
uaddl v2.8h, v2.8b, v3.8b
|
||||
ext v30.16b, \r0\().16b, \r0\().16b, #5
|
||||
uaddl \r0\().8h, \r0\().8b, v30.8b
|
||||
ext v4.16b, \r1\().16b, \r1\().16b, #2
|
||||
mla \r0\().8h, v0.8h, v6.h[1]
|
||||
ext v5.16b, \r1\().16b, \r1\().16b, #3
|
||||
uaddl v4.8h, v4.8b, v5.8b
|
||||
ext v7.16b, \r1\().16b, \r1\().16b, #1
|
||||
mls \r0\().8h, v2.8h, v6.h[0]
|
||||
ext v0.16b, \r1\().16b, \r1\().16b, #4
|
||||
uaddl v7.8h, v7.8b, v0.8b
|
||||
ext v31.16b, \r1\().16b, \r1\().16b, #5
|
||||
uaddl \r1\().8h, \r1\().8b, v31.8b
|
||||
mla \r1\().8h, v4.8h, v6.h[1]
|
||||
mls \r1\().8h, v7.8h, v6.h[0]
|
||||
.endm
|
||||
|
||||
// trashes v2-v5, v30
|
||||
.macro lowpass_8_1 r0, r1, d0, narrow=1
|
||||
ext v2.8B, \r0\().8B, \r1\().8B, #2
|
||||
ext v3.8B, \r0\().8B, \r1\().8B, #3
|
||||
uaddl v2.8H, v2.8B, v3.8B
|
||||
ext v4.8B, \r0\().8B, \r1\().8B, #1
|
||||
ext v5.8B, \r0\().8B, \r1\().8B, #4
|
||||
uaddl v4.8H, v4.8B, v5.8B
|
||||
ext v30.8B, \r0\().8B, \r1\().8B, #5
|
||||
uaddl \d0\().8H, \r0\().8B, v30.8B
|
||||
mla \d0\().8H, v2.8H, v6.H[1]
|
||||
mls \d0\().8H, v4.8H, v6.H[0]
|
||||
ext v2.8b, \r0\().8b, \r1\().8b, #2
|
||||
ext v3.8b, \r0\().8b, \r1\().8b, #3
|
||||
uaddl v2.8h, v2.8b, v3.8b
|
||||
ext v4.8b, \r0\().8b, \r1\().8b, #1
|
||||
ext v5.8b, \r0\().8b, \r1\().8b, #4
|
||||
uaddl v4.8h, v4.8b, v5.8b
|
||||
ext v30.8b, \r0\().8b, \r1\().8b, #5
|
||||
uaddl \d0\().8h, \r0\().8b, v30.8b
|
||||
mla \d0\().8h, v2.8h, v6.h[1]
|
||||
mls \d0\().8h, v4.8h, v6.h[0]
|
||||
.if \narrow
|
||||
sqrshrun \d0\().8B, \d0\().8H, #5
|
||||
sqrshrun \d0\().8b, \d0\().8h, #5
|
||||
.endif
|
||||
.endm
|
||||
|
||||
// trashed v0-v7
|
||||
.macro lowpass_8.16 r0, r1, r2
|
||||
ext v1.16B, \r0\().16B, \r1\().16B, #4
|
||||
ext v0.16B, \r0\().16B, \r1\().16B, #6
|
||||
saddl v5.4S, v1.4H, v0.4H
|
||||
ext v2.16B, \r0\().16B, \r1\().16B, #2
|
||||
saddl2 v1.4S, v1.8H, v0.8H
|
||||
ext v3.16B, \r0\().16B, \r1\().16B, #8
|
||||
saddl v6.4S, v2.4H, v3.4H
|
||||
ext \r1\().16B, \r0\().16B, \r1\().16B, #10
|
||||
saddl2 v2.4S, v2.8H, v3.8H
|
||||
saddl v0.4S, \r0\().4H, \r1\().4H
|
||||
saddl2 v4.4S, \r0\().8H, \r1\().8H
|
||||
ext v1.16b, \r0\().16b, \r1\().16b, #4
|
||||
ext v0.16b, \r0\().16b, \r1\().16b, #6
|
||||
saddl v5.4s, v1.4h, v0.4h
|
||||
ext v2.16b, \r0\().16b, \r1\().16b, #2
|
||||
saddl2 v1.4s, v1.8h, v0.8h
|
||||
ext v3.16b, \r0\().16b, \r1\().16b, #8
|
||||
saddl v6.4s, v2.4h, v3.4h
|
||||
ext \r1\().16b, \r0\().16b, \r1\().16b, #10
|
||||
saddl2 v2.4s, v2.8h, v3.8h
|
||||
saddl v0.4s, \r0\().4h, \r1\().4h
|
||||
saddl2 v4.4s, \r0\().8h, \r1\().8h
|
||||
|
||||
shl v3.4S, v5.4S, #4
|
||||
shl v5.4S, v5.4S, #2
|
||||
shl v7.4S, v6.4S, #2
|
||||
add v5.4S, v5.4S, v3.4S
|
||||
add v6.4S, v6.4S, v7.4S
|
||||
shl v3.4s, v5.4s, #4
|
||||
shl v5.4s, v5.4s, #2
|
||||
shl v7.4s, v6.4s, #2
|
||||
add v5.4s, v5.4s, v3.4s
|
||||
add v6.4s, v6.4s, v7.4s
|
||||
|
||||
shl v3.4S, v1.4S, #4
|
||||
shl v1.4S, v1.4S, #2
|
||||
shl v7.4S, v2.4S, #2
|
||||
add v1.4S, v1.4S, v3.4S
|
||||
add v2.4S, v2.4S, v7.4S
|
||||
shl v3.4s, v1.4s, #4
|
||||
shl v1.4s, v1.4s, #2
|
||||
shl v7.4s, v2.4s, #2
|
||||
add v1.4s, v1.4s, v3.4s
|
||||
add v2.4s, v2.4s, v7.4s
|
||||
|
||||
add v5.4S, v5.4S, v0.4S
|
||||
sub v5.4S, v5.4S, v6.4S
|
||||
add v5.4s, v5.4s, v0.4s
|
||||
sub v5.4s, v5.4s, v6.4s
|
||||
|
||||
add v1.4S, v1.4S, v4.4S
|
||||
sub v1.4S, v1.4S, v2.4S
|
||||
add v1.4s, v1.4s, v4.4s
|
||||
sub v1.4s, v1.4s, v2.4s
|
||||
|
||||
rshrn v5.4H, v5.4S, #10
|
||||
rshrn2 v5.8H, v1.4S, #10
|
||||
rshrn v5.4h, v5.4s, #10
|
||||
rshrn2 v5.8h, v1.4s, #10
|
||||
|
||||
sqxtun \r2\().8B, v5.8H
|
||||
sqxtun \r2\().8b, v5.8h
|
||||
.endm
|
||||
|
||||
function put_h264_qpel16_h_lowpass_neon_packed
|
||||
@@ -163,19 +163,19 @@ function \type\()_h264_qpel16_h_lowpass_neon
|
||||
endfunc
|
||||
|
||||
function \type\()_h264_qpel8_h_lowpass_neon
|
||||
1: ld1 {v28.8B, v29.8B}, [x1], x2
|
||||
ld1 {v16.8B, v17.8B}, [x1], x2
|
||||
1: ld1 {v28.8b, v29.8b}, [x1], x2
|
||||
ld1 {v16.8b, v17.8b}, [x1], x2
|
||||
subs x12, x12, #2
|
||||
lowpass_8 v28, v29, v16, v17, v28, v16
|
||||
.ifc \type,avg
|
||||
ld1 {v2.8B}, [x0], x3
|
||||
urhadd v28.8B, v28.8B, v2.8B
|
||||
ld1 {v3.8B}, [x0]
|
||||
urhadd v16.8B, v16.8B, v3.8B
|
||||
ld1 {v2.8b}, [x0], x3
|
||||
urhadd v28.8b, v28.8b, v2.8b
|
||||
ld1 {v3.8b}, [x0]
|
||||
urhadd v16.8b, v16.8b, v3.8b
|
||||
sub x0, x0, x3
|
||||
.endif
|
||||
st1 {v28.8B}, [x0], x3
|
||||
st1 {v16.8B}, [x0], x3
|
||||
st1 {v28.8b}, [x0], x3
|
||||
st1 {v16.8b}, [x0], x3
|
||||
b.ne 1b
|
||||
ret
|
||||
endfunc
|
||||
@@ -200,23 +200,23 @@ function \type\()_h264_qpel16_h_lowpass_l2_neon
|
||||
endfunc
|
||||
|
||||
function \type\()_h264_qpel8_h_lowpass_l2_neon
|
||||
1: ld1 {v26.8B, v27.8B}, [x1], x2
|
||||
ld1 {v16.8B, v17.8B}, [x1], x2
|
||||
ld1 {v28.8B}, [x3], x2
|
||||
ld1 {v29.8B}, [x3], x2
|
||||
1: ld1 {v26.8b, v27.8b}, [x1], x2
|
||||
ld1 {v16.8b, v17.8b}, [x1], x2
|
||||
ld1 {v28.8b}, [x3], x2
|
||||
ld1 {v29.8b}, [x3], x2
|
||||
subs x12, x12, #2
|
||||
lowpass_8 v26, v27, v16, v17, v26, v27
|
||||
urhadd v26.8B, v26.8B, v28.8B
|
||||
urhadd v27.8B, v27.8B, v29.8B
|
||||
urhadd v26.8b, v26.8b, v28.8b
|
||||
urhadd v27.8b, v27.8b, v29.8b
|
||||
.ifc \type,avg
|
||||
ld1 {v2.8B}, [x0], x2
|
||||
urhadd v26.8B, v26.8B, v2.8B
|
||||
ld1 {v3.8B}, [x0]
|
||||
urhadd v27.8B, v27.8B, v3.8B
|
||||
ld1 {v2.8b}, [x0], x2
|
||||
urhadd v26.8b, v26.8b, v2.8b
|
||||
ld1 {v3.8b}, [x0]
|
||||
urhadd v27.8b, v27.8b, v3.8b
|
||||
sub x0, x0, x2
|
||||
.endif
|
||||
st1 {v26.8B}, [x0], x2
|
||||
st1 {v27.8B}, [x0], x2
|
||||
st1 {v26.8b}, [x0], x2
|
||||
st1 {v27.8b}, [x0], x2
|
||||
b.ne 1b
|
||||
ret
|
||||
endfunc
|
||||
@@ -257,19 +257,19 @@ function \type\()_h264_qpel16_v_lowpass_neon
|
||||
endfunc
|
||||
|
||||
function \type\()_h264_qpel8_v_lowpass_neon
|
||||
ld1 {v16.8B}, [x1], x3
|
||||
ld1 {v18.8B}, [x1], x3
|
||||
ld1 {v20.8B}, [x1], x3
|
||||
ld1 {v22.8B}, [x1], x3
|
||||
ld1 {v24.8B}, [x1], x3
|
||||
ld1 {v26.8B}, [x1], x3
|
||||
ld1 {v28.8B}, [x1], x3
|
||||
ld1 {v30.8B}, [x1], x3
|
||||
ld1 {v17.8B}, [x1], x3
|
||||
ld1 {v19.8B}, [x1], x3
|
||||
ld1 {v21.8B}, [x1], x3
|
||||
ld1 {v23.8B}, [x1], x3
|
||||
ld1 {v25.8B}, [x1]
|
||||
ld1 {v16.8b}, [x1], x3
|
||||
ld1 {v18.8b}, [x1], x3
|
||||
ld1 {v20.8b}, [x1], x3
|
||||
ld1 {v22.8b}, [x1], x3
|
||||
ld1 {v24.8b}, [x1], x3
|
||||
ld1 {v26.8b}, [x1], x3
|
||||
ld1 {v28.8b}, [x1], x3
|
||||
ld1 {v30.8b}, [x1], x3
|
||||
ld1 {v17.8b}, [x1], x3
|
||||
ld1 {v19.8b}, [x1], x3
|
||||
ld1 {v21.8b}, [x1], x3
|
||||
ld1 {v23.8b}, [x1], x3
|
||||
ld1 {v25.8b}, [x1]
|
||||
|
||||
transpose_8x8B v16, v18, v20, v22, v24, v26, v28, v30, v0, v1
|
||||
transpose_8x8B v17, v19, v21, v23, v25, v27, v29, v31, v0, v1
|
||||
@@ -280,33 +280,33 @@ function \type\()_h264_qpel8_v_lowpass_neon
|
||||
transpose_8x8B v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
|
||||
|
||||
.ifc \type,avg
|
||||
ld1 {v24.8B}, [x0], x2
|
||||
urhadd v16.8B, v16.8B, v24.8B
|
||||
ld1 {v25.8B}, [x0], x2
|
||||
urhadd v17.8B, v17.8B, v25.8B
|
||||
ld1 {v26.8B}, [x0], x2
|
||||
urhadd v18.8B, v18.8B, v26.8B
|
||||
ld1 {v27.8B}, [x0], x2
|
||||
urhadd v19.8B, v19.8B, v27.8B
|
||||
ld1 {v28.8B}, [x0], x2
|
||||
urhadd v20.8B, v20.8B, v28.8B
|
||||
ld1 {v29.8B}, [x0], x2
|
||||
urhadd v21.8B, v21.8B, v29.8B
|
||||
ld1 {v30.8B}, [x0], x2
|
||||
urhadd v22.8B, v22.8B, v30.8B
|
||||
ld1 {v31.8B}, [x0], x2
|
||||
urhadd v23.8B, v23.8B, v31.8B
|
||||
ld1 {v24.8b}, [x0], x2
|
||||
urhadd v16.8b, v16.8b, v24.8b
|
||||
ld1 {v25.8b}, [x0], x2
|
||||
urhadd v17.8b, v17.8b, v25.8b
|
||||
ld1 {v26.8b}, [x0], x2
|
||||
urhadd v18.8b, v18.8b, v26.8b
|
||||
ld1 {v27.8b}, [x0], x2
|
||||
urhadd v19.8b, v19.8b, v27.8b
|
||||
ld1 {v28.8b}, [x0], x2
|
||||
urhadd v20.8b, v20.8b, v28.8b
|
||||
ld1 {v29.8b}, [x0], x2
|
||||
urhadd v21.8b, v21.8b, v29.8b
|
||||
ld1 {v30.8b}, [x0], x2
|
||||
urhadd v22.8b, v22.8b, v30.8b
|
||||
ld1 {v31.8b}, [x0], x2
|
||||
urhadd v23.8b, v23.8b, v31.8b
|
||||
sub x0, x0, x2, lsl #3
|
||||
.endif
|
||||
|
||||
st1 {v16.8B}, [x0], x2
|
||||
st1 {v17.8B}, [x0], x2
|
||||
st1 {v18.8B}, [x0], x2
|
||||
st1 {v19.8B}, [x0], x2
|
||||
st1 {v20.8B}, [x0], x2
|
||||
st1 {v21.8B}, [x0], x2
|
||||
st1 {v22.8B}, [x0], x2
|
||||
st1 {v23.8B}, [x0], x2
|
||||
st1 {v16.8b}, [x0], x2
|
||||
st1 {v17.8b}, [x0], x2
|
||||
st1 {v18.8b}, [x0], x2
|
||||
st1 {v19.8b}, [x0], x2
|
||||
st1 {v20.8b}, [x0], x2
|
||||
st1 {v21.8b}, [x0], x2
|
||||
st1 {v22.8b}, [x0], x2
|
||||
st1 {v23.8b}, [x0], x2
|
||||
|
||||
ret
|
||||
endfunc
|
||||
@@ -334,19 +334,19 @@ function \type\()_h264_qpel16_v_lowpass_l2_neon
|
||||
endfunc
|
||||
|
||||
function \type\()_h264_qpel8_v_lowpass_l2_neon
|
||||
ld1 {v16.8B}, [x1], x3
|
||||
ld1 {v18.8B}, [x1], x3
|
||||
ld1 {v20.8B}, [x1], x3
|
||||
ld1 {v22.8B}, [x1], x3
|
||||
ld1 {v24.8B}, [x1], x3
|
||||
ld1 {v26.8B}, [x1], x3
|
||||
ld1 {v28.8B}, [x1], x3
|
||||
ld1 {v30.8B}, [x1], x3
|
||||
ld1 {v17.8B}, [x1], x3
|
||||
ld1 {v19.8B}, [x1], x3
|
||||
ld1 {v21.8B}, [x1], x3
|
||||
ld1 {v23.8B}, [x1], x3
|
||||
ld1 {v25.8B}, [x1]
|
||||
ld1 {v16.8b}, [x1], x3
|
||||
ld1 {v18.8b}, [x1], x3
|
||||
ld1 {v20.8b}, [x1], x3
|
||||
ld1 {v22.8b}, [x1], x3
|
||||
ld1 {v24.8b}, [x1], x3
|
||||
ld1 {v26.8b}, [x1], x3
|
||||
ld1 {v28.8b}, [x1], x3
|
||||
ld1 {v30.8b}, [x1], x3
|
||||
ld1 {v17.8b}, [x1], x3
|
||||
ld1 {v19.8b}, [x1], x3
|
||||
ld1 {v21.8b}, [x1], x3
|
||||
ld1 {v23.8b}, [x1], x3
|
||||
ld1 {v25.8b}, [x1]
|
||||
|
||||
transpose_8x8B v16, v18, v20, v22, v24, v26, v28, v30, v0, v1
|
||||
transpose_8x8B v17, v19, v21, v23, v25, v27, v29, v31, v0, v1
|
||||
@@ -356,51 +356,51 @@ function \type\()_h264_qpel8_v_lowpass_l2_neon
|
||||
lowpass_8 v28, v29, v30, v31, v22, v23
|
||||
transpose_8x8B v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
|
||||
|
||||
ld1 {v24.8B}, [x12], x2
|
||||
ld1 {v25.8B}, [x12], x2
|
||||
ld1 {v26.8B}, [x12], x2
|
||||
ld1 {v27.8B}, [x12], x2
|
||||
ld1 {v28.8B}, [x12], x2
|
||||
urhadd v16.8B, v24.8B, v16.8B
|
||||
urhadd v17.8B, v25.8B, v17.8B
|
||||
ld1 {v29.8B}, [x12], x2
|
||||
urhadd v18.8B, v26.8B, v18.8B
|
||||
urhadd v19.8B, v27.8B, v19.8B
|
||||
ld1 {v30.8B}, [x12], x2
|
||||
urhadd v20.8B, v28.8B, v20.8B
|
||||
urhadd v21.8B, v29.8B, v21.8B
|
||||
ld1 {v31.8B}, [x12], x2
|
||||
urhadd v22.8B, v30.8B, v22.8B
|
||||
urhadd v23.8B, v31.8B, v23.8B
|
||||
ld1 {v24.8b}, [x12], x2
|
||||
ld1 {v25.8b}, [x12], x2
|
||||
ld1 {v26.8b}, [x12], x2
|
||||
ld1 {v27.8b}, [x12], x2
|
||||
ld1 {v28.8b}, [x12], x2
|
||||
urhadd v16.8b, v24.8b, v16.8b
|
||||
urhadd v17.8b, v25.8b, v17.8b
|
||||
ld1 {v29.8b}, [x12], x2
|
||||
urhadd v18.8b, v26.8b, v18.8b
|
||||
urhadd v19.8b, v27.8b, v19.8b
|
||||
ld1 {v30.8b}, [x12], x2
|
||||
urhadd v20.8b, v28.8b, v20.8b
|
||||
urhadd v21.8b, v29.8b, v21.8b
|
||||
ld1 {v31.8b}, [x12], x2
|
||||
urhadd v22.8b, v30.8b, v22.8b
|
||||
urhadd v23.8b, v31.8b, v23.8b
|
||||
|
||||
.ifc \type,avg
|
||||
ld1 {v24.8B}, [x0], x3
|
||||
urhadd v16.8B, v16.8B, v24.8B
|
||||
ld1 {v25.8B}, [x0], x3
|
||||
urhadd v17.8B, v17.8B, v25.8B
|
||||
ld1 {v26.8B}, [x0], x3
|
||||
urhadd v18.8B, v18.8B, v26.8B
|
||||
ld1 {v27.8B}, [x0], x3
|
||||
urhadd v19.8B, v19.8B, v27.8B
|
||||
ld1 {v28.8B}, [x0], x3
|
||||
urhadd v20.8B, v20.8B, v28.8B
|
||||
ld1 {v29.8B}, [x0], x3
|
||||
urhadd v21.8B, v21.8B, v29.8B
|
||||
ld1 {v30.8B}, [x0], x3
|
||||
urhadd v22.8B, v22.8B, v30.8B
|
||||
ld1 {v31.8B}, [x0], x3
|
||||
urhadd v23.8B, v23.8B, v31.8B
|
||||
ld1 {v24.8b}, [x0], x3
|
||||
urhadd v16.8b, v16.8b, v24.8b
|
||||
ld1 {v25.8b}, [x0], x3
|
||||
urhadd v17.8b, v17.8b, v25.8b
|
||||
ld1 {v26.8b}, [x0], x3
|
||||
urhadd v18.8b, v18.8b, v26.8b
|
||||
ld1 {v27.8b}, [x0], x3
|
||||
urhadd v19.8b, v19.8b, v27.8b
|
||||
ld1 {v28.8b}, [x0], x3
|
||||
urhadd v20.8b, v20.8b, v28.8b
|
||||
ld1 {v29.8b}, [x0], x3
|
||||
urhadd v21.8b, v21.8b, v29.8b
|
||||
ld1 {v30.8b}, [x0], x3
|
||||
urhadd v22.8b, v22.8b, v30.8b
|
||||
ld1 {v31.8b}, [x0], x3
|
||||
urhadd v23.8b, v23.8b, v31.8b
|
||||
sub x0, x0, x3, lsl #3
|
||||
.endif
|
||||
|
||||
st1 {v16.8B}, [x0], x3
|
||||
st1 {v17.8B}, [x0], x3
|
||||
st1 {v18.8B}, [x0], x3
|
||||
st1 {v19.8B}, [x0], x3
|
||||
st1 {v20.8B}, [x0], x3
|
||||
st1 {v21.8B}, [x0], x3
|
||||
st1 {v22.8B}, [x0], x3
|
||||
st1 {v23.8B}, [x0], x3
|
||||
st1 {v16.8b}, [x0], x3
|
||||
st1 {v17.8b}, [x0], x3
|
||||
st1 {v18.8b}, [x0], x3
|
||||
st1 {v19.8b}, [x0], x3
|
||||
st1 {v20.8b}, [x0], x3
|
||||
st1 {v21.8b}, [x0], x3
|
||||
st1 {v22.8b}, [x0], x3
|
||||
st1 {v23.8b}, [x0], x3
|
||||
|
||||
ret
|
||||
endfunc
|
||||
@@ -411,19 +411,19 @@ endfunc
|
||||
|
||||
function put_h264_qpel8_hv_lowpass_neon_top
|
||||
lowpass_const w12
|
||||
ld1 {v16.8H}, [x1], x3
|
||||
ld1 {v17.8H}, [x1], x3
|
||||
ld1 {v18.8H}, [x1], x3
|
||||
ld1 {v19.8H}, [x1], x3
|
||||
ld1 {v20.8H}, [x1], x3
|
||||
ld1 {v21.8H}, [x1], x3
|
||||
ld1 {v22.8H}, [x1], x3
|
||||
ld1 {v23.8H}, [x1], x3
|
||||
ld1 {v24.8H}, [x1], x3
|
||||
ld1 {v25.8H}, [x1], x3
|
||||
ld1 {v26.8H}, [x1], x3
|
||||
ld1 {v27.8H}, [x1], x3
|
||||
ld1 {v28.8H}, [x1]
|
||||
ld1 {v16.8h}, [x1], x3
|
||||
ld1 {v17.8h}, [x1], x3
|
||||
ld1 {v18.8h}, [x1], x3
|
||||
ld1 {v19.8h}, [x1], x3
|
||||
ld1 {v20.8h}, [x1], x3
|
||||
ld1 {v21.8h}, [x1], x3
|
||||
ld1 {v22.8h}, [x1], x3
|
||||
ld1 {v23.8h}, [x1], x3
|
||||
ld1 {v24.8h}, [x1], x3
|
||||
ld1 {v25.8h}, [x1], x3
|
||||
ld1 {v26.8h}, [x1], x3
|
||||
ld1 {v27.8h}, [x1], x3
|
||||
ld1 {v28.8h}, [x1]
|
||||
lowpass_8H v16, v17
|
||||
lowpass_8H v18, v19
|
||||
lowpass_8H v20, v21
|
||||
@@ -447,7 +447,7 @@ function put_h264_qpel8_hv_lowpass_neon_top
|
||||
lowpass_8.16 v22, v30, v22
|
||||
lowpass_8.16 v23, v31, v23
|
||||
|
||||
transpose_8x8B v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
|
||||
transpose_8x8B v16, v17, v18, v19, v20, v21, v22, v23, v0, v1
|
||||
|
||||
ret
|
||||
endfunc
|
||||
@@ -457,33 +457,33 @@ function \type\()_h264_qpel8_hv_lowpass_neon
|
||||
mov x10, x30
|
||||
bl put_h264_qpel8_hv_lowpass_neon_top
|
||||
.ifc \type,avg
|
||||
ld1 {v0.8B}, [x0], x2
|
||||
urhadd v16.8B, v16.8B, v0.8B
|
||||
ld1 {v1.8B}, [x0], x2
|
||||
urhadd v17.8B, v17.8B, v1.8B
|
||||
ld1 {v2.8B}, [x0], x2
|
||||
urhadd v18.8B, v18.8B, v2.8B
|
||||
ld1 {v3.8B}, [x0], x2
|
||||
urhadd v19.8B, v19.8B, v3.8B
|
||||
ld1 {v4.8B}, [x0], x2
|
||||
urhadd v20.8B, v20.8B, v4.8B
|
||||
ld1 {v5.8B}, [x0], x2
|
||||
urhadd v21.8B, v21.8B, v5.8B
|
||||
ld1 {v6.8B}, [x0], x2
|
||||
urhadd v22.8B, v22.8B, v6.8B
|
||||
ld1 {v7.8B}, [x0], x2
|
||||
urhadd v23.8B, v23.8B, v7.8B
|
||||
ld1 {v0.8b}, [x0], x2
|
||||
urhadd v16.8b, v16.8b, v0.8b
|
||||
ld1 {v1.8b}, [x0], x2
|
||||
urhadd v17.8b, v17.8b, v1.8b
|
||||
ld1 {v2.8b}, [x0], x2
|
||||
urhadd v18.8b, v18.8b, v2.8b
|
||||
ld1 {v3.8b}, [x0], x2
|
||||
urhadd v19.8b, v19.8b, v3.8b
|
||||
ld1 {v4.8b}, [x0], x2
|
||||
urhadd v20.8b, v20.8b, v4.8b
|
||||
ld1 {v5.8b}, [x0], x2
|
||||
urhadd v21.8b, v21.8b, v5.8b
|
||||
ld1 {v6.8b}, [x0], x2
|
||||
urhadd v22.8b, v22.8b, v6.8b
|
||||
ld1 {v7.8b}, [x0], x2
|
||||
urhadd v23.8b, v23.8b, v7.8b
|
||||
sub x0, x0, x2, lsl #3
|
||||
.endif
|
||||
|
||||
st1 {v16.8B}, [x0], x2
|
||||
st1 {v17.8B}, [x0], x2
|
||||
st1 {v18.8B}, [x0], x2
|
||||
st1 {v19.8B}, [x0], x2
|
||||
st1 {v20.8B}, [x0], x2
|
||||
st1 {v21.8B}, [x0], x2
|
||||
st1 {v22.8B}, [x0], x2
|
||||
st1 {v23.8B}, [x0], x2
|
||||
st1 {v16.8b}, [x0], x2
|
||||
st1 {v17.8b}, [x0], x2
|
||||
st1 {v18.8b}, [x0], x2
|
||||
st1 {v19.8b}, [x0], x2
|
||||
st1 {v20.8b}, [x0], x2
|
||||
st1 {v21.8b}, [x0], x2
|
||||
st1 {v22.8b}, [x0], x2
|
||||
st1 {v23.8b}, [x0], x2
|
||||
|
||||
ret x10
|
||||
endfunc
|
||||
@@ -497,45 +497,45 @@ function \type\()_h264_qpel8_hv_lowpass_l2_neon
|
||||
mov x10, x30
|
||||
bl put_h264_qpel8_hv_lowpass_neon_top
|
||||
|
||||
ld1 {v0.8B, v1.8B}, [x2], #16
|
||||
ld1 {v2.8B, v3.8B}, [x2], #16
|
||||
urhadd v0.8B, v0.8B, v16.8B
|
||||
urhadd v1.8B, v1.8B, v17.8B
|
||||
ld1 {v4.8B, v5.8B}, [x2], #16
|
||||
urhadd v2.8B, v2.8B, v18.8B
|
||||
urhadd v3.8B, v3.8B, v19.8B
|
||||
ld1 {v6.8B, v7.8B}, [x2], #16
|
||||
urhadd v4.8B, v4.8B, v20.8B
|
||||
urhadd v5.8B, v5.8B, v21.8B
|
||||
urhadd v6.8B, v6.8B, v22.8B
|
||||
urhadd v7.8B, v7.8B, v23.8B
|
||||
ld1 {v0.8b, v1.8b}, [x2], #16
|
||||
ld1 {v2.8b, v3.8b}, [x2], #16
|
||||
urhadd v0.8b, v0.8b, v16.8b
|
||||
urhadd v1.8b, v1.8b, v17.8b
|
||||
ld1 {v4.8b, v5.8b}, [x2], #16
|
||||
urhadd v2.8b, v2.8b, v18.8b
|
||||
urhadd v3.8b, v3.8b, v19.8b
|
||||
ld1 {v6.8b, v7.8b}, [x2], #16
|
||||
urhadd v4.8b, v4.8b, v20.8b
|
||||
urhadd v5.8b, v5.8b, v21.8b
|
||||
urhadd v6.8b, v6.8b, v22.8b
|
||||
urhadd v7.8b, v7.8b, v23.8b
|
||||
.ifc \type,avg
|
||||
ld1 {v16.8B}, [x0], x3
|
||||
urhadd v0.8B, v0.8B, v16.8B
|
||||
ld1 {v17.8B}, [x0], x3
|
||||
urhadd v1.8B, v1.8B, v17.8B
|
||||
ld1 {v18.8B}, [x0], x3
|
||||
urhadd v2.8B, v2.8B, v18.8B
|
||||
ld1 {v19.8B}, [x0], x3
|
||||
urhadd v3.8B, v3.8B, v19.8B
|
||||
ld1 {v20.8B}, [x0], x3
|
||||
urhadd v4.8B, v4.8B, v20.8B
|
||||
ld1 {v21.8B}, [x0], x3
|
||||
urhadd v5.8B, v5.8B, v21.8B
|
||||
ld1 {v22.8B}, [x0], x3
|
||||
urhadd v6.8B, v6.8B, v22.8B
|
||||
ld1 {v23.8B}, [x0], x3
|
||||
urhadd v7.8B, v7.8B, v23.8B
|
||||
ld1 {v16.8b}, [x0], x3
|
||||
urhadd v0.8b, v0.8b, v16.8b
|
||||
ld1 {v17.8b}, [x0], x3
|
||||
urhadd v1.8b, v1.8b, v17.8b
|
||||
ld1 {v18.8b}, [x0], x3
|
||||
urhadd v2.8b, v2.8b, v18.8b
|
||||
ld1 {v19.8b}, [x0], x3
|
||||
urhadd v3.8b, v3.8b, v19.8b
|
||||
ld1 {v20.8b}, [x0], x3
|
||||
urhadd v4.8b, v4.8b, v20.8b
|
||||
ld1 {v21.8b}, [x0], x3
|
||||
urhadd v5.8b, v5.8b, v21.8b
|
||||
ld1 {v22.8b}, [x0], x3
|
||||
urhadd v6.8b, v6.8b, v22.8b
|
||||
ld1 {v23.8b}, [x0], x3
|
||||
urhadd v7.8b, v7.8b, v23.8b
|
||||
sub x0, x0, x3, lsl #3
|
||||
.endif
|
||||
st1 {v0.8B}, [x0], x3
|
||||
st1 {v1.8B}, [x0], x3
|
||||
st1 {v2.8B}, [x0], x3
|
||||
st1 {v3.8B}, [x0], x3
|
||||
st1 {v4.8B}, [x0], x3
|
||||
st1 {v5.8B}, [x0], x3
|
||||
st1 {v6.8B}, [x0], x3
|
||||
st1 {v7.8B}, [x0], x3
|
||||
st1 {v0.8b}, [x0], x3
|
||||
st1 {v1.8b}, [x0], x3
|
||||
st1 {v2.8b}, [x0], x3
|
||||
st1 {v3.8b}, [x0], x3
|
||||
st1 {v4.8b}, [x0], x3
|
||||
st1 {v5.8b}, [x0], x3
|
||||
st1 {v6.8b}, [x0], x3
|
||||
st1 {v7.8b}, [x0], x3
|
||||
|
||||
ret x10
|
||||
endfunc
|
||||
@@ -579,8 +579,8 @@ function \type\()_h264_qpel16_hv_lowpass_l2_neon
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
h264_qpel16_hv put
|
||||
h264_qpel16_hv avg
|
||||
h264_qpel16_hv put
|
||||
h264_qpel16_hv avg
|
||||
|
||||
.macro h264_qpel8 type
|
||||
function ff_\type\()_h264_qpel8_mc10_neon, export=1
|
||||
@@ -758,8 +758,8 @@ function ff_\type\()_h264_qpel8_mc33_neon, export=1
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
h264_qpel8 put
|
||||
h264_qpel8 avg
|
||||
h264_qpel8 put
|
||||
h264_qpel8 avg
|
||||
|
||||
.macro h264_qpel16 type
|
||||
function ff_\type\()_h264_qpel16_mc10_neon, export=1
|
||||
@@ -930,5 +930,5 @@ function ff_\type\()_h264_qpel16_mc33_neon, export=1
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
h264_qpel16 put
|
||||
h264_qpel16 avg
|
||||
h264_qpel16 put
|
||||
h264_qpel16 avg
|
||||
|
||||
+181
-181
@@ -26,295 +26,295 @@
|
||||
.if \avg
|
||||
mov x12, x0
|
||||
.endif
|
||||
1: ld1 {v0.16B}, [x1], x2
|
||||
ld1 {v1.16B}, [x1], x2
|
||||
ld1 {v2.16B}, [x1], x2
|
||||
ld1 {v3.16B}, [x1], x2
|
||||
1: ld1 {v0.16b}, [x1], x2
|
||||
ld1 {v1.16b}, [x1], x2
|
||||
ld1 {v2.16b}, [x1], x2
|
||||
ld1 {v3.16b}, [x1], x2
|
||||
.if \avg
|
||||
ld1 {v4.16B}, [x12], x2
|
||||
urhadd v0.16B, v0.16B, v4.16B
|
||||
ld1 {v5.16B}, [x12], x2
|
||||
urhadd v1.16B, v1.16B, v5.16B
|
||||
ld1 {v6.16B}, [x12], x2
|
||||
urhadd v2.16B, v2.16B, v6.16B
|
||||
ld1 {v7.16B}, [x12], x2
|
||||
urhadd v3.16B, v3.16B, v7.16B
|
||||
ld1 {v4.16b}, [x12], x2
|
||||
urhadd v0.16b, v0.16b, v4.16b
|
||||
ld1 {v5.16b}, [x12], x2
|
||||
urhadd v1.16b, v1.16b, v5.16b
|
||||
ld1 {v6.16b}, [x12], x2
|
||||
urhadd v2.16b, v2.16b, v6.16b
|
||||
ld1 {v7.16b}, [x12], x2
|
||||
urhadd v3.16b, v3.16b, v7.16b
|
||||
.endif
|
||||
subs w3, w3, #4
|
||||
st1 {v0.16B}, [x0], x2
|
||||
st1 {v1.16B}, [x0], x2
|
||||
st1 {v2.16B}, [x0], x2
|
||||
st1 {v3.16B}, [x0], x2
|
||||
st1 {v0.16b}, [x0], x2
|
||||
st1 {v1.16b}, [x0], x2
|
||||
st1 {v2.16b}, [x0], x2
|
||||
st1 {v3.16b}, [x0], x2
|
||||
b.ne 1b
|
||||
ret
|
||||
.endm
|
||||
|
||||
.macro pixels16_x2 rnd=1, avg=0
|
||||
1: ld1 {v0.16B, v1.16B}, [x1], x2
|
||||
ld1 {v2.16B, v3.16B}, [x1], x2
|
||||
1: ld1 {v0.16b, v1.16b}, [x1], x2
|
||||
ld1 {v2.16b, v3.16b}, [x1], x2
|
||||
subs w3, w3, #2
|
||||
ext v1.16B, v0.16B, v1.16B, #1
|
||||
avg v0.16B, v0.16B, v1.16B
|
||||
ext v3.16B, v2.16B, v3.16B, #1
|
||||
avg v2.16B, v2.16B, v3.16B
|
||||
ext v1.16b, v0.16b, v1.16b, #1
|
||||
avg v0.16b, v0.16b, v1.16b
|
||||
ext v3.16b, v2.16b, v3.16b, #1
|
||||
avg v2.16b, v2.16b, v3.16b
|
||||
.if \avg
|
||||
ld1 {v1.16B}, [x0], x2
|
||||
ld1 {v3.16B}, [x0]
|
||||
urhadd v0.16B, v0.16B, v1.16B
|
||||
urhadd v2.16B, v2.16B, v3.16B
|
||||
ld1 {v1.16b}, [x0], x2
|
||||
ld1 {v3.16b}, [x0]
|
||||
urhadd v0.16b, v0.16b, v1.16b
|
||||
urhadd v2.16b, v2.16b, v3.16b
|
||||
sub x0, x0, x2
|
||||
.endif
|
||||
st1 {v0.16B}, [x0], x2
|
||||
st1 {v2.16B}, [x0], x2
|
||||
st1 {v0.16b}, [x0], x2
|
||||
st1 {v2.16b}, [x0], x2
|
||||
b.ne 1b
|
||||
ret
|
||||
.endm
|
||||
|
||||
.macro pixels16_y2 rnd=1, avg=0
|
||||
sub w3, w3, #2
|
||||
ld1 {v0.16B}, [x1], x2
|
||||
ld1 {v1.16B}, [x1], x2
|
||||
ld1 {v0.16b}, [x1], x2
|
||||
ld1 {v1.16b}, [x1], x2
|
||||
1: subs w3, w3, #2
|
||||
avg v2.16B, v0.16B, v1.16B
|
||||
ld1 {v0.16B}, [x1], x2
|
||||
avg v3.16B, v0.16B, v1.16B
|
||||
ld1 {v1.16B}, [x1], x2
|
||||
avg v2.16b, v0.16b, v1.16b
|
||||
ld1 {v0.16b}, [x1], x2
|
||||
avg v3.16b, v0.16b, v1.16b
|
||||
ld1 {v1.16b}, [x1], x2
|
||||
.if \avg
|
||||
ld1 {v4.16B}, [x0], x2
|
||||
ld1 {v5.16B}, [x0]
|
||||
urhadd v2.16B, v2.16B, v4.16B
|
||||
urhadd v3.16B, v3.16B, v5.16B
|
||||
ld1 {v4.16b}, [x0], x2
|
||||
ld1 {v5.16b}, [x0]
|
||||
urhadd v2.16b, v2.16b, v4.16b
|
||||
urhadd v3.16b, v3.16b, v5.16b
|
||||
sub x0, x0, x2
|
||||
.endif
|
||||
st1 {v2.16B}, [x0], x2
|
||||
st1 {v3.16B}, [x0], x2
|
||||
st1 {v2.16b}, [x0], x2
|
||||
st1 {v3.16b}, [x0], x2
|
||||
b.ne 1b
|
||||
|
||||
avg v2.16B, v0.16B, v1.16B
|
||||
ld1 {v0.16B}, [x1], x2
|
||||
avg v3.16B, v0.16B, v1.16B
|
||||
avg v2.16b, v0.16b, v1.16b
|
||||
ld1 {v0.16b}, [x1], x2
|
||||
avg v3.16b, v0.16b, v1.16b
|
||||
.if \avg
|
||||
ld1 {v4.16B}, [x0], x2
|
||||
ld1 {v5.16B}, [x0]
|
||||
urhadd v2.16B, v2.16B, v4.16B
|
||||
urhadd v3.16B, v3.16B, v5.16B
|
||||
ld1 {v4.16b}, [x0], x2
|
||||
ld1 {v5.16b}, [x0]
|
||||
urhadd v2.16b, v2.16b, v4.16b
|
||||
urhadd v3.16b, v3.16b, v5.16b
|
||||
sub x0, x0, x2
|
||||
.endif
|
||||
st1 {v2.16B}, [x0], x2
|
||||
st1 {v3.16B}, [x0], x2
|
||||
st1 {v2.16b}, [x0], x2
|
||||
st1 {v3.16b}, [x0], x2
|
||||
|
||||
ret
|
||||
.endm
|
||||
|
||||
.macro pixels16_xy2 rnd=1, avg=0
|
||||
sub w3, w3, #2
|
||||
ld1 {v0.16B, v1.16B}, [x1], x2
|
||||
ld1 {v4.16B, v5.16B}, [x1], x2
|
||||
ld1 {v0.16b, v1.16b}, [x1], x2
|
||||
ld1 {v4.16b, v5.16b}, [x1], x2
|
||||
NRND movi v26.8H, #1
|
||||
ext v1.16B, v0.16B, v1.16B, #1
|
||||
ext v5.16B, v4.16B, v5.16B, #1
|
||||
uaddl v16.8H, v0.8B, v1.8B
|
||||
uaddl2 v20.8H, v0.16B, v1.16B
|
||||
uaddl v18.8H, v4.8B, v5.8B
|
||||
uaddl2 v22.8H, v4.16B, v5.16B
|
||||
ext v1.16b, v0.16b, v1.16b, #1
|
||||
ext v5.16b, v4.16b, v5.16b, #1
|
||||
uaddl v16.8h, v0.8b, v1.8b
|
||||
uaddl2 v20.8h, v0.16b, v1.16b
|
||||
uaddl v18.8h, v4.8b, v5.8b
|
||||
uaddl2 v22.8h, v4.16b, v5.16b
|
||||
1: subs w3, w3, #2
|
||||
ld1 {v0.16B, v1.16B}, [x1], x2
|
||||
add v24.8H, v16.8H, v18.8H
|
||||
ld1 {v0.16b, v1.16b}, [x1], x2
|
||||
add v24.8h, v16.8h, v18.8h
|
||||
NRND add v24.8H, v24.8H, v26.8H
|
||||
ext v30.16B, v0.16B, v1.16B, #1
|
||||
add v1.8H, v20.8H, v22.8H
|
||||
mshrn v28.8B, v24.8H, #2
|
||||
ext v30.16b, v0.16b, v1.16b, #1
|
||||
add v1.8h, v20.8h, v22.8h
|
||||
mshrn v28.8b, v24.8h, #2
|
||||
NRND add v1.8H, v1.8H, v26.8H
|
||||
mshrn2 v28.16B, v1.8H, #2
|
||||
mshrn2 v28.16b, v1.8h, #2
|
||||
.if \avg
|
||||
ld1 {v16.16B}, [x0]
|
||||
urhadd v28.16B, v28.16B, v16.16B
|
||||
ld1 {v16.16b}, [x0]
|
||||
urhadd v28.16b, v28.16b, v16.16b
|
||||
.endif
|
||||
uaddl v16.8H, v0.8B, v30.8B
|
||||
ld1 {v2.16B, v3.16B}, [x1], x2
|
||||
uaddl2 v20.8H, v0.16B, v30.16B
|
||||
st1 {v28.16B}, [x0], x2
|
||||
add v24.8H, v16.8H, v18.8H
|
||||
uaddl v16.8h, v0.8b, v30.8b
|
||||
ld1 {v2.16b, v3.16b}, [x1], x2
|
||||
uaddl2 v20.8h, v0.16b, v30.16b
|
||||
st1 {v28.16b}, [x0], x2
|
||||
add v24.8h, v16.8h, v18.8h
|
||||
NRND add v24.8H, v24.8H, v26.8H
|
||||
ext v3.16B, v2.16B, v3.16B, #1
|
||||
add v0.8H, v20.8H, v22.8H
|
||||
mshrn v30.8B, v24.8H, #2
|
||||
ext v3.16b, v2.16b, v3.16b, #1
|
||||
add v0.8h, v20.8h, v22.8h
|
||||
mshrn v30.8b, v24.8h, #2
|
||||
NRND add v0.8H, v0.8H, v26.8H
|
||||
mshrn2 v30.16B, v0.8H, #2
|
||||
mshrn2 v30.16b, v0.8h, #2
|
||||
.if \avg
|
||||
ld1 {v18.16B}, [x0]
|
||||
urhadd v30.16B, v30.16B, v18.16B
|
||||
ld1 {v18.16b}, [x0]
|
||||
urhadd v30.16b, v30.16b, v18.16b
|
||||
.endif
|
||||
uaddl v18.8H, v2.8B, v3.8B
|
||||
uaddl2 v22.8H, v2.16B, v3.16B
|
||||
st1 {v30.16B}, [x0], x2
|
||||
uaddl v18.8h, v2.8b, v3.8b
|
||||
uaddl2 v22.8h, v2.16b, v3.16b
|
||||
st1 {v30.16b}, [x0], x2
|
||||
b.gt 1b
|
||||
|
||||
ld1 {v0.16B, v1.16B}, [x1], x2
|
||||
add v24.8H, v16.8H, v18.8H
|
||||
ld1 {v0.16b, v1.16b}, [x1], x2
|
||||
add v24.8h, v16.8h, v18.8h
|
||||
NRND add v24.8H, v24.8H, v26.8H
|
||||
ext v30.16B, v0.16B, v1.16B, #1
|
||||
add v1.8H, v20.8H, v22.8H
|
||||
mshrn v28.8B, v24.8H, #2
|
||||
ext v30.16b, v0.16b, v1.16b, #1
|
||||
add v1.8h, v20.8h, v22.8h
|
||||
mshrn v28.8b, v24.8h, #2
|
||||
NRND add v1.8H, v1.8H, v26.8H
|
||||
mshrn2 v28.16B, v1.8H, #2
|
||||
mshrn2 v28.16b, v1.8h, #2
|
||||
.if \avg
|
||||
ld1 {v16.16B}, [x0]
|
||||
urhadd v28.16B, v28.16B, v16.16B
|
||||
ld1 {v16.16b}, [x0]
|
||||
urhadd v28.16b, v28.16b, v16.16b
|
||||
.endif
|
||||
uaddl v16.8H, v0.8B, v30.8B
|
||||
uaddl2 v20.8H, v0.16B, v30.16B
|
||||
st1 {v28.16B}, [x0], x2
|
||||
add v24.8H, v16.8H, v18.8H
|
||||
uaddl v16.8h, v0.8b, v30.8b
|
||||
uaddl2 v20.8h, v0.16b, v30.16b
|
||||
st1 {v28.16b}, [x0], x2
|
||||
add v24.8h, v16.8h, v18.8h
|
||||
NRND add v24.8H, v24.8H, v26.8H
|
||||
add v0.8H, v20.8H, v22.8H
|
||||
mshrn v30.8B, v24.8H, #2
|
||||
add v0.8h, v20.8h, v22.8h
|
||||
mshrn v30.8b, v24.8h, #2
|
||||
NRND add v0.8H, v0.8H, v26.8H
|
||||
mshrn2 v30.16B, v0.8H, #2
|
||||
mshrn2 v30.16b, v0.8h, #2
|
||||
.if \avg
|
||||
ld1 {v18.16B}, [x0]
|
||||
urhadd v30.16B, v30.16B, v18.16B
|
||||
ld1 {v18.16b}, [x0]
|
||||
urhadd v30.16b, v30.16b, v18.16b
|
||||
.endif
|
||||
st1 {v30.16B}, [x0], x2
|
||||
st1 {v30.16b}, [x0], x2
|
||||
|
||||
ret
|
||||
.endm
|
||||
|
||||
.macro pixels8 rnd=1, avg=0
|
||||
1: ld1 {v0.8B}, [x1], x2
|
||||
ld1 {v1.8B}, [x1], x2
|
||||
ld1 {v2.8B}, [x1], x2
|
||||
ld1 {v3.8B}, [x1], x2
|
||||
1: ld1 {v0.8b}, [x1], x2
|
||||
ld1 {v1.8b}, [x1], x2
|
||||
ld1 {v2.8b}, [x1], x2
|
||||
ld1 {v3.8b}, [x1], x2
|
||||
.if \avg
|
||||
ld1 {v4.8B}, [x0], x2
|
||||
urhadd v0.8B, v0.8B, v4.8B
|
||||
ld1 {v5.8B}, [x0], x2
|
||||
urhadd v1.8B, v1.8B, v5.8B
|
||||
ld1 {v6.8B}, [x0], x2
|
||||
urhadd v2.8B, v2.8B, v6.8B
|
||||
ld1 {v7.8B}, [x0], x2
|
||||
urhadd v3.8B, v3.8B, v7.8B
|
||||
ld1 {v4.8b}, [x0], x2
|
||||
urhadd v0.8b, v0.8b, v4.8b
|
||||
ld1 {v5.8b}, [x0], x2
|
||||
urhadd v1.8b, v1.8b, v5.8b
|
||||
ld1 {v6.8b}, [x0], x2
|
||||
urhadd v2.8b, v2.8b, v6.8b
|
||||
ld1 {v7.8b}, [x0], x2
|
||||
urhadd v3.8b, v3.8b, v7.8b
|
||||
sub x0, x0, x2, lsl #2
|
||||
.endif
|
||||
subs w3, w3, #4
|
||||
st1 {v0.8B}, [x0], x2
|
||||
st1 {v1.8B}, [x0], x2
|
||||
st1 {v2.8B}, [x0], x2
|
||||
st1 {v3.8B}, [x0], x2
|
||||
st1 {v0.8b}, [x0], x2
|
||||
st1 {v1.8b}, [x0], x2
|
||||
st1 {v2.8b}, [x0], x2
|
||||
st1 {v3.8b}, [x0], x2
|
||||
b.ne 1b
|
||||
ret
|
||||
.endm
|
||||
|
||||
.macro pixels8_x2 rnd=1, avg=0
|
||||
1: ld1 {v0.8B, v1.8B}, [x1], x2
|
||||
ext v1.8B, v0.8B, v1.8B, #1
|
||||
ld1 {v2.8B, v3.8B}, [x1], x2
|
||||
ext v3.8B, v2.8B, v3.8B, #1
|
||||
1: ld1 {v0.8b, v1.8b}, [x1], x2
|
||||
ext v1.8b, v0.8b, v1.8b, #1
|
||||
ld1 {v2.8b, v3.8b}, [x1], x2
|
||||
ext v3.8b, v2.8b, v3.8b, #1
|
||||
subs w3, w3, #2
|
||||
avg v0.8B, v0.8B, v1.8B
|
||||
avg v2.8B, v2.8B, v3.8B
|
||||
avg v0.8b, v0.8b, v1.8b
|
||||
avg v2.8b, v2.8b, v3.8b
|
||||
.if \avg
|
||||
ld1 {v4.8B}, [x0], x2
|
||||
ld1 {v5.8B}, [x0]
|
||||
urhadd v0.8B, v0.8B, v4.8B
|
||||
urhadd v2.8B, v2.8B, v5.8B
|
||||
ld1 {v4.8b}, [x0], x2
|
||||
ld1 {v5.8b}, [x0]
|
||||
urhadd v0.8b, v0.8b, v4.8b
|
||||
urhadd v2.8b, v2.8b, v5.8b
|
||||
sub x0, x0, x2
|
||||
.endif
|
||||
st1 {v0.8B}, [x0], x2
|
||||
st1 {v2.8B}, [x0], x2
|
||||
st1 {v0.8b}, [x0], x2
|
||||
st1 {v2.8b}, [x0], x2
|
||||
b.ne 1b
|
||||
ret
|
||||
.endm
|
||||
|
||||
.macro pixels8_y2 rnd=1, avg=0
|
||||
sub w3, w3, #2
|
||||
ld1 {v0.8B}, [x1], x2
|
||||
ld1 {v1.8B}, [x1], x2
|
||||
ld1 {v0.8b}, [x1], x2
|
||||
ld1 {v1.8b}, [x1], x2
|
||||
1: subs w3, w3, #2
|
||||
avg v4.8B, v0.8B, v1.8B
|
||||
ld1 {v0.8B}, [x1], x2
|
||||
avg v5.8B, v0.8B, v1.8B
|
||||
ld1 {v1.8B}, [x1], x2
|
||||
avg v4.8b, v0.8b, v1.8b
|
||||
ld1 {v0.8b}, [x1], x2
|
||||
avg v5.8b, v0.8b, v1.8b
|
||||
ld1 {v1.8b}, [x1], x2
|
||||
.if \avg
|
||||
ld1 {v2.8B}, [x0], x2
|
||||
ld1 {v3.8B}, [x0]
|
||||
urhadd v4.8B, v4.8B, v2.8B
|
||||
urhadd v5.8B, v5.8B, v3.8B
|
||||
ld1 {v2.8b}, [x0], x2
|
||||
ld1 {v3.8b}, [x0]
|
||||
urhadd v4.8b, v4.8b, v2.8b
|
||||
urhadd v5.8b, v5.8b, v3.8b
|
||||
sub x0, x0, x2
|
||||
.endif
|
||||
st1 {v4.8B}, [x0], x2
|
||||
st1 {v5.8B}, [x0], x2
|
||||
st1 {v4.8b}, [x0], x2
|
||||
st1 {v5.8b}, [x0], x2
|
||||
b.ne 1b
|
||||
|
||||
avg v4.8B, v0.8B, v1.8B
|
||||
ld1 {v0.8B}, [x1], x2
|
||||
avg v5.8B, v0.8B, v1.8B
|
||||
avg v4.8b, v0.8b, v1.8b
|
||||
ld1 {v0.8b}, [x1], x2
|
||||
avg v5.8b, v0.8b, v1.8b
|
||||
.if \avg
|
||||
ld1 {v2.8B}, [x0], x2
|
||||
ld1 {v3.8B}, [x0]
|
||||
urhadd v4.8B, v4.8B, v2.8B
|
||||
urhadd v5.8B, v5.8B, v3.8B
|
||||
ld1 {v2.8b}, [x0], x2
|
||||
ld1 {v3.8b}, [x0]
|
||||
urhadd v4.8b, v4.8b, v2.8b
|
||||
urhadd v5.8b, v5.8b, v3.8b
|
||||
sub x0, x0, x2
|
||||
.endif
|
||||
st1 {v4.8B}, [x0], x2
|
||||
st1 {v5.8B}, [x0], x2
|
||||
st1 {v4.8b}, [x0], x2
|
||||
st1 {v5.8b}, [x0], x2
|
||||
|
||||
ret
|
||||
.endm
|
||||
|
||||
.macro pixels8_xy2 rnd=1, avg=0
|
||||
sub w3, w3, #2
|
||||
ld1 {v0.16B}, [x1], x2
|
||||
ld1 {v1.16B}, [x1], x2
|
||||
ld1 {v0.16b}, [x1], x2
|
||||
ld1 {v1.16b}, [x1], x2
|
||||
NRND movi v19.8H, #1
|
||||
ext v4.16B, v0.16B, v4.16B, #1
|
||||
ext v6.16B, v1.16B, v6.16B, #1
|
||||
uaddl v16.8H, v0.8B, v4.8B
|
||||
uaddl v17.8H, v1.8B, v6.8B
|
||||
ext v4.16b, v0.16b, v4.16b, #1
|
||||
ext v6.16b, v1.16b, v6.16b, #1
|
||||
uaddl v16.8h, v0.8b, v4.8b
|
||||
uaddl v17.8h, v1.8b, v6.8b
|
||||
1: subs w3, w3, #2
|
||||
ld1 {v0.16B}, [x1], x2
|
||||
add v18.8H, v16.8H, v17.8H
|
||||
ext v4.16B, v0.16B, v4.16B, #1
|
||||
ld1 {v0.16b}, [x1], x2
|
||||
add v18.8h, v16.8h, v17.8h
|
||||
ext v4.16b, v0.16b, v4.16b, #1
|
||||
NRND add v18.8H, v18.8H, v19.8H
|
||||
uaddl v16.8H, v0.8B, v4.8B
|
||||
mshrn v5.8B, v18.8H, #2
|
||||
ld1 {v1.16B}, [x1], x2
|
||||
add v18.8H, v16.8H, v17.8H
|
||||
uaddl v16.8h, v0.8b, v4.8b
|
||||
mshrn v5.8b, v18.8h, #2
|
||||
ld1 {v1.16b}, [x1], x2
|
||||
add v18.8h, v16.8h, v17.8h
|
||||
.if \avg
|
||||
ld1 {v7.8B}, [x0]
|
||||
urhadd v5.8B, v5.8B, v7.8B
|
||||
ld1 {v7.8b}, [x0]
|
||||
urhadd v5.8b, v5.8b, v7.8b
|
||||
.endif
|
||||
NRND add v18.8H, v18.8H, v19.8H
|
||||
st1 {v5.8B}, [x0], x2
|
||||
mshrn v7.8B, v18.8H, #2
|
||||
st1 {v5.8b}, [x0], x2
|
||||
mshrn v7.8b, v18.8h, #2
|
||||
.if \avg
|
||||
ld1 {v5.8B}, [x0]
|
||||
urhadd v7.8B, v7.8B, v5.8B
|
||||
ld1 {v5.8b}, [x0]
|
||||
urhadd v7.8b, v7.8b, v5.8b
|
||||
.endif
|
||||
ext v6.16B, v1.16B, v6.16B, #1
|
||||
uaddl v17.8H, v1.8B, v6.8B
|
||||
st1 {v7.8B}, [x0], x2
|
||||
ext v6.16b, v1.16b, v6.16b, #1
|
||||
uaddl v17.8h, v1.8b, v6.8b
|
||||
st1 {v7.8b}, [x0], x2
|
||||
b.gt 1b
|
||||
|
||||
ld1 {v0.16B}, [x1], x2
|
||||
add v18.8H, v16.8H, v17.8H
|
||||
ext v4.16B, v0.16B, v4.16B, #1
|
||||
ld1 {v0.16b}, [x1], x2
|
||||
add v18.8h, v16.8h, v17.8h
|
||||
ext v4.16b, v0.16b, v4.16b, #1
|
||||
NRND add v18.8H, v18.8H, v19.8H
|
||||
uaddl v16.8H, v0.8B, v4.8B
|
||||
mshrn v5.8B, v18.8H, #2
|
||||
add v18.8H, v16.8H, v17.8H
|
||||
uaddl v16.8h, v0.8b, v4.8b
|
||||
mshrn v5.8b, v18.8h, #2
|
||||
add v18.8h, v16.8h, v17.8h
|
||||
.if \avg
|
||||
ld1 {v7.8B}, [x0]
|
||||
urhadd v5.8B, v5.8B, v7.8B
|
||||
ld1 {v7.8b}, [x0]
|
||||
urhadd v5.8b, v5.8b, v7.8b
|
||||
.endif
|
||||
NRND add v18.8H, v18.8H, v19.8H
|
||||
st1 {v5.8B}, [x0], x2
|
||||
mshrn v7.8B, v18.8H, #2
|
||||
st1 {v5.8b}, [x0], x2
|
||||
mshrn v7.8b, v18.8h, #2
|
||||
.if \avg
|
||||
ld1 {v5.8B}, [x0]
|
||||
urhadd v7.8B, v7.8B, v5.8B
|
||||
ld1 {v5.8b}, [x0]
|
||||
urhadd v7.8b, v7.8b, v5.8b
|
||||
.endif
|
||||
st1 {v7.8B}, [x0], x2
|
||||
st1 {v7.8b}, [x0], x2
|
||||
|
||||
ret
|
||||
.endm
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
#ifndef AVCODEC_AARCH64_IDCT_H
|
||||
#define AVCODEC_AARCH64_IDCT_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
void ff_simple_idct_neon(int16_t *data);
|
||||
|
||||
+96
-96
@@ -17,133 +17,133 @@
|
||||
*/
|
||||
|
||||
.macro transpose_8x8B r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
|
||||
trn1 \r8\().8B, \r0\().8B, \r1\().8B
|
||||
trn2 \r9\().8B, \r0\().8B, \r1\().8B
|
||||
trn1 \r1\().8B, \r2\().8B, \r3\().8B
|
||||
trn2 \r3\().8B, \r2\().8B, \r3\().8B
|
||||
trn1 \r0\().8B, \r4\().8B, \r5\().8B
|
||||
trn2 \r5\().8B, \r4\().8B, \r5\().8B
|
||||
trn1 \r2\().8B, \r6\().8B, \r7\().8B
|
||||
trn2 \r7\().8B, \r6\().8B, \r7\().8B
|
||||
trn1 \r8\().8b, \r0\().8b, \r1\().8b
|
||||
trn2 \r9\().8b, \r0\().8b, \r1\().8b
|
||||
trn1 \r1\().8b, \r2\().8b, \r3\().8b
|
||||
trn2 \r3\().8b, \r2\().8b, \r3\().8b
|
||||
trn1 \r0\().8b, \r4\().8b, \r5\().8b
|
||||
trn2 \r5\().8b, \r4\().8b, \r5\().8b
|
||||
trn1 \r2\().8b, \r6\().8b, \r7\().8b
|
||||
trn2 \r7\().8b, \r6\().8b, \r7\().8b
|
||||
|
||||
trn1 \r4\().4H, \r0\().4H, \r2\().4H
|
||||
trn2 \r2\().4H, \r0\().4H, \r2\().4H
|
||||
trn1 \r6\().4H, \r5\().4H, \r7\().4H
|
||||
trn2 \r7\().4H, \r5\().4H, \r7\().4H
|
||||
trn1 \r5\().4H, \r9\().4H, \r3\().4H
|
||||
trn2 \r9\().4H, \r9\().4H, \r3\().4H
|
||||
trn1 \r3\().4H, \r8\().4H, \r1\().4H
|
||||
trn2 \r8\().4H, \r8\().4H, \r1\().4H
|
||||
trn1 \r4\().4h, \r0\().4h, \r2\().4h
|
||||
trn2 \r2\().4h, \r0\().4h, \r2\().4h
|
||||
trn1 \r6\().4h, \r5\().4h, \r7\().4h
|
||||
trn2 \r7\().4h, \r5\().4h, \r7\().4h
|
||||
trn1 \r5\().4h, \r9\().4h, \r3\().4h
|
||||
trn2 \r9\().4h, \r9\().4h, \r3\().4h
|
||||
trn1 \r3\().4h, \r8\().4h, \r1\().4h
|
||||
trn2 \r8\().4h, \r8\().4h, \r1\().4h
|
||||
|
||||
trn1 \r0\().2S, \r3\().2S, \r4\().2S
|
||||
trn2 \r4\().2S, \r3\().2S, \r4\().2S
|
||||
trn1 \r0\().2s, \r3\().2s, \r4\().2s
|
||||
trn2 \r4\().2s, \r3\().2s, \r4\().2s
|
||||
|
||||
trn1 \r1\().2S, \r5\().2S, \r6\().2S
|
||||
trn2 \r5\().2S, \r5\().2S, \r6\().2S
|
||||
trn1 \r1\().2s, \r5\().2s, \r6\().2s
|
||||
trn2 \r5\().2s, \r5\().2s, \r6\().2s
|
||||
|
||||
trn2 \r6\().2S, \r8\().2S, \r2\().2S
|
||||
trn1 \r2\().2S, \r8\().2S, \r2\().2S
|
||||
trn2 \r6\().2s, \r8\().2s, \r2\().2s
|
||||
trn1 \r2\().2s, \r8\().2s, \r2\().2s
|
||||
|
||||
trn1 \r3\().2S, \r9\().2S, \r7\().2S
|
||||
trn2 \r7\().2S, \r9\().2S, \r7\().2S
|
||||
trn1 \r3\().2s, \r9\().2s, \r7\().2s
|
||||
trn2 \r7\().2s, \r9\().2s, \r7\().2s
|
||||
.endm
|
||||
|
||||
.macro transpose_8x16B r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
|
||||
trn1 \t0\().16B, \r0\().16B, \r1\().16B
|
||||
trn2 \t1\().16B, \r0\().16B, \r1\().16B
|
||||
trn1 \r1\().16B, \r2\().16B, \r3\().16B
|
||||
trn2 \r3\().16B, \r2\().16B, \r3\().16B
|
||||
trn1 \r0\().16B, \r4\().16B, \r5\().16B
|
||||
trn2 \r5\().16B, \r4\().16B, \r5\().16B
|
||||
trn1 \r2\().16B, \r6\().16B, \r7\().16B
|
||||
trn2 \r7\().16B, \r6\().16B, \r7\().16B
|
||||
trn1 \t0\().16b, \r0\().16b, \r1\().16b
|
||||
trn2 \t1\().16b, \r0\().16b, \r1\().16b
|
||||
trn1 \r1\().16b, \r2\().16b, \r3\().16b
|
||||
trn2 \r3\().16b, \r2\().16b, \r3\().16b
|
||||
trn1 \r0\().16b, \r4\().16b, \r5\().16b
|
||||
trn2 \r5\().16b, \r4\().16b, \r5\().16b
|
||||
trn1 \r2\().16b, \r6\().16b, \r7\().16b
|
||||
trn2 \r7\().16b, \r6\().16b, \r7\().16b
|
||||
|
||||
trn1 \r4\().8H, \r0\().8H, \r2\().8H
|
||||
trn2 \r2\().8H, \r0\().8H, \r2\().8H
|
||||
trn1 \r6\().8H, \r5\().8H, \r7\().8H
|
||||
trn2 \r7\().8H, \r5\().8H, \r7\().8H
|
||||
trn1 \r5\().8H, \t1\().8H, \r3\().8H
|
||||
trn2 \t1\().8H, \t1\().8H, \r3\().8H
|
||||
trn1 \r3\().8H, \t0\().8H, \r1\().8H
|
||||
trn2 \t0\().8H, \t0\().8H, \r1\().8H
|
||||
trn1 \r4\().8h, \r0\().8h, \r2\().8h
|
||||
trn2 \r2\().8h, \r0\().8h, \r2\().8h
|
||||
trn1 \r6\().8h, \r5\().8h, \r7\().8h
|
||||
trn2 \r7\().8h, \r5\().8h, \r7\().8h
|
||||
trn1 \r5\().8h, \t1\().8h, \r3\().8h
|
||||
trn2 \t1\().8h, \t1\().8h, \r3\().8h
|
||||
trn1 \r3\().8h, \t0\().8h, \r1\().8h
|
||||
trn2 \t0\().8h, \t0\().8h, \r1\().8h
|
||||
|
||||
trn1 \r0\().4S, \r3\().4S, \r4\().4S
|
||||
trn2 \r4\().4S, \r3\().4S, \r4\().4S
|
||||
trn1 \r0\().4s, \r3\().4s, \r4\().4s
|
||||
trn2 \r4\().4s, \r3\().4s, \r4\().4s
|
||||
|
||||
trn1 \r1\().4S, \r5\().4S, \r6\().4S
|
||||
trn2 \r5\().4S, \r5\().4S, \r6\().4S
|
||||
trn1 \r1\().4s, \r5\().4s, \r6\().4s
|
||||
trn2 \r5\().4s, \r5\().4s, \r6\().4s
|
||||
|
||||
trn2 \r6\().4S, \t0\().4S, \r2\().4S
|
||||
trn1 \r2\().4S, \t0\().4S, \r2\().4S
|
||||
trn2 \r6\().4s, \t0\().4s, \r2\().4s
|
||||
trn1 \r2\().4s, \t0\().4s, \r2\().4s
|
||||
|
||||
trn1 \r3\().4S, \t1\().4S, \r7\().4S
|
||||
trn2 \r7\().4S, \t1\().4S, \r7\().4S
|
||||
trn1 \r3\().4s, \t1\().4s, \r7\().4s
|
||||
trn2 \r7\().4s, \t1\().4s, \r7\().4s
|
||||
.endm
|
||||
|
||||
.macro transpose_4x16B r0, r1, r2, r3, t4, t5, t6, t7
|
||||
trn1 \t4\().16B, \r0\().16B, \r1\().16B
|
||||
trn2 \t5\().16B, \r0\().16B, \r1\().16B
|
||||
trn1 \t6\().16B, \r2\().16B, \r3\().16B
|
||||
trn2 \t7\().16B, \r2\().16B, \r3\().16B
|
||||
trn1 \t4\().16b, \r0\().16b, \r1\().16b
|
||||
trn2 \t5\().16b, \r0\().16b, \r1\().16b
|
||||
trn1 \t6\().16b, \r2\().16b, \r3\().16b
|
||||
trn2 \t7\().16b, \r2\().16b, \r3\().16b
|
||||
|
||||
trn1 \r0\().8H, \t4\().8H, \t6\().8H
|
||||
trn2 \r2\().8H, \t4\().8H, \t6\().8H
|
||||
trn1 \r1\().8H, \t5\().8H, \t7\().8H
|
||||
trn2 \r3\().8H, \t5\().8H, \t7\().8H
|
||||
trn1 \r0\().8h, \t4\().8h, \t6\().8h
|
||||
trn2 \r2\().8h, \t4\().8h, \t6\().8h
|
||||
trn1 \r1\().8h, \t5\().8h, \t7\().8h
|
||||
trn2 \r3\().8h, \t5\().8h, \t7\().8h
|
||||
.endm
|
||||
|
||||
.macro transpose_4x8B r0, r1, r2, r3, t4, t5, t6, t7
|
||||
trn1 \t4\().8B, \r0\().8B, \r1\().8B
|
||||
trn2 \t5\().8B, \r0\().8B, \r1\().8B
|
||||
trn1 \t6\().8B, \r2\().8B, \r3\().8B
|
||||
trn2 \t7\().8B, \r2\().8B, \r3\().8B
|
||||
trn1 \t4\().8b, \r0\().8b, \r1\().8b
|
||||
trn2 \t5\().8b, \r0\().8b, \r1\().8b
|
||||
trn1 \t6\().8b, \r2\().8b, \r3\().8b
|
||||
trn2 \t7\().8b, \r2\().8b, \r3\().8b
|
||||
|
||||
trn1 \r0\().4H, \t4\().4H, \t6\().4H
|
||||
trn2 \r2\().4H, \t4\().4H, \t6\().4H
|
||||
trn1 \r1\().4H, \t5\().4H, \t7\().4H
|
||||
trn2 \r3\().4H, \t5\().4H, \t7\().4H
|
||||
trn1 \r0\().4h, \t4\().4h, \t6\().4h
|
||||
trn2 \r2\().4h, \t4\().4h, \t6\().4h
|
||||
trn1 \r1\().4h, \t5\().4h, \t7\().4h
|
||||
trn2 \r3\().4h, \t5\().4h, \t7\().4h
|
||||
.endm
|
||||
|
||||
.macro transpose_4x4H r0, r1, r2, r3, r4, r5, r6, r7
|
||||
trn1 \r4\().4H, \r0\().4H, \r1\().4H
|
||||
trn2 \r5\().4H, \r0\().4H, \r1\().4H
|
||||
trn1 \r6\().4H, \r2\().4H, \r3\().4H
|
||||
trn2 \r7\().4H, \r2\().4H, \r3\().4H
|
||||
trn1 \r0\().2S, \r4\().2S, \r6\().2S
|
||||
trn2 \r2\().2S, \r4\().2S, \r6\().2S
|
||||
trn1 \r1\().2S, \r5\().2S, \r7\().2S
|
||||
trn2 \r3\().2S, \r5\().2S, \r7\().2S
|
||||
trn1 \r4\().4h, \r0\().4h, \r1\().4h
|
||||
trn2 \r5\().4h, \r0\().4h, \r1\().4h
|
||||
trn1 \r6\().4h, \r2\().4h, \r3\().4h
|
||||
trn2 \r7\().4h, \r2\().4h, \r3\().4h
|
||||
trn1 \r0\().2s, \r4\().2s, \r6\().2s
|
||||
trn2 \r2\().2s, \r4\().2s, \r6\().2s
|
||||
trn1 \r1\().2s, \r5\().2s, \r7\().2s
|
||||
trn2 \r3\().2s, \r5\().2s, \r7\().2s
|
||||
.endm
|
||||
|
||||
.macro transpose_8x8H r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
|
||||
trn1 \r8\().8H, \r0\().8H, \r1\().8H
|
||||
trn2 \r9\().8H, \r0\().8H, \r1\().8H
|
||||
trn1 \r1\().8H, \r2\().8H, \r3\().8H
|
||||
trn2 \r3\().8H, \r2\().8H, \r3\().8H
|
||||
trn1 \r0\().8H, \r4\().8H, \r5\().8H
|
||||
trn2 \r5\().8H, \r4\().8H, \r5\().8H
|
||||
trn1 \r2\().8H, \r6\().8H, \r7\().8H
|
||||
trn2 \r7\().8H, \r6\().8H, \r7\().8H
|
||||
trn1 \r8\().8h, \r0\().8h, \r1\().8h
|
||||
trn2 \r9\().8h, \r0\().8h, \r1\().8h
|
||||
trn1 \r1\().8h, \r2\().8h, \r3\().8h
|
||||
trn2 \r3\().8h, \r2\().8h, \r3\().8h
|
||||
trn1 \r0\().8h, \r4\().8h, \r5\().8h
|
||||
trn2 \r5\().8h, \r4\().8h, \r5\().8h
|
||||
trn1 \r2\().8h, \r6\().8h, \r7\().8h
|
||||
trn2 \r7\().8h, \r6\().8h, \r7\().8h
|
||||
|
||||
trn1 \r4\().4S, \r0\().4S, \r2\().4S
|
||||
trn2 \r2\().4S, \r0\().4S, \r2\().4S
|
||||
trn1 \r6\().4S, \r5\().4S, \r7\().4S
|
||||
trn2 \r7\().4S, \r5\().4S, \r7\().4S
|
||||
trn1 \r5\().4S, \r9\().4S, \r3\().4S
|
||||
trn2 \r9\().4S, \r9\().4S, \r3\().4S
|
||||
trn1 \r3\().4S, \r8\().4S, \r1\().4S
|
||||
trn2 \r8\().4S, \r8\().4S, \r1\().4S
|
||||
trn1 \r4\().4s, \r0\().4s, \r2\().4s
|
||||
trn2 \r2\().4s, \r0\().4s, \r2\().4s
|
||||
trn1 \r6\().4s, \r5\().4s, \r7\().4s
|
||||
trn2 \r7\().4s, \r5\().4s, \r7\().4s
|
||||
trn1 \r5\().4s, \r9\().4s, \r3\().4s
|
||||
trn2 \r9\().4s, \r9\().4s, \r3\().4s
|
||||
trn1 \r3\().4s, \r8\().4s, \r1\().4s
|
||||
trn2 \r8\().4s, \r8\().4s, \r1\().4s
|
||||
|
||||
trn1 \r0\().2D, \r3\().2D, \r4\().2D
|
||||
trn2 \r4\().2D, \r3\().2D, \r4\().2D
|
||||
trn1 \r0\().2d, \r3\().2d, \r4\().2d
|
||||
trn2 \r4\().2d, \r3\().2d, \r4\().2d
|
||||
|
||||
trn1 \r1\().2D, \r5\().2D, \r6\().2D
|
||||
trn2 \r5\().2D, \r5\().2D, \r6\().2D
|
||||
trn1 \r1\().2d, \r5\().2d, \r6\().2d
|
||||
trn2 \r5\().2d, \r5\().2d, \r6\().2d
|
||||
|
||||
trn2 \r6\().2D, \r8\().2D, \r2\().2D
|
||||
trn1 \r2\().2D, \r8\().2D, \r2\().2D
|
||||
trn2 \r6\().2d, \r8\().2d, \r2\().2d
|
||||
trn1 \r2\().2d, \r8\().2d, \r2\().2d
|
||||
|
||||
trn1 \r3\().2D, \r9\().2D, \r7\().2D
|
||||
trn2 \r7\().2D, \r9\().2D, \r7\().2D
|
||||
trn1 \r3\().2d, \r9\().2d, \r7\().2d
|
||||
trn2 \r7\().2d, \r9\().2d, \r7\().2d
|
||||
|
||||
.endm
|
||||
|
||||
@@ -33,81 +33,81 @@ const tab_x2, align=4
|
||||
endconst
|
||||
|
||||
function ff_opus_deemphasis_neon, export=1
|
||||
movrel x4, tab_st
|
||||
ld1 {v4.4s}, [x4]
|
||||
movrel x4, tab_x0
|
||||
ld1 {v5.4s}, [x4]
|
||||
movrel x4, tab_x1
|
||||
ld1 {v6.4s}, [x4]
|
||||
movrel x4, tab_x2
|
||||
ld1 {v7.4s}, [x4]
|
||||
movrel x4, tab_st
|
||||
ld1 {v4.4s}, [x4]
|
||||
movrel x4, tab_x0
|
||||
ld1 {v5.4s}, [x4]
|
||||
movrel x4, tab_x1
|
||||
ld1 {v6.4s}, [x4]
|
||||
movrel x4, tab_x2
|
||||
ld1 {v7.4s}, [x4]
|
||||
|
||||
fmul v0.4s, v4.4s, v0.s[0]
|
||||
fmul v0.4s, v4.4s, v0.s[0]
|
||||
|
||||
1: ld1 {v1.4s, v2.4s}, [x1], #32
|
||||
1: ld1 {v1.4s, v2.4s}, [x1], #32
|
||||
|
||||
fmla v0.4s, v5.4s, v1.s[0]
|
||||
fmul v3.4s, v7.4s, v2.s[2]
|
||||
fmla v0.4s, v5.4s, v1.s[0]
|
||||
fmul v3.4s, v7.4s, v2.s[2]
|
||||
|
||||
fmla v0.4s, v6.4s, v1.s[1]
|
||||
fmla v3.4s, v6.4s, v2.s[1]
|
||||
fmla v0.4s, v6.4s, v1.s[1]
|
||||
fmla v3.4s, v6.4s, v2.s[1]
|
||||
|
||||
fmla v0.4s, v7.4s, v1.s[2]
|
||||
fmla v3.4s, v5.4s, v2.s[0]
|
||||
fmla v0.4s, v7.4s, v1.s[2]
|
||||
fmla v3.4s, v5.4s, v2.s[0]
|
||||
|
||||
fadd v1.4s, v1.4s, v0.4s
|
||||
fadd v2.4s, v2.4s, v3.4s
|
||||
fadd v1.4s, v1.4s, v0.4s
|
||||
fadd v2.4s, v2.4s, v3.4s
|
||||
|
||||
fmla v2.4s, v4.4s, v1.s[3]
|
||||
fmla v2.4s, v4.4s, v1.s[3]
|
||||
|
||||
st1 {v1.4s, v2.4s}, [x0], #32
|
||||
fmul v0.4s, v4.4s, v2.s[3]
|
||||
st1 {v1.4s, v2.4s}, [x0], #32
|
||||
fmul v0.4s, v4.4s, v2.s[3]
|
||||
|
||||
subs w2, w2, #8
|
||||
b.gt 1b
|
||||
subs w2, w2, #8
|
||||
b.gt 1b
|
||||
|
||||
mov s0, v2.s[3]
|
||||
mov s0, v2.s[3]
|
||||
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_opus_postfilter_neon, export=1
|
||||
ld1 {v0.4s}, [x2]
|
||||
dup v1.4s, v0.s[1]
|
||||
dup v2.4s, v0.s[2]
|
||||
dup v0.4s, v0.s[0]
|
||||
ld1 {v0.4s}, [x2]
|
||||
dup v1.4s, v0.s[1]
|
||||
dup v2.4s, v0.s[2]
|
||||
dup v0.4s, v0.s[0]
|
||||
|
||||
add w1, w1, #2
|
||||
sub x1, x0, x1, lsl #2
|
||||
add w1, w1, #2
|
||||
sub x1, x0, x1, lsl #2
|
||||
|
||||
ld1 {v3.4s}, [x1]
|
||||
fmul v3.4s, v3.4s, v2.4s
|
||||
ld1 {v3.4s}, [x1]
|
||||
fmul v3.4s, v3.4s, v2.4s
|
||||
|
||||
1: add x1, x1, #4
|
||||
ld1 {v4.4s}, [x1]
|
||||
add x1, x1, #4
|
||||
ld1 {v5.4s}, [x1]
|
||||
add x1, x1, #4
|
||||
ld1 {v6.4s}, [x1]
|
||||
add x1, x1, #4
|
||||
ld1 {v7.4s}, [x1]
|
||||
1: add x1, x1, #4
|
||||
ld1 {v4.4s}, [x1]
|
||||
add x1, x1, #4
|
||||
ld1 {v5.4s}, [x1]
|
||||
add x1, x1, #4
|
||||
ld1 {v6.4s}, [x1]
|
||||
add x1, x1, #4
|
||||
ld1 {v7.4s}, [x1]
|
||||
|
||||
fmla v3.4s, v7.4s, v2.4s
|
||||
fadd v6.4s, v6.4s, v4.4s
|
||||
fmla v3.4s, v7.4s, v2.4s
|
||||
fadd v6.4s, v6.4s, v4.4s
|
||||
|
||||
ld1 {v4.4s}, [x0]
|
||||
fmla v4.4s, v5.4s, v0.4s
|
||||
ld1 {v4.4s}, [x0]
|
||||
fmla v4.4s, v5.4s, v0.4s
|
||||
|
||||
fmul v6.4s, v6.4s, v1.4s
|
||||
fadd v6.4s, v6.4s, v3.4s
|
||||
fmul v6.4s, v6.4s, v1.4s
|
||||
fadd v6.4s, v6.4s, v3.4s
|
||||
|
||||
fadd v4.4s, v4.4s, v6.4s
|
||||
fmul v3.4s, v7.4s, v2.4s
|
||||
fadd v4.4s, v4.4s, v6.4s
|
||||
fmul v3.4s, v7.4s, v2.4s
|
||||
|
||||
st1 {v4.4s}, [x0], #16
|
||||
st1 {v4.4s}, [x0], #16
|
||||
|
||||
subs w3, w3, #4
|
||||
b.gt 1b
|
||||
subs w3, w3, #4
|
||||
b.gt 1b
|
||||
|
||||
ret
|
||||
endfunc
|
||||
|
||||
+147
-147
@@ -46,49 +46,49 @@ function ff_sbr_sum64x5_neon, export=1
|
||||
add x3, x0, #192*4
|
||||
add x4, x0, #256*4
|
||||
mov x5, #64
|
||||
1: ld1 {v0.4S}, [x0]
|
||||
ld1 {v1.4S}, [x1], #16
|
||||
fadd v0.4S, v0.4S, v1.4S
|
||||
ld1 {v2.4S}, [x2], #16
|
||||
fadd v0.4S, v0.4S, v2.4S
|
||||
ld1 {v3.4S}, [x3], #16
|
||||
fadd v0.4S, v0.4S, v3.4S
|
||||
ld1 {v4.4S}, [x4], #16
|
||||
fadd v0.4S, v0.4S, v4.4S
|
||||
st1 {v0.4S}, [x0], #16
|
||||
1: ld1 {v0.4s}, [x0]
|
||||
ld1 {v1.4s}, [x1], #16
|
||||
fadd v0.4s, v0.4s, v1.4s
|
||||
ld1 {v2.4s}, [x2], #16
|
||||
fadd v0.4s, v0.4s, v2.4s
|
||||
ld1 {v3.4s}, [x3], #16
|
||||
fadd v0.4s, v0.4s, v3.4s
|
||||
ld1 {v4.4s}, [x4], #16
|
||||
fadd v0.4s, v0.4s, v4.4s
|
||||
st1 {v0.4s}, [x0], #16
|
||||
subs x5, x5, #4
|
||||
b.gt 1b
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_sbr_sum_square_neon, export=1
|
||||
movi v0.4S, #0
|
||||
1: ld1 {v1.4S}, [x0], #16
|
||||
fmla v0.4S, v1.4S, v1.4S
|
||||
movi v0.4s, #0
|
||||
1: ld1 {v1.4s}, [x0], #16
|
||||
fmla v0.4s, v1.4s, v1.4s
|
||||
subs w1, w1, #2
|
||||
b.gt 1b
|
||||
faddp v0.4S, v0.4S, v0.4S
|
||||
faddp v0.4S, v0.4S, v0.4S
|
||||
faddp v0.4s, v0.4s, v0.4s
|
||||
faddp v0.4s, v0.4s, v0.4s
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_sbr_neg_odd_64_neon, export=1
|
||||
mov x1, x0
|
||||
movi v5.4S, #1<<7, lsl #24
|
||||
ld2 {v0.4S, v1.4S}, [x0], #32
|
||||
eor v1.16B, v1.16B, v5.16B
|
||||
ld2 {v2.4S, v3.4S}, [x0], #32
|
||||
movi v5.4s, #1<<7, lsl #24
|
||||
ld2 {v0.4s, v1.4s}, [x0], #32
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
ld2 {v2.4s, v3.4s}, [x0], #32
|
||||
.rept 3
|
||||
st2 {v0.4S, v1.4S}, [x1], #32
|
||||
eor v3.16B, v3.16B, v5.16B
|
||||
ld2 {v0.4S, v1.4S}, [x0], #32
|
||||
st2 {v2.4S, v3.4S}, [x1], #32
|
||||
eor v1.16B, v1.16B, v5.16B
|
||||
ld2 {v2.4S, v3.4S}, [x0], #32
|
||||
st2 {v0.4s, v1.4s}, [x1], #32
|
||||
eor v3.16b, v3.16b, v5.16b
|
||||
ld2 {v0.4s, v1.4s}, [x0], #32
|
||||
st2 {v2.4s, v3.4s}, [x1], #32
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
ld2 {v2.4s, v3.4s}, [x0], #32
|
||||
.endr
|
||||
eor v3.16B, v3.16B, v5.16B
|
||||
st2 {v0.4S, v1.4S}, [x1], #32
|
||||
st2 {v2.4S, v3.4S}, [x1], #32
|
||||
eor v3.16b, v3.16b, v5.16b
|
||||
st2 {v0.4s, v1.4s}, [x1], #32
|
||||
st2 {v2.4s, v3.4s}, [x1], #32
|
||||
ret
|
||||
endfunc
|
||||
|
||||
@@ -97,26 +97,26 @@ function ff_sbr_qmf_pre_shuffle_neon, export=1
|
||||
add x2, x0, #64*4
|
||||
mov x3, #-16
|
||||
mov x4, #-4
|
||||
movi v6.4S, #1<<7, lsl #24
|
||||
ld1 {v0.2S}, [x0], #8
|
||||
st1 {v0.2S}, [x2], #8
|
||||
movi v6.4s, #1<<7, lsl #24
|
||||
ld1 {v0.2s}, [x0], #8
|
||||
st1 {v0.2s}, [x2], #8
|
||||
.rept 7
|
||||
ld1 {v1.4S}, [x1], x3
|
||||
ld1 {v2.4S}, [x0], #16
|
||||
eor v1.16B, v1.16B, v6.16B
|
||||
rev64 v1.4S, v1.4S
|
||||
ext v1.16B, v1.16B, v1.16B, #8
|
||||
st2 {v1.4S, v2.4S}, [x2], #32
|
||||
ld1 {v1.4s}, [x1], x3
|
||||
ld1 {v2.4s}, [x0], #16
|
||||
eor v1.16b, v1.16b, v6.16b
|
||||
rev64 v1.4s, v1.4s
|
||||
ext v1.16b, v1.16b, v1.16b, #8
|
||||
st2 {v1.4s, v2.4s}, [x2], #32
|
||||
.endr
|
||||
add x1, x1, #8
|
||||
ld1 {v1.2S}, [x1], x4
|
||||
ld1 {v2.2S}, [x0], #8
|
||||
ld1 {v1.S}[3], [x1]
|
||||
ld1 {v2.S}[2], [x0]
|
||||
eor v1.16B, v1.16B, v6.16B
|
||||
rev64 v1.4S, v1.4S
|
||||
st2 {v1.2S, v2.2S}, [x2], #16
|
||||
st2 {v1.S, v2.S}[2], [x2]
|
||||
ld1 {v1.2s}, [x1], x4
|
||||
ld1 {v2.2s}, [x0], #8
|
||||
ld1 {v1.s}[3], [x1]
|
||||
ld1 {v2.s}[2], [x0]
|
||||
eor v1.16b, v1.16b, v6.16b
|
||||
rev64 v1.4s, v1.4s
|
||||
st2 {v1.2s, v2.2s}, [x2], #16
|
||||
st2 {v1.s, v2.s}[2], [x2]
|
||||
ret
|
||||
endfunc
|
||||
|
||||
@@ -124,13 +124,13 @@ function ff_sbr_qmf_post_shuffle_neon, export=1
|
||||
add x2, x1, #60*4
|
||||
mov x3, #-16
|
||||
mov x4, #32
|
||||
movi v6.4S, #1<<7, lsl #24
|
||||
1: ld1 {v0.4S}, [x2], x3
|
||||
ld1 {v1.4S}, [x1], #16
|
||||
eor v0.16B, v0.16B, v6.16B
|
||||
rev64 v0.4S, v0.4S
|
||||
ext v0.16B, v0.16B, v0.16B, #8
|
||||
st2 {v0.4S, v1.4S}, [x0], #32
|
||||
movi v6.4s, #1<<7, lsl #24
|
||||
1: ld1 {v0.4s}, [x2], x3
|
||||
ld1 {v1.4s}, [x1], #16
|
||||
eor v0.16b, v0.16b, v6.16b
|
||||
rev64 v0.4s, v0.4s
|
||||
ext v0.16b, v0.16b, v0.16b, #8
|
||||
st2 {v0.4s, v1.4s}, [x0], #32
|
||||
subs x4, x4, #4
|
||||
b.gt 1b
|
||||
ret
|
||||
@@ -141,13 +141,13 @@ function ff_sbr_qmf_deint_neg_neon, export=1
|
||||
add x2, x0, #60*4
|
||||
mov x3, #-32
|
||||
mov x4, #32
|
||||
movi v2.4S, #1<<7, lsl #24
|
||||
1: ld2 {v0.4S, v1.4S}, [x1], x3
|
||||
eor v0.16B, v0.16B, v2.16B
|
||||
rev64 v1.4S, v1.4S
|
||||
ext v1.16B, v1.16B, v1.16B, #8
|
||||
st1 {v0.4S}, [x2]
|
||||
st1 {v1.4S}, [x0], #16
|
||||
movi v2.4s, #1<<7, lsl #24
|
||||
1: ld2 {v0.4s, v1.4s}, [x1], x3
|
||||
eor v0.16b, v0.16b, v2.16b
|
||||
rev64 v1.4s, v1.4s
|
||||
ext v1.16b, v1.16b, v1.16b, #8
|
||||
st1 {v0.4s}, [x2]
|
||||
st1 {v1.4s}, [x0], #16
|
||||
sub x2, x2, #16
|
||||
subs x4, x4, #4
|
||||
b.gt 1b
|
||||
@@ -159,16 +159,16 @@ function ff_sbr_qmf_deint_bfly_neon, export=1
|
||||
add x3, x0, #124*4
|
||||
mov x4, #64
|
||||
mov x5, #-16
|
||||
1: ld1 {v0.4S}, [x1], #16
|
||||
ld1 {v1.4S}, [x2], x5
|
||||
rev64 v2.4S, v0.4S
|
||||
ext v2.16B, v2.16B, v2.16B, #8
|
||||
rev64 v3.4S, v1.4S
|
||||
ext v3.16B, v3.16B, v3.16B, #8
|
||||
fadd v1.4S, v1.4S, v2.4S
|
||||
fsub v0.4S, v0.4S, v3.4S
|
||||
st1 {v0.4S}, [x0], #16
|
||||
st1 {v1.4S}, [x3], x5
|
||||
1: ld1 {v0.4s}, [x1], #16
|
||||
ld1 {v1.4s}, [x2], x5
|
||||
rev64 v2.4s, v0.4s
|
||||
ext v2.16b, v2.16b, v2.16b, #8
|
||||
rev64 v3.4s, v1.4s
|
||||
ext v3.16b, v3.16b, v3.16b, #8
|
||||
fadd v1.4s, v1.4s, v2.4s
|
||||
fsub v0.4s, v0.4s, v3.4s
|
||||
st1 {v0.4s}, [x0], #16
|
||||
st1 {v1.4s}, [x3], x5
|
||||
subs x4, x4, #4
|
||||
b.gt 1b
|
||||
ret
|
||||
@@ -178,32 +178,32 @@ function ff_sbr_hf_gen_neon, export=1
|
||||
sxtw x4, w4
|
||||
sxtw x5, w5
|
||||
movrel x6, factors
|
||||
ld1 {v7.4S}, [x6]
|
||||
dup v1.4S, v0.S[0]
|
||||
mov v2.8B, v1.8B
|
||||
mov v2.S[2], v7.S[0]
|
||||
mov v2.S[3], v7.S[0]
|
||||
fmul v1.4S, v1.4S, v2.4S
|
||||
ld1 {v0.D}[0], [x3]
|
||||
ld1 {v0.D}[1], [x2]
|
||||
fmul v0.4S, v0.4S, v1.4S
|
||||
fmul v1.4S, v0.4S, v7.4S
|
||||
rev64 v0.4S, v0.4S
|
||||
ld1 {v7.4s}, [x6]
|
||||
dup v1.4s, v0.s[0]
|
||||
mov v2.8b, v1.8b
|
||||
mov v2.s[2], v7.s[0]
|
||||
mov v2.s[3], v7.s[0]
|
||||
fmul v1.4s, v1.4s, v2.4s
|
||||
ld1 {v0.d}[0], [x3]
|
||||
ld1 {v0.d}[1], [x2]
|
||||
fmul v0.4s, v0.4s, v1.4s
|
||||
fmul v1.4s, v0.4s, v7.4s
|
||||
rev64 v0.4s, v0.4s
|
||||
sub x7, x5, x4
|
||||
add x0, x0, x4, lsl #3
|
||||
add x1, x1, x4, lsl #3
|
||||
sub x1, x1, #16
|
||||
1: ld1 {v2.4S}, [x1], #16
|
||||
ld1 {v3.2S}, [x1]
|
||||
fmul v4.4S, v2.4S, v1.4S
|
||||
fmul v5.4S, v2.4S, v0.4S
|
||||
faddp v4.4S, v4.4S, v4.4S
|
||||
faddp v5.4S, v5.4S, v5.4S
|
||||
faddp v4.4S, v4.4S, v4.4S
|
||||
faddp v5.4S, v5.4S, v5.4S
|
||||
mov v4.S[1], v5.S[0]
|
||||
fadd v4.2S, v4.2S, v3.2S
|
||||
st1 {v4.2S}, [x0], #8
|
||||
1: ld1 {v2.4s}, [x1], #16
|
||||
ld1 {v3.2s}, [x1]
|
||||
fmul v4.4s, v2.4s, v1.4s
|
||||
fmul v5.4s, v2.4s, v0.4s
|
||||
faddp v4.4s, v4.4s, v4.4s
|
||||
faddp v5.4s, v5.4s, v5.4s
|
||||
faddp v4.4s, v4.4s, v4.4s
|
||||
faddp v5.4s, v5.4s, v5.4s
|
||||
mov v4.s[1], v5.s[0]
|
||||
fadd v4.2s, v4.2s, v3.2s
|
||||
st1 {v4.2s}, [x0], #8
|
||||
sub x1, x1, #8
|
||||
subs x7, x7, #1
|
||||
b.gt 1b
|
||||
@@ -215,10 +215,10 @@ function ff_sbr_hf_g_filt_neon, export=1
|
||||
sxtw x4, w4
|
||||
mov x5, #40*2*4
|
||||
add x1, x1, x4, lsl #3
|
||||
1: ld1 {v0.2S}, [x1], x5
|
||||
ld1 {v1.S}[0], [x2], #4
|
||||
fmul v2.4S, v0.4S, v1.S[0]
|
||||
st1 {v2.2S}, [x0], #8
|
||||
1: ld1 {v0.2s}, [x1], x5
|
||||
ld1 {v1.s}[0], [x2], #4
|
||||
fmul v2.4s, v0.4s, v1.s[0]
|
||||
st1 {v2.2s}, [x0], #8
|
||||
subs x3, x3, #1
|
||||
b.gt 1b
|
||||
ret
|
||||
@@ -227,46 +227,46 @@ endfunc
|
||||
function ff_sbr_autocorrelate_neon, export=1
|
||||
mov x2, #38
|
||||
movrel x3, factors
|
||||
ld1 {v0.4S}, [x3]
|
||||
movi v1.4S, #0
|
||||
movi v2.4S, #0
|
||||
movi v3.4S, #0
|
||||
ld1 {v4.2S}, [x0], #8
|
||||
ld1 {v5.2S}, [x0], #8
|
||||
fmul v16.2S, v4.2S, v4.2S
|
||||
fmul v17.2S, v5.2S, v4.S[0]
|
||||
fmul v18.2S, v5.2S, v4.S[1]
|
||||
1: ld1 {v5.D}[1], [x0], #8
|
||||
fmla v1.2S, v4.2S, v4.2S
|
||||
fmla v2.4S, v5.4S, v4.S[0]
|
||||
fmla v3.4S, v5.4S, v4.S[1]
|
||||
mov v4.D[0], v5.D[0]
|
||||
mov v5.D[0], v5.D[1]
|
||||
ld1 {v0.4s}, [x3]
|
||||
movi v1.4s, #0
|
||||
movi v2.4s, #0
|
||||
movi v3.4s, #0
|
||||
ld1 {v4.2s}, [x0], #8
|
||||
ld1 {v5.2s}, [x0], #8
|
||||
fmul v16.2s, v4.2s, v4.2s
|
||||
fmul v17.2s, v5.2s, v4.s[0]
|
||||
fmul v18.2s, v5.2s, v4.s[1]
|
||||
1: ld1 {v5.d}[1], [x0], #8
|
||||
fmla v1.2s, v4.2s, v4.2s
|
||||
fmla v2.4s, v5.4s, v4.s[0]
|
||||
fmla v3.4s, v5.4s, v4.s[1]
|
||||
mov v4.d[0], v5.d[0]
|
||||
mov v5.d[0], v5.d[1]
|
||||
subs x2, x2, #1
|
||||
b.gt 1b
|
||||
fmul v19.2S, v4.2S, v4.2S
|
||||
fmul v20.2S, v5.2S, v4.S[0]
|
||||
fmul v21.2S, v5.2S, v4.S[1]
|
||||
fadd v22.4S, v2.4S, v20.4S
|
||||
fsub v22.4S, v22.4S, v17.4S
|
||||
fadd v23.4S, v3.4S, v21.4S
|
||||
fsub v23.4S, v23.4S, v18.4S
|
||||
rev64 v23.4S, v23.4S
|
||||
fmul v23.4S, v23.4S, v0.4S
|
||||
fadd v22.4S, v22.4S, v23.4S
|
||||
st1 {v22.4S}, [x1], #16
|
||||
fadd v23.2S, v1.2S, v19.2S
|
||||
fsub v23.2S, v23.2S, v16.2S
|
||||
faddp v23.2S, v23.2S, v23.2S
|
||||
st1 {v23.S}[0], [x1]
|
||||
fmul v19.2s, v4.2s, v4.2s
|
||||
fmul v20.2s, v5.2s, v4.s[0]
|
||||
fmul v21.2s, v5.2s, v4.s[1]
|
||||
fadd v22.4s, v2.4s, v20.4s
|
||||
fsub v22.4s, v22.4s, v17.4s
|
||||
fadd v23.4s, v3.4s, v21.4s
|
||||
fsub v23.4s, v23.4s, v18.4s
|
||||
rev64 v23.4s, v23.4s
|
||||
fmul v23.4s, v23.4s, v0.4s
|
||||
fadd v22.4s, v22.4s, v23.4s
|
||||
st1 {v22.4s}, [x1], #16
|
||||
fadd v23.2s, v1.2s, v19.2s
|
||||
fsub v23.2s, v23.2s, v16.2s
|
||||
faddp v23.2s, v23.2s, v23.2s
|
||||
st1 {v23.s}[0], [x1]
|
||||
add x1, x1, #8
|
||||
rev64 v3.2S, v3.2S
|
||||
fmul v3.2S, v3.2S, v0.2S
|
||||
fadd v2.2S, v2.2S, v3.2S
|
||||
st1 {v2.2S}, [x1]
|
||||
rev64 v3.2s, v3.2s
|
||||
fmul v3.2s, v3.2s, v0.2s
|
||||
fadd v2.2s, v2.2s, v3.2s
|
||||
st1 {v2.2s}, [x1]
|
||||
add x1, x1, #16
|
||||
faddp v1.2S, v1.2S, v1.2S
|
||||
st1 {v1.S}[0], [x1]
|
||||
faddp v1.2s, v1.2s, v1.2s
|
||||
st1 {v1.s}[0], [x1]
|
||||
ret
|
||||
endfunc
|
||||
|
||||
@@ -278,25 +278,25 @@ endfunc
|
||||
1: and x3, x3, #0x1ff
|
||||
add x8, x7, x3, lsl #3
|
||||
add x3, x3, #2
|
||||
ld1 {v2.4S}, [x0]
|
||||
ld1 {v3.2S}, [x1], #8
|
||||
ld1 {v4.2S}, [x2], #8
|
||||
ld1 {v5.4S}, [x8]
|
||||
mov v6.16B, v2.16B
|
||||
zip1 v3.4S, v3.4S, v3.4S
|
||||
zip1 v4.4S, v4.4S, v4.4S
|
||||
fmla v6.4S, v1.4S, v3.4S
|
||||
fmla v2.4S, v5.4S, v4.4S
|
||||
fcmeq v7.4S, v3.4S, #0
|
||||
bif v2.16B, v6.16B, v7.16B
|
||||
st1 {v2.4S}, [x0], #16
|
||||
ld1 {v2.4s}, [x0]
|
||||
ld1 {v3.2s}, [x1], #8
|
||||
ld1 {v4.2s}, [x2], #8
|
||||
ld1 {v5.4s}, [x8]
|
||||
mov v6.16b, v2.16b
|
||||
zip1 v3.4s, v3.4s, v3.4s
|
||||
zip1 v4.4s, v4.4s, v4.4s
|
||||
fmla v6.4s, v1.4s, v3.4s
|
||||
fmla v2.4s, v5.4s, v4.4s
|
||||
fcmeq v7.4s, v3.4s, #0
|
||||
bif v2.16b, v6.16b, v7.16b
|
||||
st1 {v2.4s}, [x0], #16
|
||||
subs x5, x5, #2
|
||||
b.gt 1b
|
||||
.endm
|
||||
|
||||
function ff_sbr_hf_apply_noise_0_neon, export=1
|
||||
movrel x9, phi_noise_0
|
||||
ld1 {v1.4S}, [x9]
|
||||
ld1 {v1.4s}, [x9]
|
||||
apply_noise_common
|
||||
ret
|
||||
endfunc
|
||||
@@ -305,14 +305,14 @@ function ff_sbr_hf_apply_noise_1_neon, export=1
|
||||
movrel x9, phi_noise_1
|
||||
and x4, x4, #1
|
||||
add x9, x9, x4, lsl #4
|
||||
ld1 {v1.4S}, [x9]
|
||||
ld1 {v1.4s}, [x9]
|
||||
apply_noise_common
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_sbr_hf_apply_noise_2_neon, export=1
|
||||
movrel x9, phi_noise_2
|
||||
ld1 {v1.4S}, [x9]
|
||||
ld1 {v1.4s}, [x9]
|
||||
apply_noise_common
|
||||
ret
|
||||
endfunc
|
||||
@@ -321,7 +321,7 @@ function ff_sbr_hf_apply_noise_3_neon, export=1
|
||||
movrel x9, phi_noise_3
|
||||
and x4, x4, #1
|
||||
add x9, x9, x4, lsl #4
|
||||
ld1 {v1.4S}, [x9]
|
||||
ld1 {v1.4s}, [x9]
|
||||
apply_noise_common
|
||||
ret
|
||||
endfunc
|
||||
|
||||
@@ -54,7 +54,7 @@ endconst
|
||||
prfm pldl1keep, [\data]
|
||||
mov x10, x30
|
||||
movrel x3, idct_coeff_neon
|
||||
ld1 {v0.2D}, [x3]
|
||||
ld1 {v0.2d}, [x3]
|
||||
.endm
|
||||
|
||||
.macro idct_end
|
||||
@@ -74,146 +74,146 @@ endconst
|
||||
.endm
|
||||
|
||||
.macro idct_col4_top y1, y2, y3, y4, i, l
|
||||
smull\i v7.4S, \y3\l, z2
|
||||
smull\i v16.4S, \y3\l, z6
|
||||
smull\i v17.4S, \y2\l, z1
|
||||
add v19.4S, v23.4S, v7.4S
|
||||
smull\i v18.4S, \y2\l, z3
|
||||
add v20.4S, v23.4S, v16.4S
|
||||
smull\i v5.4S, \y2\l, z5
|
||||
sub v21.4S, v23.4S, v16.4S
|
||||
smull\i v6.4S, \y2\l, z7
|
||||
sub v22.4S, v23.4S, v7.4S
|
||||
smull\i v7.4s, \y3\l, z2
|
||||
smull\i v16.4s, \y3\l, z6
|
||||
smull\i v17.4s, \y2\l, z1
|
||||
add v19.4s, v23.4s, v7.4s
|
||||
smull\i v18.4s, \y2\l, z3
|
||||
add v20.4s, v23.4s, v16.4s
|
||||
smull\i v5.4s, \y2\l, z5
|
||||
sub v21.4s, v23.4s, v16.4s
|
||||
smull\i v6.4s, \y2\l, z7
|
||||
sub v22.4s, v23.4s, v7.4s
|
||||
|
||||
smlal\i v17.4S, \y4\l, z3
|
||||
smlsl\i v18.4S, \y4\l, z7
|
||||
smlsl\i v5.4S, \y4\l, z1
|
||||
smlsl\i v6.4S, \y4\l, z5
|
||||
smlal\i v17.4s, \y4\l, z3
|
||||
smlsl\i v18.4s, \y4\l, z7
|
||||
smlsl\i v5.4s, \y4\l, z1
|
||||
smlsl\i v6.4s, \y4\l, z5
|
||||
.endm
|
||||
|
||||
.macro idct_row4_neon y1, y2, y3, y4, pass
|
||||
ld1 {\y1\().2D,\y2\().2D}, [x2], #32
|
||||
movi v23.4S, #1<<2, lsl #8
|
||||
orr v5.16B, \y1\().16B, \y2\().16B
|
||||
ld1 {\y3\().2D,\y4\().2D}, [x2], #32
|
||||
orr v6.16B, \y3\().16B, \y4\().16B
|
||||
orr v5.16B, v5.16B, v6.16B
|
||||
mov x3, v5.D[1]
|
||||
smlal v23.4S, \y1\().4H, z4
|
||||
ld1 {\y1\().2d,\y2\().2d}, [x2], #32
|
||||
movi v23.4s, #1<<2, lsl #8
|
||||
orr v5.16b, \y1\().16b, \y2\().16b
|
||||
ld1 {\y3\().2d,\y4\().2d}, [x2], #32
|
||||
orr v6.16b, \y3\().16b, \y4\().16b
|
||||
orr v5.16b, v5.16b, v6.16b
|
||||
mov x3, v5.d[1]
|
||||
smlal v23.4s, \y1\().4h, z4
|
||||
|
||||
idct_col4_top \y1, \y2, \y3, \y4, 1, .4H
|
||||
idct_col4_top \y1, \y2, \y3, \y4, 1, .4h
|
||||
|
||||
cmp x3, #0
|
||||
b.eq \pass\()f
|
||||
|
||||
smull2 v7.4S, \y1\().8H, z4
|
||||
smlal2 v17.4S, \y2\().8H, z5
|
||||
smlsl2 v18.4S, \y2\().8H, z1
|
||||
smull2 v16.4S, \y3\().8H, z2
|
||||
smlal2 v5.4S, \y2\().8H, z7
|
||||
add v19.4S, v19.4S, v7.4S
|
||||
sub v20.4S, v20.4S, v7.4S
|
||||
sub v21.4S, v21.4S, v7.4S
|
||||
add v22.4S, v22.4S, v7.4S
|
||||
smlal2 v6.4S, \y2\().8H, z3
|
||||
smull2 v7.4S, \y3\().8H, z6
|
||||
smlal2 v17.4S, \y4\().8H, z7
|
||||
smlsl2 v18.4S, \y4\().8H, z5
|
||||
smlal2 v5.4S, \y4\().8H, z3
|
||||
smlsl2 v6.4S, \y4\().8H, z1
|
||||
add v19.4S, v19.4S, v7.4S
|
||||
sub v20.4S, v20.4S, v16.4S
|
||||
add v21.4S, v21.4S, v16.4S
|
||||
sub v22.4S, v22.4S, v7.4S
|
||||
smull2 v7.4s, \y1\().8h, z4
|
||||
smlal2 v17.4s, \y2\().8h, z5
|
||||
smlsl2 v18.4s, \y2\().8h, z1
|
||||
smull2 v16.4s, \y3\().8h, z2
|
||||
smlal2 v5.4s, \y2\().8h, z7
|
||||
add v19.4s, v19.4s, v7.4s
|
||||
sub v20.4s, v20.4s, v7.4s
|
||||
sub v21.4s, v21.4s, v7.4s
|
||||
add v22.4s, v22.4s, v7.4s
|
||||
smlal2 v6.4s, \y2\().8h, z3
|
||||
smull2 v7.4s, \y3\().8h, z6
|
||||
smlal2 v17.4s, \y4\().8h, z7
|
||||
smlsl2 v18.4s, \y4\().8h, z5
|
||||
smlal2 v5.4s, \y4\().8h, z3
|
||||
smlsl2 v6.4s, \y4\().8h, z1
|
||||
add v19.4s, v19.4s, v7.4s
|
||||
sub v20.4s, v20.4s, v16.4s
|
||||
add v21.4s, v21.4s, v16.4s
|
||||
sub v22.4s, v22.4s, v7.4s
|
||||
|
||||
\pass: add \y3\().4S, v19.4S, v17.4S
|
||||
add \y4\().4S, v20.4S, v18.4S
|
||||
shrn \y1\().4H, \y3\().4S, #ROW_SHIFT
|
||||
shrn \y2\().4H, \y4\().4S, #ROW_SHIFT
|
||||
add v7.4S, v21.4S, v5.4S
|
||||
add v16.4S, v22.4S, v6.4S
|
||||
shrn \y3\().4H, v7.4S, #ROW_SHIFT
|
||||
shrn \y4\().4H, v16.4S, #ROW_SHIFT
|
||||
sub v22.4S, v22.4S, v6.4S
|
||||
sub v19.4S, v19.4S, v17.4S
|
||||
sub v21.4S, v21.4S, v5.4S
|
||||
shrn2 \y1\().8H, v22.4S, #ROW_SHIFT
|
||||
sub v20.4S, v20.4S, v18.4S
|
||||
shrn2 \y2\().8H, v21.4S, #ROW_SHIFT
|
||||
shrn2 \y3\().8H, v20.4S, #ROW_SHIFT
|
||||
shrn2 \y4\().8H, v19.4S, #ROW_SHIFT
|
||||
add \y4\().4s, v20.4s, v18.4s
|
||||
shrn \y1\().4h, \y3\().4s, #ROW_SHIFT
|
||||
shrn \y2\().4h, \y4\().4s, #ROW_SHIFT
|
||||
add v7.4s, v21.4s, v5.4s
|
||||
add v16.4s, v22.4s, v6.4s
|
||||
shrn \y3\().4h, v7.4s, #ROW_SHIFT
|
||||
shrn \y4\().4h, v16.4s, #ROW_SHIFT
|
||||
sub v22.4s, v22.4s, v6.4s
|
||||
sub v19.4s, v19.4s, v17.4s
|
||||
sub v21.4s, v21.4s, v5.4s
|
||||
shrn2 \y1\().8h, v22.4s, #ROW_SHIFT
|
||||
sub v20.4s, v20.4s, v18.4s
|
||||
shrn2 \y2\().8h, v21.4s, #ROW_SHIFT
|
||||
shrn2 \y3\().8h, v20.4s, #ROW_SHIFT
|
||||
shrn2 \y4\().8h, v19.4s, #ROW_SHIFT
|
||||
|
||||
trn1 v16.8H, \y1\().8H, \y2\().8H
|
||||
trn2 v17.8H, \y1\().8H, \y2\().8H
|
||||
trn1 v18.8H, \y3\().8H, \y4\().8H
|
||||
trn2 v19.8H, \y3\().8H, \y4\().8H
|
||||
trn1 \y1\().4S, v16.4S, v18.4S
|
||||
trn1 \y2\().4S, v17.4S, v19.4S
|
||||
trn2 \y3\().4S, v16.4S, v18.4S
|
||||
trn2 \y4\().4S, v17.4S, v19.4S
|
||||
trn1 v16.8h, \y1\().8h, \y2\().8h
|
||||
trn2 v17.8h, \y1\().8h, \y2\().8h
|
||||
trn1 v18.8h, \y3\().8h, \y4\().8h
|
||||
trn2 v19.8h, \y3\().8h, \y4\().8h
|
||||
trn1 \y1\().4s, v16.4s, v18.4s
|
||||
trn1 \y2\().4s, v17.4s, v19.4s
|
||||
trn2 \y3\().4s, v16.4s, v18.4s
|
||||
trn2 \y4\().4s, v17.4s, v19.4s
|
||||
.endm
|
||||
|
||||
.macro declare_idct_col4_neon i, l
|
||||
function idct_col4_neon\i
|
||||
dup v23.4H, z4c
|
||||
dup v23.4h, z4c
|
||||
.if \i == 1
|
||||
add v23.4H, v23.4H, v24.4H
|
||||
add v23.4h, v23.4h, v24.4h
|
||||
.else
|
||||
mov v5.D[0], v24.D[1]
|
||||
add v23.4H, v23.4H, v5.4H
|
||||
mov v5.d[0], v24.d[1]
|
||||
add v23.4h, v23.4h, v5.4h
|
||||
.endif
|
||||
smull v23.4S, v23.4H, z4
|
||||
smull v23.4s, v23.4h, z4
|
||||
|
||||
idct_col4_top v24, v25, v26, v27, \i, \l
|
||||
|
||||
mov x4, v28.D[\i - 1]
|
||||
mov x5, v29.D[\i - 1]
|
||||
mov x4, v28.d[\i - 1]
|
||||
mov x5, v29.d[\i - 1]
|
||||
cmp x4, #0
|
||||
b.eq 1f
|
||||
|
||||
smull\i v7.4S, v28\l, z4
|
||||
add v19.4S, v19.4S, v7.4S
|
||||
sub v20.4S, v20.4S, v7.4S
|
||||
sub v21.4S, v21.4S, v7.4S
|
||||
add v22.4S, v22.4S, v7.4S
|
||||
smull\i v7.4s, v28\l, z4
|
||||
add v19.4s, v19.4s, v7.4s
|
||||
sub v20.4s, v20.4s, v7.4s
|
||||
sub v21.4s, v21.4s, v7.4s
|
||||
add v22.4s, v22.4s, v7.4s
|
||||
|
||||
1: mov x4, v30.D[\i - 1]
|
||||
1: mov x4, v30.d[\i - 1]
|
||||
cmp x5, #0
|
||||
b.eq 2f
|
||||
|
||||
smlal\i v17.4S, v29\l, z5
|
||||
smlsl\i v18.4S, v29\l, z1
|
||||
smlal\i v5.4S, v29\l, z7
|
||||
smlal\i v6.4S, v29\l, z3
|
||||
smlal\i v17.4s, v29\l, z5
|
||||
smlsl\i v18.4s, v29\l, z1
|
||||
smlal\i v5.4s, v29\l, z7
|
||||
smlal\i v6.4s, v29\l, z3
|
||||
|
||||
2: mov x5, v31.D[\i - 1]
|
||||
2: mov x5, v31.d[\i - 1]
|
||||
cmp x4, #0
|
||||
b.eq 3f
|
||||
|
||||
smull\i v7.4S, v30\l, z6
|
||||
smull\i v16.4S, v30\l, z2
|
||||
add v19.4S, v19.4S, v7.4S
|
||||
sub v22.4S, v22.4S, v7.4S
|
||||
sub v20.4S, v20.4S, v16.4S
|
||||
add v21.4S, v21.4S, v16.4S
|
||||
smull\i v7.4s, v30\l, z6
|
||||
smull\i v16.4s, v30\l, z2
|
||||
add v19.4s, v19.4s, v7.4s
|
||||
sub v22.4s, v22.4s, v7.4s
|
||||
sub v20.4s, v20.4s, v16.4s
|
||||
add v21.4s, v21.4s, v16.4s
|
||||
|
||||
3: cmp x5, #0
|
||||
b.eq 4f
|
||||
|
||||
smlal\i v17.4S, v31\l, z7
|
||||
smlsl\i v18.4S, v31\l, z5
|
||||
smlal\i v5.4S, v31\l, z3
|
||||
smlsl\i v6.4S, v31\l, z1
|
||||
smlal\i v17.4s, v31\l, z7
|
||||
smlsl\i v18.4s, v31\l, z5
|
||||
smlal\i v5.4s, v31\l, z3
|
||||
smlsl\i v6.4s, v31\l, z1
|
||||
|
||||
4: addhn v7.4H, v19.4S, v17.4S
|
||||
addhn2 v7.8H, v20.4S, v18.4S
|
||||
subhn v18.4H, v20.4S, v18.4S
|
||||
subhn2 v18.8H, v19.4S, v17.4S
|
||||
4: addhn v7.4h, v19.4s, v17.4s
|
||||
addhn2 v7.8h, v20.4s, v18.4s
|
||||
subhn v18.4h, v20.4s, v18.4s
|
||||
subhn2 v18.8h, v19.4s, v17.4s
|
||||
|
||||
addhn v16.4H, v21.4S, v5.4S
|
||||
addhn2 v16.8H, v22.4S, v6.4S
|
||||
subhn v17.4H, v22.4S, v6.4S
|
||||
subhn2 v17.8H, v21.4S, v5.4S
|
||||
addhn v16.4h, v21.4s, v5.4s
|
||||
addhn2 v16.8h, v22.4s, v6.4s
|
||||
subhn v17.4h, v22.4s, v6.4s
|
||||
subhn2 v17.8h, v21.4s, v5.4s
|
||||
|
||||
ret
|
||||
endfunc
|
||||
@@ -229,33 +229,33 @@ function ff_simple_idct_put_neon, export=1
|
||||
idct_row4_neon v28, v29, v30, v31, 2
|
||||
bl idct_col4_neon1
|
||||
|
||||
sqshrun v1.8B, v7.8H, #COL_SHIFT-16
|
||||
sqshrun2 v1.16B, v16.8H, #COL_SHIFT-16
|
||||
sqshrun v3.8B, v17.8H, #COL_SHIFT-16
|
||||
sqshrun2 v3.16B, v18.8H, #COL_SHIFT-16
|
||||
sqshrun v1.8b, v7.8h, #COL_SHIFT-16
|
||||
sqshrun2 v1.16b, v16.8h, #COL_SHIFT-16
|
||||
sqshrun v3.8b, v17.8h, #COL_SHIFT-16
|
||||
sqshrun2 v3.16b, v18.8h, #COL_SHIFT-16
|
||||
|
||||
bl idct_col4_neon2
|
||||
|
||||
sqshrun v2.8B, v7.8H, #COL_SHIFT-16
|
||||
sqshrun2 v2.16B, v16.8H, #COL_SHIFT-16
|
||||
sqshrun v4.8B, v17.8H, #COL_SHIFT-16
|
||||
sqshrun2 v4.16B, v18.8H, #COL_SHIFT-16
|
||||
sqshrun v2.8b, v7.8h, #COL_SHIFT-16
|
||||
sqshrun2 v2.16b, v16.8h, #COL_SHIFT-16
|
||||
sqshrun v4.8b, v17.8h, #COL_SHIFT-16
|
||||
sqshrun2 v4.16b, v18.8h, #COL_SHIFT-16
|
||||
|
||||
zip1 v16.4S, v1.4S, v2.4S
|
||||
zip2 v17.4S, v1.4S, v2.4S
|
||||
zip1 v16.4s, v1.4s, v2.4s
|
||||
zip2 v17.4s, v1.4s, v2.4s
|
||||
|
||||
st1 {v16.D}[0], [x0], x1
|
||||
st1 {v16.D}[1], [x0], x1
|
||||
st1 {v16.d}[0], [x0], x1
|
||||
st1 {v16.d}[1], [x0], x1
|
||||
|
||||
zip1 v18.4S, v3.4S, v4.4S
|
||||
zip2 v19.4S, v3.4S, v4.4S
|
||||
zip1 v18.4s, v3.4s, v4.4s
|
||||
zip2 v19.4s, v3.4s, v4.4s
|
||||
|
||||
st1 {v17.D}[0], [x0], x1
|
||||
st1 {v17.D}[1], [x0], x1
|
||||
st1 {v18.D}[0], [x0], x1
|
||||
st1 {v18.D}[1], [x0], x1
|
||||
st1 {v19.D}[0], [x0], x1
|
||||
st1 {v19.D}[1], [x0], x1
|
||||
st1 {v17.d}[0], [x0], x1
|
||||
st1 {v17.d}[1], [x0], x1
|
||||
st1 {v18.d}[0], [x0], x1
|
||||
st1 {v18.d}[1], [x0], x1
|
||||
st1 {v19.d}[0], [x0], x1
|
||||
st1 {v19.d}[1], [x0], x1
|
||||
|
||||
idct_end
|
||||
endfunc
|
||||
@@ -267,59 +267,59 @@ function ff_simple_idct_add_neon, export=1
|
||||
idct_row4_neon v28, v29, v30, v31, 2
|
||||
bl idct_col4_neon1
|
||||
|
||||
sshr v1.8H, v7.8H, #COL_SHIFT-16
|
||||
sshr v2.8H, v16.8H, #COL_SHIFT-16
|
||||
sshr v3.8H, v17.8H, #COL_SHIFT-16
|
||||
sshr v4.8H, v18.8H, #COL_SHIFT-16
|
||||
sshr v1.8h, v7.8h, #COL_SHIFT-16
|
||||
sshr v2.8h, v16.8h, #COL_SHIFT-16
|
||||
sshr v3.8h, v17.8h, #COL_SHIFT-16
|
||||
sshr v4.8h, v18.8h, #COL_SHIFT-16
|
||||
|
||||
bl idct_col4_neon2
|
||||
|
||||
sshr v7.8H, v7.8H, #COL_SHIFT-16
|
||||
sshr v16.8H, v16.8H, #COL_SHIFT-16
|
||||
sshr v17.8H, v17.8H, #COL_SHIFT-16
|
||||
sshr v18.8H, v18.8H, #COL_SHIFT-16
|
||||
sshr v7.8h, v7.8h, #COL_SHIFT-16
|
||||
sshr v16.8h, v16.8h, #COL_SHIFT-16
|
||||
sshr v17.8h, v17.8h, #COL_SHIFT-16
|
||||
sshr v18.8h, v18.8h, #COL_SHIFT-16
|
||||
|
||||
mov x9, x0
|
||||
ld1 {v19.D}[0], [x0], x1
|
||||
zip1 v23.2D, v1.2D, v7.2D
|
||||
zip2 v24.2D, v1.2D, v7.2D
|
||||
ld1 {v19.D}[1], [x0], x1
|
||||
zip1 v25.2D, v2.2D, v16.2D
|
||||
zip2 v26.2D, v2.2D, v16.2D
|
||||
ld1 {v20.D}[0], [x0], x1
|
||||
zip1 v27.2D, v3.2D, v17.2D
|
||||
zip2 v28.2D, v3.2D, v17.2D
|
||||
ld1 {v20.D}[1], [x0], x1
|
||||
zip1 v29.2D, v4.2D, v18.2D
|
||||
zip2 v30.2D, v4.2D, v18.2D
|
||||
ld1 {v21.D}[0], [x0], x1
|
||||
uaddw v23.8H, v23.8H, v19.8B
|
||||
uaddw2 v24.8H, v24.8H, v19.16B
|
||||
ld1 {v21.D}[1], [x0], x1
|
||||
sqxtun v23.8B, v23.8H
|
||||
sqxtun2 v23.16B, v24.8H
|
||||
ld1 {v22.D}[0], [x0], x1
|
||||
uaddw v24.8H, v25.8H, v20.8B
|
||||
uaddw2 v25.8H, v26.8H, v20.16B
|
||||
ld1 {v22.D}[1], [x0], x1
|
||||
sqxtun v24.8B, v24.8H
|
||||
sqxtun2 v24.16B, v25.8H
|
||||
st1 {v23.D}[0], [x9], x1
|
||||
uaddw v25.8H, v27.8H, v21.8B
|
||||
uaddw2 v26.8H, v28.8H, v21.16B
|
||||
st1 {v23.D}[1], [x9], x1
|
||||
sqxtun v25.8B, v25.8H
|
||||
sqxtun2 v25.16B, v26.8H
|
||||
st1 {v24.D}[0], [x9], x1
|
||||
uaddw v26.8H, v29.8H, v22.8B
|
||||
uaddw2 v27.8H, v30.8H, v22.16B
|
||||
st1 {v24.D}[1], [x9], x1
|
||||
sqxtun v26.8B, v26.8H
|
||||
sqxtun2 v26.16B, v27.8H
|
||||
st1 {v25.D}[0], [x9], x1
|
||||
st1 {v25.D}[1], [x9], x1
|
||||
st1 {v26.D}[0], [x9], x1
|
||||
st1 {v26.D}[1], [x9], x1
|
||||
ld1 {v19.d}[0], [x0], x1
|
||||
zip1 v23.2d, v1.2d, v7.2d
|
||||
zip2 v24.2d, v1.2d, v7.2d
|
||||
ld1 {v19.d}[1], [x0], x1
|
||||
zip1 v25.2d, v2.2d, v16.2d
|
||||
zip2 v26.2d, v2.2d, v16.2d
|
||||
ld1 {v20.d}[0], [x0], x1
|
||||
zip1 v27.2d, v3.2d, v17.2d
|
||||
zip2 v28.2d, v3.2d, v17.2d
|
||||
ld1 {v20.d}[1], [x0], x1
|
||||
zip1 v29.2d, v4.2d, v18.2d
|
||||
zip2 v30.2d, v4.2d, v18.2d
|
||||
ld1 {v21.d}[0], [x0], x1
|
||||
uaddw v23.8h, v23.8h, v19.8b
|
||||
uaddw2 v24.8h, v24.8h, v19.16b
|
||||
ld1 {v21.d}[1], [x0], x1
|
||||
sqxtun v23.8b, v23.8h
|
||||
sqxtun2 v23.16b, v24.8h
|
||||
ld1 {v22.d}[0], [x0], x1
|
||||
uaddw v24.8h, v25.8h, v20.8b
|
||||
uaddw2 v25.8h, v26.8h, v20.16b
|
||||
ld1 {v22.d}[1], [x0], x1
|
||||
sqxtun v24.8b, v24.8h
|
||||
sqxtun2 v24.16b, v25.8h
|
||||
st1 {v23.d}[0], [x9], x1
|
||||
uaddw v25.8h, v27.8h, v21.8b
|
||||
uaddw2 v26.8h, v28.8h, v21.16b
|
||||
st1 {v23.d}[1], [x9], x1
|
||||
sqxtun v25.8b, v25.8h
|
||||
sqxtun2 v25.16b, v26.8h
|
||||
st1 {v24.d}[0], [x9], x1
|
||||
uaddw v26.8h, v29.8h, v22.8b
|
||||
uaddw2 v27.8h, v30.8h, v22.16b
|
||||
st1 {v24.d}[1], [x9], x1
|
||||
sqxtun v26.8b, v26.8h
|
||||
sqxtun2 v26.16b, v27.8h
|
||||
st1 {v25.d}[0], [x9], x1
|
||||
st1 {v25.d}[1], [x9], x1
|
||||
st1 {v26.d}[0], [x9], x1
|
||||
st1 {v26.d}[1], [x9], x1
|
||||
|
||||
idct_end
|
||||
endfunc
|
||||
@@ -333,30 +333,30 @@ function ff_simple_idct_neon, export=1
|
||||
sub x2, x2, #128
|
||||
bl idct_col4_neon1
|
||||
|
||||
sshr v1.8H, v7.8H, #COL_SHIFT-16
|
||||
sshr v2.8H, v16.8H, #COL_SHIFT-16
|
||||
sshr v3.8H, v17.8H, #COL_SHIFT-16
|
||||
sshr v4.8H, v18.8H, #COL_SHIFT-16
|
||||
sshr v1.8h, v7.8h, #COL_SHIFT-16
|
||||
sshr v2.8h, v16.8h, #COL_SHIFT-16
|
||||
sshr v3.8h, v17.8h, #COL_SHIFT-16
|
||||
sshr v4.8h, v18.8h, #COL_SHIFT-16
|
||||
|
||||
bl idct_col4_neon2
|
||||
|
||||
sshr v7.8H, v7.8H, #COL_SHIFT-16
|
||||
sshr v16.8H, v16.8H, #COL_SHIFT-16
|
||||
sshr v17.8H, v17.8H, #COL_SHIFT-16
|
||||
sshr v18.8H, v18.8H, #COL_SHIFT-16
|
||||
sshr v7.8h, v7.8h, #COL_SHIFT-16
|
||||
sshr v16.8h, v16.8h, #COL_SHIFT-16
|
||||
sshr v17.8h, v17.8h, #COL_SHIFT-16
|
||||
sshr v18.8h, v18.8h, #COL_SHIFT-16
|
||||
|
||||
zip1 v23.2D, v1.2D, v7.2D
|
||||
zip2 v24.2D, v1.2D, v7.2D
|
||||
st1 {v23.2D,v24.2D}, [x2], #32
|
||||
zip1 v25.2D, v2.2D, v16.2D
|
||||
zip2 v26.2D, v2.2D, v16.2D
|
||||
st1 {v25.2D,v26.2D}, [x2], #32
|
||||
zip1 v27.2D, v3.2D, v17.2D
|
||||
zip2 v28.2D, v3.2D, v17.2D
|
||||
st1 {v27.2D,v28.2D}, [x2], #32
|
||||
zip1 v29.2D, v4.2D, v18.2D
|
||||
zip2 v30.2D, v4.2D, v18.2D
|
||||
st1 {v29.2D,v30.2D}, [x2], #32
|
||||
zip1 v23.2d, v1.2d, v7.2d
|
||||
zip2 v24.2d, v1.2d, v7.2d
|
||||
st1 {v23.2d,v24.2d}, [x2], #32
|
||||
zip1 v25.2d, v2.2d, v16.2d
|
||||
zip2 v26.2d, v2.2d, v16.2d
|
||||
st1 {v25.2d,v26.2d}, [x2], #32
|
||||
zip1 v27.2d, v3.2d, v17.2d
|
||||
zip2 v28.2d, v3.2d, v17.2d
|
||||
st1 {v27.2d,v28.2d}, [x2], #32
|
||||
zip1 v29.2d, v4.2d, v18.2d
|
||||
zip2 v30.2d, v4.2d, v18.2d
|
||||
st1 {v29.2d,v30.2d}, [x2], #32
|
||||
|
||||
idct_end
|
||||
endfunc
|
||||
|
||||
+151
-151
@@ -330,32 +330,32 @@ endfunc
|
||||
// v17: hev
|
||||
|
||||
// convert to signed value:
|
||||
eor v3.16b, v3.16b, v21.16b // PS0 = P0 ^ 0x80
|
||||
eor v4.16b, v4.16b, v21.16b // QS0 = Q0 ^ 0x80
|
||||
eor v3.16b, v3.16b, v21.16b // PS0 = P0 ^ 0x80
|
||||
eor v4.16b, v4.16b, v21.16b // QS0 = Q0 ^ 0x80
|
||||
|
||||
movi v20.8h, #3
|
||||
ssubl v18.8h, v4.8b, v3.8b // QS0 - PS0
|
||||
ssubl2 v19.8h, v4.16b, v3.16b // (widened to 16bit)
|
||||
eor v2.16b, v2.16b, v21.16b // PS1 = P1 ^ 0x80
|
||||
eor v5.16b, v5.16b, v21.16b // QS1 = Q1 ^ 0x80
|
||||
mul v18.8h, v18.8h, v20.8h // w = 3 * (QS0 - PS0)
|
||||
mul v19.8h, v19.8h, v20.8h
|
||||
movi v20.8h, #3
|
||||
ssubl v18.8h, v4.8b, v3.8b // QS0 - PS0
|
||||
ssubl2 v19.8h, v4.16b, v3.16b // (widened to 16bit)
|
||||
eor v2.16b, v2.16b, v21.16b // PS1 = P1 ^ 0x80
|
||||
eor v5.16b, v5.16b, v21.16b // QS1 = Q1 ^ 0x80
|
||||
mul v18.8h, v18.8h, v20.8h // w = 3 * (QS0 - PS0)
|
||||
mul v19.8h, v19.8h, v20.8h
|
||||
|
||||
sqsub v20.16b, v2.16b, v5.16b // clamp(PS1-QS1)
|
||||
movi v22.16b, #4
|
||||
movi v23.16b, #3
|
||||
sqsub v20.16b, v2.16b, v5.16b // clamp(PS1-QS1)
|
||||
movi v22.16b, #4
|
||||
movi v23.16b, #3
|
||||
.if \inner
|
||||
and v20.16b, v20.16b, v17.16b // if(hev) w += clamp(PS1-QS1)
|
||||
and v20.16b, v20.16b, v17.16b // if(hev) w += clamp(PS1-QS1)
|
||||
.endif
|
||||
saddw v18.8h, v18.8h, v20.8b // w += clamp(PS1-QS1)
|
||||
saddw2 v19.8h, v19.8h, v20.16b
|
||||
sqxtn v18.8b, v18.8h // narrow result back into v18
|
||||
sqxtn2 v18.16b, v19.8h
|
||||
saddw v18.8h, v18.8h, v20.8b // w += clamp(PS1-QS1)
|
||||
saddw2 v19.8h, v19.8h, v20.16b
|
||||
sqxtn v18.8b, v18.8h // narrow result back into v18
|
||||
sqxtn2 v18.16b, v19.8h
|
||||
.if !\inner && !\simple
|
||||
eor v1.16b, v1.16b, v21.16b // PS2 = P2 ^ 0x80
|
||||
eor v6.16b, v6.16b, v21.16b // QS2 = Q2 ^ 0x80
|
||||
eor v1.16b, v1.16b, v21.16b // PS2 = P2 ^ 0x80
|
||||
eor v6.16b, v6.16b, v21.16b // QS2 = Q2 ^ 0x80
|
||||
.endif
|
||||
and v18.16b, v18.16b, v16.16b // w &= normal_limit
|
||||
and v18.16b, v18.16b, v16.16b // w &= normal_limit
|
||||
|
||||
// registers used at this point..
|
||||
// v0 -> P3 (don't corrupt)
|
||||
@@ -375,44 +375,44 @@ endfunc
|
||||
// P0 = s2u(PS0 + c2);
|
||||
|
||||
.if \simple
|
||||
sqadd v19.16b, v18.16b, v22.16b // c1 = clamp((w&hev)+4)
|
||||
sqadd v20.16b, v18.16b, v23.16b // c2 = clamp((w&hev)+3)
|
||||
sshr v19.16b, v19.16b, #3 // c1 >>= 3
|
||||
sshr v20.16b, v20.16b, #3 // c2 >>= 3
|
||||
sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1)
|
||||
sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2)
|
||||
eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80
|
||||
eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80
|
||||
eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80
|
||||
eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80
|
||||
sqadd v19.16b, v18.16b, v22.16b // c1 = clamp((w&hev)+4)
|
||||
sqadd v20.16b, v18.16b, v23.16b // c2 = clamp((w&hev)+3)
|
||||
sshr v19.16b, v19.16b, #3 // c1 >>= 3
|
||||
sshr v20.16b, v20.16b, #3 // c2 >>= 3
|
||||
sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1)
|
||||
sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2)
|
||||
eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80
|
||||
eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80
|
||||
eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80
|
||||
eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80
|
||||
.elseif \inner
|
||||
// the !is4tap case of filter_common, only used for inner blocks
|
||||
// c3 = ((c1&~hev) + 1) >> 1;
|
||||
// Q1 = s2u(QS1 - c3);
|
||||
// P1 = s2u(PS1 + c3);
|
||||
sqadd v19.16b, v18.16b, v22.16b // c1 = clamp((w&hev)+4)
|
||||
sqadd v20.16b, v18.16b, v23.16b // c2 = clamp((w&hev)+3)
|
||||
sshr v19.16b, v19.16b, #3 // c1 >>= 3
|
||||
sshr v20.16b, v20.16b, #3 // c2 >>= 3
|
||||
sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1)
|
||||
sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2)
|
||||
bic v19.16b, v19.16b, v17.16b // c1 & ~hev
|
||||
eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80
|
||||
srshr v19.16b, v19.16b, #1 // c3 >>= 1
|
||||
eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80
|
||||
sqsub v5.16b, v5.16b, v19.16b // QS1 = clamp(QS1-c3)
|
||||
sqadd v2.16b, v2.16b, v19.16b // PS1 = clamp(PS1+c3)
|
||||
eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80
|
||||
eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80
|
||||
sqadd v19.16b, v18.16b, v22.16b // c1 = clamp((w&hev)+4)
|
||||
sqadd v20.16b, v18.16b, v23.16b // c2 = clamp((w&hev)+3)
|
||||
sshr v19.16b, v19.16b, #3 // c1 >>= 3
|
||||
sshr v20.16b, v20.16b, #3 // c2 >>= 3
|
||||
sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1)
|
||||
sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2)
|
||||
bic v19.16b, v19.16b, v17.16b // c1 & ~hev
|
||||
eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80
|
||||
srshr v19.16b, v19.16b, #1 // c3 >>= 1
|
||||
eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80
|
||||
sqsub v5.16b, v5.16b, v19.16b // QS1 = clamp(QS1-c3)
|
||||
sqadd v2.16b, v2.16b, v19.16b // PS1 = clamp(PS1+c3)
|
||||
eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80
|
||||
eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80
|
||||
.else
|
||||
and v20.16b, v18.16b, v17.16b // w & hev
|
||||
sqadd v19.16b, v20.16b, v22.16b // c1 = clamp((w&hev)+4)
|
||||
sqadd v20.16b, v20.16b, v23.16b // c2 = clamp((w&hev)+3)
|
||||
sshr v19.16b, v19.16b, #3 // c1 >>= 3
|
||||
sshr v20.16b, v20.16b, #3 // c2 >>= 3
|
||||
bic v18.16b, v18.16b, v17.16b // w &= ~hev
|
||||
sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1)
|
||||
sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2)
|
||||
and v20.16b, v18.16b, v17.16b // w & hev
|
||||
sqadd v19.16b, v20.16b, v22.16b // c1 = clamp((w&hev)+4)
|
||||
sqadd v20.16b, v20.16b, v23.16b // c2 = clamp((w&hev)+3)
|
||||
sshr v19.16b, v19.16b, #3 // c1 >>= 3
|
||||
sshr v20.16b, v20.16b, #3 // c2 >>= 3
|
||||
bic v18.16b, v18.16b, v17.16b // w &= ~hev
|
||||
sqsub v4.16b, v4.16b, v19.16b // QS0 = clamp(QS0-c1)
|
||||
sqadd v3.16b, v3.16b, v20.16b // PS0 = clamp(PS0+c2)
|
||||
|
||||
// filter_mbedge:
|
||||
// a = clamp((27*w + 63) >> 7);
|
||||
@@ -424,35 +424,35 @@ endfunc
|
||||
// a = clamp((9*w + 63) >> 7);
|
||||
// Q2 = s2u(QS2 - a);
|
||||
// P2 = s2u(PS2 + a);
|
||||
movi v17.8h, #63
|
||||
sshll v22.8h, v18.8b, #3
|
||||
sshll2 v23.8h, v18.16b, #3
|
||||
saddw v22.8h, v22.8h, v18.8b
|
||||
saddw2 v23.8h, v23.8h, v18.16b
|
||||
add v16.8h, v17.8h, v22.8h
|
||||
add v17.8h, v17.8h, v23.8h // 9*w + 63
|
||||
add v19.8h, v16.8h, v22.8h
|
||||
add v20.8h, v17.8h, v23.8h // 18*w + 63
|
||||
add v22.8h, v19.8h, v22.8h
|
||||
add v23.8h, v20.8h, v23.8h // 27*w + 63
|
||||
sqshrn v16.8b, v16.8h, #7
|
||||
sqshrn2 v16.16b, v17.8h, #7 // clamp(( 9*w + 63)>>7)
|
||||
sqshrn v19.8b, v19.8h, #7
|
||||
sqshrn2 v19.16b, v20.8h, #7 // clamp((18*w + 63)>>7)
|
||||
sqshrn v22.8b, v22.8h, #7
|
||||
sqshrn2 v22.16b, v23.8h, #7 // clamp((27*w + 63)>>7)
|
||||
sqadd v1.16b, v1.16b, v16.16b // PS2 = clamp(PS2+a)
|
||||
sqsub v6.16b, v6.16b, v16.16b // QS2 = clamp(QS2-a)
|
||||
sqadd v2.16b, v2.16b, v19.16b // PS1 = clamp(PS1+a)
|
||||
sqsub v5.16b, v5.16b, v19.16b // QS1 = clamp(QS1-a)
|
||||
sqadd v3.16b, v3.16b, v22.16b // PS0 = clamp(PS0+a)
|
||||
sqsub v4.16b, v4.16b, v22.16b // QS0 = clamp(QS0-a)
|
||||
eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80
|
||||
eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80
|
||||
eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80
|
||||
eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80
|
||||
eor v1.16b, v1.16b, v21.16b // P2 = PS2 ^ 0x80
|
||||
eor v6.16b, v6.16b, v21.16b // Q2 = QS2 ^ 0x80
|
||||
movi v17.8h, #63
|
||||
sshll v22.8h, v18.8b, #3
|
||||
sshll2 v23.8h, v18.16b, #3
|
||||
saddw v22.8h, v22.8h, v18.8b
|
||||
saddw2 v23.8h, v23.8h, v18.16b
|
||||
add v16.8h, v17.8h, v22.8h
|
||||
add v17.8h, v17.8h, v23.8h // 9*w + 63
|
||||
add v19.8h, v16.8h, v22.8h
|
||||
add v20.8h, v17.8h, v23.8h // 18*w + 63
|
||||
add v22.8h, v19.8h, v22.8h
|
||||
add v23.8h, v20.8h, v23.8h // 27*w + 63
|
||||
sqshrn v16.8b, v16.8h, #7
|
||||
sqshrn2 v16.16b, v17.8h, #7 // clamp(( 9*w + 63)>>7)
|
||||
sqshrn v19.8b, v19.8h, #7
|
||||
sqshrn2 v19.16b, v20.8h, #7 // clamp((18*w + 63)>>7)
|
||||
sqshrn v22.8b, v22.8h, #7
|
||||
sqshrn2 v22.16b, v23.8h, #7 // clamp((27*w + 63)>>7)
|
||||
sqadd v1.16b, v1.16b, v16.16b // PS2 = clamp(PS2+a)
|
||||
sqsub v6.16b, v6.16b, v16.16b // QS2 = clamp(QS2-a)
|
||||
sqadd v2.16b, v2.16b, v19.16b // PS1 = clamp(PS1+a)
|
||||
sqsub v5.16b, v5.16b, v19.16b // QS1 = clamp(QS1-a)
|
||||
sqadd v3.16b, v3.16b, v22.16b // PS0 = clamp(PS0+a)
|
||||
sqsub v4.16b, v4.16b, v22.16b // QS0 = clamp(QS0-a)
|
||||
eor v3.16b, v3.16b, v21.16b // P0 = PS0 ^ 0x80
|
||||
eor v4.16b, v4.16b, v21.16b // Q0 = QS0 ^ 0x80
|
||||
eor v2.16b, v2.16b, v21.16b // P1 = PS1 ^ 0x80
|
||||
eor v5.16b, v5.16b, v21.16b // Q1 = QS1 ^ 0x80
|
||||
eor v1.16b, v1.16b, v21.16b // P2 = PS2 ^ 0x80
|
||||
eor v6.16b, v6.16b, v21.16b // Q2 = QS2 ^ 0x80
|
||||
.endif
|
||||
.endm
|
||||
|
||||
@@ -507,48 +507,48 @@ function ff_vp8_v_loop_filter8uv\name\()_neon, export=1
|
||||
sub x0, x0, x2, lsl #2
|
||||
sub x1, x1, x2, lsl #2
|
||||
// Load pixels:
|
||||
ld1 {v0.d}[0], [x0], x2 // P3
|
||||
ld1 {v0.d}[1], [x1], x2 // P3
|
||||
ld1 {v1.d}[0], [x0], x2 // P2
|
||||
ld1 {v1.d}[1], [x1], x2 // P2
|
||||
ld1 {v2.d}[0], [x0], x2 // P1
|
||||
ld1 {v2.d}[1], [x1], x2 // P1
|
||||
ld1 {v3.d}[0], [x0], x2 // P0
|
||||
ld1 {v3.d}[1], [x1], x2 // P0
|
||||
ld1 {v4.d}[0], [x0], x2 // Q0
|
||||
ld1 {v4.d}[1], [x1], x2 // Q0
|
||||
ld1 {v5.d}[0], [x0], x2 // Q1
|
||||
ld1 {v5.d}[1], [x1], x2 // Q1
|
||||
ld1 {v6.d}[0], [x0], x2 // Q2
|
||||
ld1 {v6.d}[1], [x1], x2 // Q2
|
||||
ld1 {v7.d}[0], [x0] // Q3
|
||||
ld1 {v7.d}[1], [x1] // Q3
|
||||
ld1 {v0.d}[0], [x0], x2 // P3
|
||||
ld1 {v0.d}[1], [x1], x2 // P3
|
||||
ld1 {v1.d}[0], [x0], x2 // P2
|
||||
ld1 {v1.d}[1], [x1], x2 // P2
|
||||
ld1 {v2.d}[0], [x0], x2 // P1
|
||||
ld1 {v2.d}[1], [x1], x2 // P1
|
||||
ld1 {v3.d}[0], [x0], x2 // P0
|
||||
ld1 {v3.d}[1], [x1], x2 // P0
|
||||
ld1 {v4.d}[0], [x0], x2 // Q0
|
||||
ld1 {v4.d}[1], [x1], x2 // Q0
|
||||
ld1 {v5.d}[0], [x0], x2 // Q1
|
||||
ld1 {v5.d}[1], [x1], x2 // Q1
|
||||
ld1 {v6.d}[0], [x0], x2 // Q2
|
||||
ld1 {v6.d}[1], [x1], x2 // Q2
|
||||
ld1 {v7.d}[0], [x0] // Q3
|
||||
ld1 {v7.d}[1], [x1] // Q3
|
||||
|
||||
dup v22.16b, w3 // flim_E
|
||||
dup v23.16b, w4 // flim_I
|
||||
dup v22.16b, w3 // flim_E
|
||||
dup v23.16b, w4 // flim_I
|
||||
|
||||
vp8_loop_filter inner=\inner, hev_thresh=w5
|
||||
|
||||
// back up to P2: u,v -= stride * 6
|
||||
sub x0, x0, x2, lsl #2
|
||||
sub x1, x1, x2, lsl #2
|
||||
sub x0, x0, x2, lsl #1
|
||||
sub x1, x1, x2, lsl #1
|
||||
sub x0, x0, x2, lsl #2
|
||||
sub x1, x1, x2, lsl #2
|
||||
sub x0, x0, x2, lsl #1
|
||||
sub x1, x1, x2, lsl #1
|
||||
|
||||
// Store pixels:
|
||||
|
||||
st1 {v1.d}[0], [x0], x2 // P2
|
||||
st1 {v1.d}[1], [x1], x2 // P2
|
||||
st1 {v2.d}[0], [x0], x2 // P1
|
||||
st1 {v2.d}[1], [x1], x2 // P1
|
||||
st1 {v3.d}[0], [x0], x2 // P0
|
||||
st1 {v3.d}[1], [x1], x2 // P0
|
||||
st1 {v4.d}[0], [x0], x2 // Q0
|
||||
st1 {v4.d}[1], [x1], x2 // Q0
|
||||
st1 {v5.d}[0], [x0], x2 // Q1
|
||||
st1 {v5.d}[1], [x1], x2 // Q1
|
||||
st1 {v6.d}[0], [x0] // Q2
|
||||
st1 {v6.d}[1], [x1] // Q2
|
||||
st1 {v1.d}[0], [x0], x2 // P2
|
||||
st1 {v1.d}[1], [x1], x2 // P2
|
||||
st1 {v2.d}[0], [x0], x2 // P1
|
||||
st1 {v2.d}[1], [x1], x2 // P1
|
||||
st1 {v3.d}[0], [x0], x2 // P0
|
||||
st1 {v3.d}[1], [x1], x2 // P0
|
||||
st1 {v4.d}[0], [x0], x2 // Q0
|
||||
st1 {v4.d}[1], [x1], x2 // Q0
|
||||
st1 {v5.d}[0], [x0], x2 // Q1
|
||||
st1 {v5.d}[1], [x1], x2 // Q1
|
||||
st1 {v6.d}[0], [x0] // Q2
|
||||
st1 {v6.d}[1], [x1] // Q2
|
||||
|
||||
ret
|
||||
endfunc
|
||||
@@ -579,7 +579,7 @@ function ff_vp8_h_loop_filter16\name\()_neon, export=1
|
||||
ld1 {v6.d}[1], [x0], x1
|
||||
ld1 {v7.d}[1], [x0], x1
|
||||
|
||||
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
|
||||
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
|
||||
|
||||
dup v22.16b, w2 // flim_E
|
||||
.if !\simple
|
||||
@@ -590,7 +590,7 @@ function ff_vp8_h_loop_filter16\name\()_neon, export=1
|
||||
|
||||
sub x0, x0, x1, lsl #4 // backup 16 rows
|
||||
|
||||
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
|
||||
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
|
||||
|
||||
// Store pixels:
|
||||
st1 {v0.d}[0], [x0], x1
|
||||
@@ -624,24 +624,24 @@ function ff_vp8_h_loop_filter8uv\name\()_neon, export=1
|
||||
sub x1, x1, #4
|
||||
|
||||
// Load pixels:
|
||||
ld1 {v0.d}[0], [x0], x2 // load u
|
||||
ld1 {v0.d}[1], [x1], x2 // load v
|
||||
ld1 {v1.d}[0], [x0], x2
|
||||
ld1 {v1.d}[1], [x1], x2
|
||||
ld1 {v2.d}[0], [x0], x2
|
||||
ld1 {v2.d}[1], [x1], x2
|
||||
ld1 {v3.d}[0], [x0], x2
|
||||
ld1 {v3.d}[1], [x1], x2
|
||||
ld1 {v4.d}[0], [x0], x2
|
||||
ld1 {v4.d}[1], [x1], x2
|
||||
ld1 {v5.d}[0], [x0], x2
|
||||
ld1 {v5.d}[1], [x1], x2
|
||||
ld1 {v6.d}[0], [x0], x2
|
||||
ld1 {v6.d}[1], [x1], x2
|
||||
ld1 {v7.d}[0], [x0], x2
|
||||
ld1 {v7.d}[1], [x1], x2
|
||||
ld1 {v0.d}[0], [x0], x2 // load u
|
||||
ld1 {v0.d}[1], [x1], x2 // load v
|
||||
ld1 {v1.d}[0], [x0], x2
|
||||
ld1 {v1.d}[1], [x1], x2
|
||||
ld1 {v2.d}[0], [x0], x2
|
||||
ld1 {v2.d}[1], [x1], x2
|
||||
ld1 {v3.d}[0], [x0], x2
|
||||
ld1 {v3.d}[1], [x1], x2
|
||||
ld1 {v4.d}[0], [x0], x2
|
||||
ld1 {v4.d}[1], [x1], x2
|
||||
ld1 {v5.d}[0], [x0], x2
|
||||
ld1 {v5.d}[1], [x1], x2
|
||||
ld1 {v6.d}[0], [x0], x2
|
||||
ld1 {v6.d}[1], [x1], x2
|
||||
ld1 {v7.d}[0], [x0], x2
|
||||
ld1 {v7.d}[1], [x1], x2
|
||||
|
||||
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
|
||||
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
|
||||
|
||||
dup v22.16b, w3 // flim_E
|
||||
dup v23.16b, w4 // flim_I
|
||||
@@ -651,25 +651,25 @@ function ff_vp8_h_loop_filter8uv\name\()_neon, export=1
|
||||
sub x0, x0, x2, lsl #3 // backup u 8 rows
|
||||
sub x1, x1, x2, lsl #3 // backup v 8 rows
|
||||
|
||||
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
|
||||
transpose_8x16B v0, v1, v2, v3, v4, v5, v6, v7, v30, v31
|
||||
|
||||
// Store pixels:
|
||||
st1 {v0.d}[0], [x0], x2 // load u
|
||||
st1 {v0.d}[1], [x1], x2 // load v
|
||||
st1 {v1.d}[0], [x0], x2
|
||||
st1 {v1.d}[1], [x1], x2
|
||||
st1 {v2.d}[0], [x0], x2
|
||||
st1 {v2.d}[1], [x1], x2
|
||||
st1 {v3.d}[0], [x0], x2
|
||||
st1 {v3.d}[1], [x1], x2
|
||||
st1 {v4.d}[0], [x0], x2
|
||||
st1 {v4.d}[1], [x1], x2
|
||||
st1 {v5.d}[0], [x0], x2
|
||||
st1 {v5.d}[1], [x1], x2
|
||||
st1 {v6.d}[0], [x0], x2
|
||||
st1 {v6.d}[1], [x1], x2
|
||||
st1 {v7.d}[0], [x0]
|
||||
st1 {v7.d}[1], [x1]
|
||||
st1 {v0.d}[0], [x0], x2 // load u
|
||||
st1 {v0.d}[1], [x1], x2 // load v
|
||||
st1 {v1.d}[0], [x0], x2
|
||||
st1 {v1.d}[1], [x1], x2
|
||||
st1 {v2.d}[0], [x0], x2
|
||||
st1 {v2.d}[1], [x1], x2
|
||||
st1 {v3.d}[0], [x0], x2
|
||||
st1 {v3.d}[1], [x1], x2
|
||||
st1 {v4.d}[0], [x0], x2
|
||||
st1 {v4.d}[1], [x1], x2
|
||||
st1 {v5.d}[0], [x0], x2
|
||||
st1 {v5.d}[1], [x1], x2
|
||||
st1 {v6.d}[0], [x0], x2
|
||||
st1 {v6.d}[1], [x1], x2
|
||||
st1 {v7.d}[0], [x0]
|
||||
st1 {v7.d}[1], [x1]
|
||||
|
||||
ret
|
||||
|
||||
|
||||
@@ -230,6 +230,9 @@ function \type\()_8tap_\size\()h_\idx1\idx2
|
||||
// reduced dst stride
|
||||
.if \size >= 16
|
||||
sub x1, x1, x5
|
||||
.elseif \size == 4
|
||||
add x12, x2, #8
|
||||
add x13, x7, #8
|
||||
.endif
|
||||
// size >= 16 loads two qwords and increments x2,
|
||||
// for size 4/8 it's enough with one qword and no
|
||||
@@ -248,9 +251,14 @@ function \type\()_8tap_\size\()h_\idx1\idx2
|
||||
.if \size >= 16
|
||||
ld1 {v4.8b, v5.8b, v6.8b}, [x2], #24
|
||||
ld1 {v16.8b, v17.8b, v18.8b}, [x7], #24
|
||||
.else
|
||||
.elseif \size == 8
|
||||
ld1 {v4.8b, v5.8b}, [x2]
|
||||
ld1 {v16.8b, v17.8b}, [x7]
|
||||
.else // \size == 4
|
||||
ld1 {v4.8b}, [x2]
|
||||
ld1 {v16.8b}, [x7]
|
||||
ld1 {v5.s}[0], [x12], x3
|
||||
ld1 {v17.s}[0], [x13], x3
|
||||
.endif
|
||||
uxtl v4.8h, v4.8b
|
||||
uxtl v5.8h, v5.8b
|
||||
|
||||
+1
-2
@@ -470,8 +470,7 @@ static av_cold int aic_decode_init(AVCodecContext *avctx)
|
||||
}
|
||||
}
|
||||
|
||||
ctx->slice_data = av_malloc_array(ctx->slice_width, AIC_BAND_COEFFS
|
||||
* sizeof(*ctx->slice_data));
|
||||
ctx->slice_data = av_calloc(ctx->slice_width, AIC_BAND_COEFFS * sizeof(*ctx->slice_data));
|
||||
if (!ctx->slice_data) {
|
||||
av_log(avctx, AV_LOG_ERROR, "Error allocating slice buffer\n");
|
||||
|
||||
|
||||
+6
-2
@@ -2116,8 +2116,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
|
||||
ctx->nbits = av_malloc_array(ctx->cur_frame_length, sizeof(*ctx->nbits));
|
||||
ctx->mlz = av_mallocz(sizeof(*ctx->mlz));
|
||||
|
||||
if (!ctx->mlz || !ctx->acf || !ctx->shift_value || !ctx->last_shift_value
|
||||
|| !ctx->last_acf_mantissa || !ctx->raw_mantissa) {
|
||||
if (!ctx->larray || !ctx->nbits || !ctx->mlz || !ctx->acf || !ctx->shift_value
|
||||
|| !ctx->last_shift_value || !ctx->last_acf_mantissa || !ctx->raw_mantissa) {
|
||||
av_log(avctx, AV_LOG_ERROR, "Allocating buffer memory failed.\n");
|
||||
ret = AVERROR(ENOMEM);
|
||||
goto fail;
|
||||
@@ -2128,6 +2128,10 @@ static av_cold int decode_init(AVCodecContext *avctx)
|
||||
|
||||
for (c = 0; c < avctx->channels; ++c) {
|
||||
ctx->raw_mantissa[c] = av_mallocz_array(ctx->cur_frame_length, sizeof(**ctx->raw_mantissa));
|
||||
if (!ctx->raw_mantissa[c]) {
|
||||
av_log(avctx, AV_LOG_ERROR, "Allocating buffer memory failed.\n");
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -48,4 +48,3 @@ function ff_scalarproduct_int16_neon, export=1
|
||||
vmov.32 r0, d3[0]
|
||||
bx lr
|
||||
endfunc
|
||||
|
||||
|
||||
@@ -229,7 +229,7 @@ A .endif
|
||||
.endif
|
||||
|
||||
// Begin loop
|
||||
01:
|
||||
1:
|
||||
.if TOTAL_TAPS == 0
|
||||
// Things simplify a lot in this case
|
||||
// In fact this could be pipelined further if it's worth it...
|
||||
@@ -241,7 +241,7 @@ A .endif
|
||||
str ST0, [PST, #-4]!
|
||||
str ST0, [PST, #4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)]
|
||||
str ST0, [PSAMP], #4 * MAX_CHANNELS
|
||||
bne 01b
|
||||
bne 1b
|
||||
.else
|
||||
.if \fir_taps & 1
|
||||
.set LOAD_REG, 1
|
||||
@@ -333,7 +333,7 @@ T orr AC0, AC0, AC1
|
||||
str ST3, [PST, #-4]!
|
||||
str ST2, [PST, #4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)]
|
||||
str ST3, [PSAMP], #4 * MAX_CHANNELS
|
||||
bne 01b
|
||||
bne 1b
|
||||
.endif
|
||||
b 99f
|
||||
|
||||
|
||||
@@ -279,11 +279,13 @@ function \type\()_8tap_\size\()h_\idx1\idx2
|
||||
sub r1, r1, r5
|
||||
.endif
|
||||
@ size >= 16 loads two qwords and increments r2,
|
||||
@ for size 4/8 it's enough with one qword and no
|
||||
@ postincrement
|
||||
@ size 4 loads 1 d word, increments r2 and loads 1 32-bit lane
|
||||
@ for size 8 it's enough with one qword and no postincrement
|
||||
.if \size >= 16
|
||||
sub r3, r3, r5
|
||||
sub r3, r3, #8
|
||||
.elseif \size == 4
|
||||
sub r3, r3, #8
|
||||
.endif
|
||||
@ Load the filter vector
|
||||
vld1.16 {q0}, [r12,:128]
|
||||
@@ -295,9 +297,14 @@ function \type\()_8tap_\size\()h_\idx1\idx2
|
||||
.if \size >= 16
|
||||
vld1.8 {d18, d19, d20}, [r2]!
|
||||
vld1.8 {d24, d25, d26}, [r7]!
|
||||
.else
|
||||
.elseif \size == 8
|
||||
vld1.8 {q9}, [r2]
|
||||
vld1.8 {q12}, [r7]
|
||||
.else @ size == 4
|
||||
vld1.8 {d18}, [r2]!
|
||||
vld1.8 {d24}, [r7]!
|
||||
vld1.32 {d19[0]}, [r2]
|
||||
vld1.32 {d25[0]}, [r7]
|
||||
.endif
|
||||
vmovl.u8 q8, d18
|
||||
vmovl.u8 q9, d19
|
||||
|
||||
@@ -1294,6 +1294,10 @@ typedef struct AVCodecContext {
|
||||
* this callback and filled with the extra buffers if there are more
|
||||
* buffers than buf[] can hold. extended_buf will be freed in
|
||||
* av_frame_unref().
|
||||
* Decoders will generally initialize the whole buffer before it is output
|
||||
* but it can in rare error conditions happen that uninitialized data is passed
|
||||
* through. \important The buffers returned by get_buffer* should thus not contain sensitive
|
||||
* data.
|
||||
*
|
||||
* If AV_CODEC_CAP_DR1 is not set then get_buffer2() must call
|
||||
* avcodec_default_get_buffer2() instead of providing buffers allocated by
|
||||
|
||||
+13
-2
@@ -36,7 +36,7 @@ static int cbs_av1_read_uvlc(CodedBitstreamContext *ctx, GetBitContext *gbc,
|
||||
position = get_bits_count(gbc);
|
||||
|
||||
zeroes = 0;
|
||||
while (1) {
|
||||
while (zeroes < 32) {
|
||||
if (get_bits_left(gbc) < 1) {
|
||||
av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid uvlc code at "
|
||||
"%s: bitstream ended.\n", name);
|
||||
@@ -49,7 +49,18 @@ static int cbs_av1_read_uvlc(CodedBitstreamContext *ctx, GetBitContext *gbc,
|
||||
}
|
||||
|
||||
if (zeroes >= 32) {
|
||||
value = MAX_UINT_BITS(32);
|
||||
// The spec allows at least thirty-two zero bits followed by a
|
||||
// one to mean 2^32-1, with no constraint on the number of
|
||||
// zeroes. The libaom reference decoder does not match this,
|
||||
// instead reading thirty-two zeroes but not the following one
|
||||
// to mean 2^32-1. These two interpretations are incompatible
|
||||
// and other implementations may follow one or the other.
|
||||
// Therefore we reject thirty-two zeroes because the intended
|
||||
// behaviour is not clear.
|
||||
av_log(ctx->log_ctx, AV_LOG_ERROR, "Thirty-two zero bits in "
|
||||
"%s uvlc code: considered invalid due to conflicting "
|
||||
"standard and reference decoder behaviour.\n", name);
|
||||
return AVERROR_INVALIDDATA;
|
||||
} else {
|
||||
if (get_bits_left(gbc) < zeroes) {
|
||||
av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid uvlc code at "
|
||||
|
||||
@@ -166,13 +166,13 @@ static int cbs_jpeg_split_fragment(CodedBitstreamContext *ctx,
|
||||
}
|
||||
} else {
|
||||
i = start;
|
||||
if (i + 2 > frag->data_size) {
|
||||
if (i > frag->data_size - 2) {
|
||||
av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid JPEG image: "
|
||||
"truncated at %02x marker.\n", marker);
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
length = AV_RB16(frag->data + i);
|
||||
if (i + length > frag->data_size) {
|
||||
if (length > frag->data_size - i) {
|
||||
av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid JPEG image: "
|
||||
"truncated at %02x marker segment.\n", marker);
|
||||
return AVERROR_INVALIDDATA;
|
||||
|
||||
@@ -422,7 +422,7 @@ static int cbs_vp9_split_fragment(CodedBitstreamContext *ctx,
|
||||
superframe_header = frag->data[frag->data_size - 1];
|
||||
|
||||
if ((superframe_header & 0xe0) == 0xc0) {
|
||||
VP9RawSuperframeIndex sfi;
|
||||
VP9RawSuperframeIndex sfi = {0};
|
||||
GetBitContext gbc;
|
||||
size_t index_size, pos;
|
||||
int i;
|
||||
|
||||
@@ -91,4 +91,3 @@ AVCodec ff_cljr_decoder = {
|
||||
.decode = decode_frame,
|
||||
.capabilities = AV_CODEC_CAP_DR1,
|
||||
};
|
||||
|
||||
|
||||
@@ -195,7 +195,7 @@ static void dequant_subband_ ## PX ## _c(uint8_t *src, uint8_t *dst, ptrdiff_t s
|
||||
{ \
|
||||
int i, y; \
|
||||
for (y = 0; y < tot_v; y++) { \
|
||||
PX c, sign, *src_r = (PX *)src, *dst_r = (PX *)dst; \
|
||||
PX c, *src_r = (PX *)src, *dst_r = (PX *)dst; \
|
||||
for (i = 0; i < tot_h; i++) { \
|
||||
c = *src_r++; \
|
||||
if (c < 0) c = -((-(unsigned)c*qf + qs) >> 2); \
|
||||
|
||||
@@ -259,4 +259,3 @@ av_cold int ff_dvvideo_init(AVCodecContext *avctx)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -338,4 +338,3 @@ const AVDVProfile *av_dv_codec_profile2(int width, int height,
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
+1
-1
@@ -440,7 +440,7 @@ static int get_opcodes(GetByteContext *gb, uint32_t *table, uint8_t *dst, int op
|
||||
|
||||
size_in_bits = bytestream2_get_le32(gb);
|
||||
endoffset = ((size_in_bits + 7) >> 3) - 4;
|
||||
if (endoffset <= 0 || bytestream2_get_bytes_left(gb) < endoffset)
|
||||
if ((int)endoffset <= 0 || bytestream2_get_bytes_left(gb) < endoffset)
|
||||
return AVERROR_INVALIDDATA;
|
||||
|
||||
offset = endoffset;
|
||||
|
||||
+5
-5
@@ -111,7 +111,7 @@ static int dxva_get_decoder_configuration(AVCodecContext *avctx,
|
||||
|
||||
for (i = 0; i < cfg_count; i++) {
|
||||
unsigned score;
|
||||
UINT ConfigBitstreamRaw;
|
||||
UINT ConfigBitstreamRaw = 0;
|
||||
GUID guidConfigBitstreamEncryption;
|
||||
|
||||
#if CONFIG_D3D11VA
|
||||
@@ -262,7 +262,7 @@ static int dxva_get_decoder_guid(AVCodecContext *avctx, void *service, void *sur
|
||||
*decoder_guid = ff_GUID_NULL;
|
||||
for (i = 0; dxva_modes[i].guid; i++) {
|
||||
const dxva_mode *mode = &dxva_modes[i];
|
||||
int validate;
|
||||
int validate = 0;
|
||||
if (!dxva_check_codec_compatibility(avctx, mode))
|
||||
continue;
|
||||
|
||||
@@ -794,7 +794,7 @@ int ff_dxva2_commit_buffer(AVCodecContext *avctx,
|
||||
unsigned type, const void *data, unsigned size,
|
||||
unsigned mb_count)
|
||||
{
|
||||
void *dxva_data;
|
||||
void *dxva_data = NULL;
|
||||
unsigned dxva_size;
|
||||
int result;
|
||||
HRESULT hr = 0;
|
||||
@@ -816,7 +816,7 @@ int ff_dxva2_commit_buffer(AVCodecContext *avctx,
|
||||
type, (unsigned)hr);
|
||||
return -1;
|
||||
}
|
||||
if (size <= dxva_size) {
|
||||
if (dxva_data && size <= dxva_size) {
|
||||
memcpy(dxva_data, data, size);
|
||||
|
||||
#if CONFIG_D3D11VA
|
||||
@@ -894,7 +894,7 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
|
||||
#endif
|
||||
DECODER_BUFFER_DESC *buffer = NULL, *buffer_slice = NULL;
|
||||
int result, runs = 0;
|
||||
HRESULT hr;
|
||||
HRESULT hr = -1;
|
||||
unsigned type;
|
||||
FFDXVASharedContext *sctx = DXVA_SHARED_CONTEXT(avctx);
|
||||
|
||||
|
||||
+4
-1
@@ -198,12 +198,15 @@ static int cmv_decode_frame(AVCodecContext *avctx,
|
||||
if ((ret = av_image_check_size(s->width, s->height, 0, s->avctx)) < 0)
|
||||
return ret;
|
||||
|
||||
buf += EA_PREAMBLE_SIZE;
|
||||
if (!(buf[0]&1) && buf_end - buf < s->width * s->height * (int64_t)(100 - s->avctx->discard_damaged_percentage) / 100)
|
||||
return AVERROR_INVALIDDATA;
|
||||
|
||||
if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
|
||||
return ret;
|
||||
|
||||
memcpy(frame->data[1], s->palette, AVPALETTE_SIZE);
|
||||
|
||||
buf += EA_PREAMBLE_SIZE;
|
||||
if ((buf[0]&1)) { // subtype
|
||||
cmv_decode_inter(s, frame, buf+2, buf_end);
|
||||
frame->key_frame = 0;
|
||||
|
||||
@@ -113,6 +113,13 @@ av_cold int ff_ffv1_init_slices_state(FFV1Context *f)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ff_need_new_slices(int width, int num_h_slices, int chroma_shift) {
|
||||
int mpw = 1<<chroma_shift;
|
||||
int i = width * (int64_t)(num_h_slices - 1) / num_h_slices;
|
||||
|
||||
return width % mpw && (width - i) % mpw == 0;
|
||||
}
|
||||
|
||||
av_cold int ff_ffv1_init_slice_contexts(FFV1Context *f)
|
||||
{
|
||||
int i, max_slice_count = f->num_h_slices * f->num_v_slices;
|
||||
|
||||
@@ -146,6 +146,7 @@ int ff_ffv1_init_slice_contexts(FFV1Context *f);
|
||||
int ff_ffv1_allocate_initial_states(FFV1Context *f);
|
||||
void ff_ffv1_clear_slice_state(FFV1Context *f, FFV1Context *fs);
|
||||
int ff_ffv1_close(AVCodecContext *avctx);
|
||||
int ff_need_new_slices(int width, int num_h_slices, int chroma_shift);
|
||||
|
||||
static av_always_inline int fold(int diff, int bits)
|
||||
{
|
||||
|
||||
@@ -50,4 +50,3 @@ static inline int RENAME(get_context)(PlaneContext *p, TYPE *src,
|
||||
p->quant_table[1][(LT - T) & 0xFF] +
|
||||
p->quant_table[2][(T - RT) & 0xFF];
|
||||
}
|
||||
|
||||
|
||||
@@ -361,7 +361,7 @@ static int decode_slice(AVCodecContext *c, void *arg)
|
||||
if (fs->ac != AC_GOLOMB_RICE && f->version > 2) {
|
||||
int v;
|
||||
get_rac(&fs->c, (uint8_t[]) { 129 });
|
||||
v = fs->c.bytestream_end - fs->c.bytestream - 2 - 5*f->ec;
|
||||
v = fs->c.bytestream_end - fs->c.bytestream - 2 - 5*!!f->ec;
|
||||
if (v) {
|
||||
av_log(f->avctx, AV_LOG_ERROR, "bytestream end mismatching by %d\n", v);
|
||||
fs->slice_damaged = 1;
|
||||
|
||||
+23
-12
@@ -199,7 +199,7 @@ static av_always_inline av_flatten void put_symbol_inline(RangeCoder *c,
|
||||
} while (0)
|
||||
|
||||
if (v) {
|
||||
const int a = FFABS(v);
|
||||
const unsigned a = is_signed ? FFABS(v) : v;
|
||||
const int e = av_log2(a);
|
||||
put_rac(c, state + 0, 0);
|
||||
if (e <= 9) {
|
||||
@@ -526,6 +526,11 @@ static av_cold int encode_init(AVCodecContext *avctx)
|
||||
avctx->slices > 1)
|
||||
s->version = FFMAX(s->version, 2);
|
||||
|
||||
if ((avctx->flags & (AV_CODEC_FLAG_PASS1 | AV_CODEC_FLAG_PASS2)) && s->ac == AC_GOLOMB_RICE) {
|
||||
av_log(avctx, AV_LOG_ERROR, "2 Pass mode is not possible with golomb coding\n");
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
|
||||
// Unspecified level & slices, we choose version 1.2+ to ensure multithreaded decodability
|
||||
if (avctx->slices == 0 && avctx->level < 0 && avctx->width * avctx->height > 720*576)
|
||||
s->version = FFMAX(s->version, 2);
|
||||
@@ -550,7 +555,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
|
||||
s->version = FFMAX(s->version, 3);
|
||||
|
||||
if ((s->version == 2 || s->version>3) && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
|
||||
av_log(avctx, AV_LOG_ERROR, "Version 2 needed for requested features but version 2 is experimental and not enabled\n");
|
||||
av_log(avctx, AV_LOG_ERROR, "Version 2 or 4 needed for requested features but version 2 or 4 is experimental and not enabled\n");
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
|
||||
@@ -735,19 +740,21 @@ FF_ENABLE_DEPRECATION_WARNINGS
|
||||
s->quant_tables[1][2][i]= 11*11*quant5 [i];
|
||||
s->quant_tables[1][3][i]= 5*11*11*quant5 [i];
|
||||
s->quant_tables[1][4][i]= 5*5*11*11*quant5 [i];
|
||||
s->context_count[0] = (11 * 11 * 11 + 1) / 2;
|
||||
s->context_count[1] = (11 * 11 * 5 * 5 * 5 + 1) / 2;
|
||||
} else {
|
||||
s->quant_tables[0][0][i]= quant9_10bit[i];
|
||||
s->quant_tables[0][1][i]= 11*quant9_10bit[i];
|
||||
s->quant_tables[0][2][i]= 11*11*quant9_10bit[i];
|
||||
s->quant_tables[0][1][i]= 9*quant9_10bit[i];
|
||||
s->quant_tables[0][2][i]= 9*9*quant9_10bit[i];
|
||||
s->quant_tables[1][0][i]= quant9_10bit[i];
|
||||
s->quant_tables[1][1][i]= 11*quant9_10bit[i];
|
||||
s->quant_tables[1][2][i]= 11*11*quant5_10bit[i];
|
||||
s->quant_tables[1][3][i]= 5*11*11*quant5_10bit[i];
|
||||
s->quant_tables[1][4][i]= 5*5*11*11*quant5_10bit[i];
|
||||
s->quant_tables[1][1][i]= 9*quant9_10bit[i];
|
||||
s->quant_tables[1][2][i]= 9*9*quant5_10bit[i];
|
||||
s->quant_tables[1][3][i]= 5*9*9*quant5_10bit[i];
|
||||
s->quant_tables[1][4][i]= 5*5*9*9*quant5_10bit[i];
|
||||
s->context_count[0] = (9 * 9 * 9 + 1) / 2;
|
||||
s->context_count[1] = (9 * 9 * 5 * 5 * 5 + 1) / 2;
|
||||
}
|
||||
}
|
||||
s->context_count[0] = (11 * 11 * 11 + 1) / 2;
|
||||
s->context_count[1] = (11 * 11 * 5 * 5 * 5 + 1) / 2;
|
||||
memcpy(s->quant_table, s->quant_tables[s->context_model],
|
||||
sizeof(s->quant_table));
|
||||
|
||||
@@ -885,6 +892,10 @@ FF_ENABLE_DEPRECATION_WARNINGS
|
||||
continue;
|
||||
if (maxw * maxh * (int64_t)(s->bits_per_raw_sample+1) * plane_count > 8<<24)
|
||||
continue;
|
||||
if (s->version < 4)
|
||||
if ( ff_need_new_slices(avctx->width , s->num_h_slices, s->chroma_h_shift)
|
||||
||ff_need_new_slices(avctx->height, s->num_v_slices, s->chroma_v_shift))
|
||||
continue;
|
||||
if (avctx->slices == s->num_h_slices * s->num_v_slices && avctx->slices <= MAX_SLICES || !avctx->slices)
|
||||
goto slices_ok;
|
||||
}
|
||||
@@ -933,8 +944,8 @@ static void encode_slice_header(FFV1Context *f, FFV1Context *fs)
|
||||
|
||||
put_symbol(c, state, (fs->slice_x +1)*f->num_h_slices / f->width , 0);
|
||||
put_symbol(c, state, (fs->slice_y +1)*f->num_v_slices / f->height , 0);
|
||||
put_symbol(c, state, (fs->slice_width +1)*f->num_h_slices / f->width -1, 0);
|
||||
put_symbol(c, state, (fs->slice_height+1)*f->num_v_slices / f->height-1, 0);
|
||||
put_symbol(c, state, 0, 0);
|
||||
put_symbol(c, state, 0, 0);
|
||||
for (j=0; j<f->plane_count; j++) {
|
||||
put_symbol(c, state, f->plane[j].quant_table_index, 0);
|
||||
av_assert0(f->plane[j].quant_table_index == f->context_model);
|
||||
|
||||
@@ -199,4 +199,3 @@ static int RENAME(encode_rgb_frame)(FFV1Context *s, const uint8_t *src[4],
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -359,6 +359,8 @@ static int check_header_mismatch(FLACParseContext *fpc,
|
||||
for (i = 0; i < FLAC_MAX_SEQUENTIAL_HEADERS && curr != child; i++)
|
||||
curr = curr->next;
|
||||
|
||||
av_assert0(i < FLAC_MAX_SEQUENTIAL_HEADERS);
|
||||
|
||||
if (header->link_penalty[i] < FLAC_HEADER_CRC_FAIL_PENALTY ||
|
||||
header->link_penalty[i] == FLAC_HEADER_NOT_PENALIZED_YET) {
|
||||
FLACHeaderMarker *start, *end;
|
||||
|
||||
@@ -402,6 +402,7 @@ static inline int get_ur_golomb(GetBitContext *gb, int k, int limit,
|
||||
log = av_log2(buf);
|
||||
|
||||
if (log > 31 - limit) {
|
||||
av_assert2(log >= k);
|
||||
buf >>= log - k;
|
||||
buf += (30U - log) << k;
|
||||
LAST_SKIP_BITS(re, gb, 32 + k - log);
|
||||
@@ -424,6 +425,8 @@ static inline int get_ur_golomb(GetBitContext *gb, int k, int limit,
|
||||
|
||||
/**
|
||||
* read unsigned golomb rice code (jpegls).
|
||||
*
|
||||
* @returns -1 on error
|
||||
*/
|
||||
static inline int get_ur_golomb_jpegls(GetBitContext *gb, int k, int limit,
|
||||
int esc_len)
|
||||
@@ -535,6 +538,8 @@ static inline int get_sr_golomb(GetBitContext *gb, int k, int limit,
|
||||
|
||||
/**
|
||||
* read signed golomb rice code (flac).
|
||||
*
|
||||
* @returns INT_MIN on error
|
||||
*/
|
||||
static inline int get_sr_golomb_flac(GetBitContext *gb, int k, int limit,
|
||||
int esc_len)
|
||||
|
||||
+10
-4
@@ -426,7 +426,9 @@ int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
|
||||
MpegEncContext *s = avctx->priv_data;
|
||||
int ret;
|
||||
int slice_ret = 0;
|
||||
|
||||
AVFrame *pict = data;
|
||||
int bak_width, bak_height;
|
||||
|
||||
/* no supplementary picture */
|
||||
if (buf_size == 0) {
|
||||
@@ -490,6 +492,9 @@ retry:
|
||||
// we need the idct permutation for reading a custom matrix
|
||||
ff_mpv_idct_init(s);
|
||||
|
||||
bak_width = s->width;
|
||||
bak_height = s->height;
|
||||
|
||||
/* let's go :-) */
|
||||
if (CONFIG_WMV2_DECODER && s->msmpeg4_version == 5) {
|
||||
ret = ff_wmv2_decode_picture_header(s);
|
||||
@@ -512,11 +517,12 @@ retry:
|
||||
}
|
||||
|
||||
if (ret < 0 || ret == FRAME_SKIPPED) {
|
||||
if ( s->width != avctx->coded_width
|
||||
|| s->height != avctx->coded_height) {
|
||||
if ( s->width != bak_width
|
||||
|| s->height != bak_height) {
|
||||
av_log(s->avctx, AV_LOG_WARNING, "Reverting picture dimensions change due to header decoding failure\n");
|
||||
s->width = avctx->coded_width;
|
||||
s->height= avctx->coded_height;
|
||||
s->width = bak_width;
|
||||
s->height= bak_height;
|
||||
|
||||
}
|
||||
}
|
||||
if (ret == FRAME_SKIPPED)
|
||||
|
||||
@@ -162,4 +162,3 @@ static void MCFUNC(hl_motion)(const H264Context *h, H264SliceContext *sl,
|
||||
if (USES_LIST(mb_type, 1))
|
||||
prefetch_motion(h, sl, 1, PIXEL_SHIFT, CHROMA_IDC);
|
||||
}
|
||||
|
||||
|
||||
@@ -372,6 +372,7 @@ static int hap_decode(AVCodecContext *avctx, void *data,
|
||||
ret = av_reallocp(&ctx->tex_buf, ctx->tex_size);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
memset(ctx->tex_buf, 0, ctx->tex_size);
|
||||
|
||||
avctx->execute2(avctx, decompress_chunks_thread, NULL,
|
||||
ctx->chunk_results, ctx->chunk_count);
|
||||
|
||||
@@ -1554,4 +1554,3 @@ void ff_hevc_hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
|
||||
case 0: lc->pu.mvd.y = 0; break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -572,6 +572,10 @@ static int hls_slice_header(HEVCContext *s)
|
||||
|
||||
if (s->ps.pps->dependent_slice_segments_enabled_flag)
|
||||
sh->dependent_slice_segment_flag = get_bits1(gb);
|
||||
if (sh->dependent_slice_segment_flag && !s->slice_initialized) {
|
||||
av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
|
||||
slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
|
||||
s->ps.sps->ctb_height);
|
||||
@@ -840,9 +844,6 @@ static int hls_slice_header(HEVCContext *s)
|
||||
} else {
|
||||
sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
|
||||
}
|
||||
} else if (!s->slice_initialized) {
|
||||
av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
|
||||
sh->num_entry_point_offsets = 0;
|
||||
@@ -2946,8 +2947,11 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
|
||||
case HEVC_NAL_RASL_N:
|
||||
case HEVC_NAL_RASL_R:
|
||||
ret = hls_slice_header(s);
|
||||
if (ret < 0)
|
||||
if (ret < 0) {
|
||||
// hls_slice_header() does not cleanup on failure thus the state now is inconsistant so we cannot use it on depandant slices
|
||||
s->slice_initialized = 0;
|
||||
return ret;
|
||||
}
|
||||
if (ret == 1) {
|
||||
ret = AVERROR_INVALIDDATA;
|
||||
goto fail;
|
||||
|
||||
@@ -738,6 +738,8 @@ static void decode_gray_bitstream(HYuvContext *s, int count)
|
||||
for (i = 0; i < count && BITS_LEFT(re, &s->gb) > 0; i++) {
|
||||
READ_2PIX(s->temp[0][2 * i], s->temp[0][2 * i + 1], 0);
|
||||
}
|
||||
for (; i < count; i++)
|
||||
s->temp[0][2 * i] = s->temp[0][2 * i + 1] = 0;
|
||||
} else {
|
||||
for (i = 0; i < count; i++) {
|
||||
READ_2PIX(s->temp[0][2 * i], s->temp[0][2 * i + 1], 0);
|
||||
|
||||
+2
-2
@@ -583,7 +583,7 @@ static int decode_byterun2(uint8_t *dst, int height, int line_size,
|
||||
GetByteContext *gb)
|
||||
{
|
||||
GetByteContext cmds;
|
||||
unsigned count;
|
||||
int count;
|
||||
int i, y_pos = 0, x_pos = 0;
|
||||
|
||||
if (bytestream2_get_be32(gb) != MKBETAG('V', 'D', 'A', 'T'))
|
||||
@@ -591,7 +591,7 @@ static int decode_byterun2(uint8_t *dst, int height, int line_size,
|
||||
|
||||
bytestream2_skip(gb, 4);
|
||||
count = bytestream2_get_be16(gb) - 2;
|
||||
if (bytestream2_get_bytes_left(gb) < count)
|
||||
if (count < 0 || bytestream2_get_bytes_left(gb) < count)
|
||||
return 0;
|
||||
|
||||
bytestream2_init(&cmds, gb->buffer, count);
|
||||
|
||||
@@ -653,7 +653,7 @@ static void get_codebook(int16_t * cbvec, /* (o) Constructed codebook vector *
|
||||
int16_t k, base_size;
|
||||
int16_t lag;
|
||||
/* Stack based */
|
||||
int16_t tempbuff2[SUBL + 5];
|
||||
int16_t tempbuff2[SUBL + 5] = {0};
|
||||
|
||||
/* Determine size of codebook sections */
|
||||
base_size = lMem - cbveclen + 1;
|
||||
|
||||
+9
-2
@@ -232,12 +232,15 @@ static int decode_intra(AVCodecContext *avctx, GetBitContext *gb, AVFrame *frame
|
||||
|
||||
for (y = 0; y < avctx->height; y += 16) {
|
||||
for (x = 0; x < avctx->width; x += 16) {
|
||||
unsigned flag, cbphi, cbplo;
|
||||
unsigned flag, cbplo;
|
||||
int cbphi;
|
||||
|
||||
cbplo = get_vlc2(gb, cbplo_tab.table, cbplo_tab.bits, 1) >> 4;
|
||||
flag = get_bits1(gb);
|
||||
|
||||
cbphi = get_cbphi(gb, 1);
|
||||
if (cbphi < 0)
|
||||
return cbphi;
|
||||
|
||||
ret = decode_blocks(avctx, gb, cbplo | (cbphi << 2), 0, offset, flag);
|
||||
if (ret < 0)
|
||||
@@ -285,7 +288,8 @@ static int decode_inter(AVCodecContext *avctx, GetBitContext *gb,
|
||||
for (y = 0; y < avctx->height; y += 16) {
|
||||
for (x = 0; x < avctx->width; x += 16) {
|
||||
int reverse, intra_block, value;
|
||||
unsigned cbphi, cbplo, flag2 = 0;
|
||||
unsigned cbplo, flag2 = 0;
|
||||
int cbphi;
|
||||
|
||||
if (get_bits1(gb)) {
|
||||
copy_block16(frame->data[0] + y * frame->linesize[0] + x,
|
||||
@@ -311,6 +315,9 @@ static int decode_inter(AVCodecContext *avctx, GetBitContext *gb,
|
||||
|
||||
cbplo = value >> 4;
|
||||
cbphi = get_cbphi(gb, reverse);
|
||||
if (cbphi < 0)
|
||||
return cbphi;
|
||||
|
||||
if (intra_block) {
|
||||
ret = decode_blocks(avctx, gb, cbplo | (cbphi << 2), 0, offset, flag2);
|
||||
if (ret < 0)
|
||||
|
||||
@@ -69,7 +69,7 @@
|
||||
#define GLOBAL(x) x
|
||||
#define RIGHT_SHIFT(x, n) ((x) >> (n))
|
||||
#define MULTIPLY16C16(var,const) ((var)*(const))
|
||||
#define DESCALE(x,n) RIGHT_SHIFT((x) + (1 << ((n) - 1)), n)
|
||||
#define DESCALE(x,n) RIGHT_SHIFT((int)(x) + (1 << ((n) - 1)), n)
|
||||
|
||||
|
||||
/*
|
||||
@@ -175,7 +175,7 @@
|
||||
#if BITS_IN_JSAMPLE == 8 && CONST_BITS<=13 && PASS1_BITS<=2
|
||||
#define MULTIPLY(var,const) MULTIPLY16C16(var,const)
|
||||
#else
|
||||
#define MULTIPLY(var,const) ((var) * (const))
|
||||
#define MULTIPLY(var,const) (int)((var) * (unsigned)(const))
|
||||
#endif
|
||||
|
||||
|
||||
@@ -183,7 +183,7 @@ static av_always_inline void FUNC(row_fdct)(int16_t *data)
|
||||
{
|
||||
int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
||||
int tmp10, tmp11, tmp12, tmp13;
|
||||
int z1, z2, z3, z4, z5;
|
||||
unsigned z1, z2, z3, z4, z5;
|
||||
int16_t *dataptr;
|
||||
int ctr;
|
||||
|
||||
@@ -261,7 +261,7 @@ FUNC(ff_jpeg_fdct_islow)(int16_t *data)
|
||||
{
|
||||
int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
||||
int tmp10, tmp11, tmp12, tmp13;
|
||||
int z1, z2, z3, z4, z5;
|
||||
unsigned z1, z2, z3, z4, z5;
|
||||
int16_t *dataptr;
|
||||
int ctr;
|
||||
|
||||
|
||||
@@ -340,6 +340,25 @@ static int get_siz(Jpeg2000DecoderContext *s)
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
|
||||
for (i = 0; i < s->ncomponents; i++) {
|
||||
if (s->cdef[i] < 0) {
|
||||
for (i = 0; i < s->ncomponents; i++) {
|
||||
s->cdef[i] = i + 1;
|
||||
}
|
||||
if ((s->ncomponents & 1) == 0)
|
||||
s->cdef[s->ncomponents-1] = 0;
|
||||
}
|
||||
}
|
||||
// after here we no longer have to consider negative cdef
|
||||
|
||||
int cdef_used = 0;
|
||||
for (i = 0; i < s->ncomponents; i++)
|
||||
cdef_used |= 1<<s->cdef[i];
|
||||
|
||||
// Check that the channels we have are what we expect for the number of components
|
||||
if (cdef_used != ((int[]){0,2,3,14,15})[s->ncomponents])
|
||||
return AVERROR_INVALIDDATA;
|
||||
|
||||
for (i = 0; i < s->ncomponents; i++) { // Ssiz_i XRsiz_i, YRsiz_i
|
||||
uint8_t x = bytestream2_get_byteu(&s->g);
|
||||
s->cbps[i] = (x & 0x7f) + 1;
|
||||
@@ -352,7 +371,9 @@ static int get_siz(Jpeg2000DecoderContext *s)
|
||||
av_log(s->avctx, AV_LOG_ERROR, "Invalid sample separation %d/%d\n", s->cdx[i], s->cdy[i]);
|
||||
return AVERROR_INVALIDDATA;
|
||||
}
|
||||
log2_chroma_wh |= s->cdy[i] >> 1 << i * 4 | s->cdx[i] >> 1 << i * 4 + 2;
|
||||
int i_remapped = s->cdef[i] ? s->cdef[i]-1 : (s->ncomponents-1);
|
||||
|
||||
log2_chroma_wh |= s->cdy[i] >> 1 << i_remapped * 4 | s->cdx[i] >> 1 << i_remapped * 4 + 2;
|
||||
}
|
||||
|
||||
s->numXtiles = ff_jpeg2000_ceildiv(s->width - s->tile_offset_x, s->tile_width);
|
||||
@@ -1198,6 +1219,7 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
|
||||
|
||||
bytestream2_get_bufferu(&s->g, cblk->data + cblk->length, cblk->lengthinc[cwsno]);
|
||||
cblk->length += cblk->lengthinc[cwsno];
|
||||
memset(cblk->data + cblk->length, 0, 4);
|
||||
cblk->lengthinc[cwsno] = 0;
|
||||
if (cblk->nb_terminationsinc) {
|
||||
cblk->nb_terminationsinc--;
|
||||
|
||||
+8
-1
@@ -91,10 +91,15 @@ static inline int loco_get_rice(RICEContext *r)
|
||||
if (get_bits_left(&r->gb) < 1)
|
||||
return INT_MIN;
|
||||
v = get_ur_golomb_jpegls(&r->gb, loco_get_rice_param(r), INT_MAX, 0);
|
||||
if (v == -1)
|
||||
return INT_MIN;
|
||||
loco_update_rice_param(r, (v + 1) >> 1);
|
||||
if (!v) {
|
||||
if (r->save >= 0) {
|
||||
r->run = get_ur_golomb_jpegls(&r->gb, 2, INT_MAX, 0);
|
||||
int run = get_ur_golomb_jpegls(&r->gb, 2, INT_MAX, 0);
|
||||
if (run == -1)
|
||||
return INT_MIN;
|
||||
r->run = run;
|
||||
if (r->run > 1)
|
||||
r->save += r->run + 1;
|
||||
else
|
||||
@@ -151,6 +156,8 @@ static int loco_decode_plane(LOCOContext *l, uint8_t *data, int width, int heigh
|
||||
|
||||
/* restore top left pixel */
|
||||
val = loco_get_rice(&rc);
|
||||
if (val == INT_MIN)
|
||||
return AVERROR_INVALIDDATA;
|
||||
data[0] = 128 + val;
|
||||
/* restore top line */
|
||||
for (i = 1; i < width; i++) {
|
||||
|
||||
+1
-1
@@ -244,7 +244,7 @@ static int mf_sample_to_avpacket(AVCodecContext *avctx, IMFSample *sample, AVPac
|
||||
if ((ret = av_new_packet(avpkt, len)) < 0)
|
||||
return ret;
|
||||
|
||||
IMFSample_ConvertToContiguousBuffer(sample, &buffer);
|
||||
hr = IMFSample_ConvertToContiguousBuffer(sample, &buffer);
|
||||
if (FAILED(hr))
|
||||
return AVERROR_EXTERNAL;
|
||||
|
||||
|
||||
@@ -1442,7 +1442,7 @@ static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y)
|
||||
s->b_direct_mv_table[mot_xy][0]= 0;
|
||||
s->b_direct_mv_table[mot_xy][1]= 0;
|
||||
|
||||
return 256*256*256*64;
|
||||
return 256*256*256*64-1;
|
||||
}
|
||||
|
||||
c->xmin= xmin;
|
||||
|
||||
@@ -398,4 +398,3 @@ void RENAME(ff_imdct36_blocks)(INTFLOAT *out, INTFLOAT *buf, INTFLOAT *in,
|
||||
out++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -782,4 +782,3 @@ static const AVCodecDefault mp2_defaults[] = {
|
||||
{ "b", "0" },
|
||||
{ NULL },
|
||||
};
|
||||
|
||||
|
||||
@@ -562,6 +562,12 @@ FF_ENABLE_DEPRECATION_WARNINGS
|
||||
av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
if (s->codec_id == AV_CODEC_ID_FLV1 &&
|
||||
(avctx->width > 65535 ||
|
||||
avctx->height > 65535 )) {
|
||||
av_log(avctx, AV_LOG_ERROR, "FLV does not support resolutions above 16bit\n");
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
if ((s->codec_id == AV_CODEC_ID_H263 ||
|
||||
s->codec_id == AV_CODEC_ID_H263P) &&
|
||||
((avctx->width &3) ||
|
||||
@@ -1297,6 +1303,8 @@ static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
|
||||
/* shift buffer entries */
|
||||
for (i = flush_offset; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
|
||||
s->input_picture[i - flush_offset] = s->input_picture[i];
|
||||
for (int i = MAX_B_FRAMES + 1 - flush_offset; i <= MAX_B_FRAMES; i++)
|
||||
s->input_picture[i] = NULL;
|
||||
|
||||
s->input_picture[encoding_delay] = (Picture*) pic;
|
||||
|
||||
|
||||
@@ -334,4 +334,3 @@ int ff_msmpeg4_pred_dc(MpegEncContext *s, int n,
|
||||
*dc_val_ptr = &dc_val[0];
|
||||
return pred;
|
||||
}
|
||||
|
||||
|
||||
@@ -187,6 +187,8 @@ static int decode_frame(AVCodecContext *avctx,
|
||||
av_log(avctx, AV_LOG_ERROR, "Inflate error: %d\n", ret);
|
||||
return AVERROR_EXTERNAL;
|
||||
}
|
||||
if (s->zstream.avail_out > 0)
|
||||
memset(s->zstream.next_out, 0, s->zstream.avail_out);
|
||||
}
|
||||
}
|
||||
} else if (type == MKTAG('H','U','F','Y')) {
|
||||
|
||||
@@ -92,6 +92,9 @@ static int lz4_decompress(AVCodecContext *avctx,
|
||||
} while (current == 255);
|
||||
}
|
||||
|
||||
if (bytestream2_get_bytes_left(gb) < num_literals)
|
||||
return AVERROR_INVALIDDATA;
|
||||
|
||||
if (pos + num_literals < HISTORY_SIZE) {
|
||||
bytestream2_get_buffer(gb, history + pos, num_literals);
|
||||
pos += num_literals;
|
||||
|
||||
@@ -267,6 +267,7 @@ int ff_combine_frame(ParseContext *pc, int next,
|
||||
}
|
||||
pc->buffer = new_buffer;
|
||||
memcpy(&pc->buffer[pc->index], *buf, *buf_size);
|
||||
memset(&pc->buffer[pc->index + *buf_size], 0, AV_INPUT_BUFFER_PADDING_SIZE);
|
||||
pc->index += *buf_size;
|
||||
return -1;
|
||||
}
|
||||
|
||||
+2
-2
@@ -230,8 +230,8 @@ static int read_high_coeffs(AVCodecContext *avctx, uint8_t *src, int16_t *dst,
|
||||
if (cnt1 >= length) {
|
||||
cnt1 = get_bits(bc, nbits);
|
||||
} else {
|
||||
pfx = 14 + ((((uint64_t)(value - 14)) >> 32) & (value - 14));
|
||||
if (pfx < 1 || pfx > 25)
|
||||
pfx = FFMIN(value, 14);
|
||||
if (pfx < 1)
|
||||
return AVERROR_INVALIDDATA;
|
||||
cnt1 *= (1 << pfx) - 1;
|
||||
shbits = show_bits(bc, pfx);
|
||||
|
||||
+1
-1
@@ -256,7 +256,7 @@ static int pnm_decode_frame(AVCodecContext *avctx, void *data,
|
||||
}
|
||||
break;
|
||||
case AV_PIX_FMT_GBRPF32:
|
||||
if (avctx->width * avctx->height * 12 > s->bytestream_end - s->bytestream)
|
||||
if (avctx->width * avctx->height * 12LL > s->bytestream_end - s->bytestream)
|
||||
return AVERROR_INVALIDDATA;
|
||||
scale = 1.f / s->scale;
|
||||
if (s->endian) {
|
||||
|
||||
@@ -488,7 +488,7 @@ static av_always_inline int decode_ac_coeffs(AVCodecContext *avctx, GetBitContex
|
||||
|
||||
for (pos = block_mask;;) {
|
||||
bits_left = gb->size_in_bits - re_index;
|
||||
if (!bits_left || (bits_left < 32 && !SHOW_UBITS(re, gb, bits_left)))
|
||||
if (bits_left <= 0 || (bits_left < 32 && !SHOW_UBITS(re, gb, bits_left)))
|
||||
break;
|
||||
|
||||
DECODE_CODEWORD(run, run_to_cb[FFMIN(run, 15)], LAST_SKIP_BITS);
|
||||
|
||||
@@ -339,7 +339,7 @@ static void get_slice_data(ProresContext *ctx, const uint16_t *src,
|
||||
|
||||
static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
|
||||
ptrdiff_t linesize, int x, int y, int w, int h,
|
||||
int16_t *blocks, int mbs_per_slice, int abits)
|
||||
uint16_t *blocks, int mbs_per_slice, int abits)
|
||||
{
|
||||
const int slice_width = 16 * mbs_per_slice;
|
||||
int i, j, copy_w, copy_h;
|
||||
|
||||
@@ -31,7 +31,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
|
||||
|
||||
avctx->bits_per_coded_sample = 32;
|
||||
if (avctx->width > 0)
|
||||
avctx->bit_rate = ff_guess_coded_bitrate(avctx) * aligned_width / avctx->width;
|
||||
avctx->bit_rate = av_rescale(ff_guess_coded_bitrate(avctx), aligned_width, avctx->width);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -71,7 +71,6 @@ void ff_build_rac_states(RangeCoder *c, int factor, int max_p);
|
||||
static inline void renorm_encoder(RangeCoder *c)
|
||||
{
|
||||
// FIXME: optimize
|
||||
while (c->range < 0x100) {
|
||||
if (c->outstanding_byte < 0) {
|
||||
c->outstanding_byte = c->low >> 8;
|
||||
} else if (c->low <= 0xFF00) {
|
||||
@@ -90,7 +89,6 @@ static inline void renorm_encoder(RangeCoder *c)
|
||||
|
||||
c->low = (c->low & 0xFF) << 8;
|
||||
c->range <<= 8;
|
||||
}
|
||||
}
|
||||
|
||||
static inline int get_rac_count(RangeCoder *c)
|
||||
@@ -117,7 +115,8 @@ static inline void put_rac(RangeCoder *c, uint8_t *const state, int bit)
|
||||
*state = c->one_state[*state];
|
||||
}
|
||||
|
||||
renorm_encoder(c);
|
||||
while (c->range < 0x100)
|
||||
renorm_encoder(c);
|
||||
}
|
||||
|
||||
static inline void refill(RangeCoder *c)
|
||||
|
||||
@@ -558,6 +558,7 @@ static int shorten_decode_frame(AVCodecContext *avctx, void *data,
|
||||
buf = &s->bitstream[s->bitstream_index];
|
||||
buf_size += s->bitstream_size;
|
||||
s->bitstream_size = buf_size;
|
||||
memset(buf + buf_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
|
||||
|
||||
/* do not decode until buffer has at least max_framesize bytes or
|
||||
* the end of the file has been reached */
|
||||
|
||||
+1
-1
@@ -491,7 +491,7 @@ av_cold int ff_snow_common_init(AVCodecContext *avctx){
|
||||
FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_dwt_buffer, width, height * sizeof(DWTELEM), fail); //FIXME this does not belong here
|
||||
FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_dwt_buffer, width, sizeof(DWTELEM), fail);
|
||||
FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_idwt_buffer, width, sizeof(IDWTELEM), fail);
|
||||
FF_ALLOC_ARRAY_OR_GOTO(avctx, s->run_buffer, ((width + 1) >> 1), ((height + 1) >> 1) * sizeof(*s->run_buffer), fail);
|
||||
FF_ALLOC_ARRAY_OR_GOTO(avctx, s->run_buffer, ((width + 1) >> 1) * ((height + 1) >> 1) + 1, sizeof(*s->run_buffer), fail);
|
||||
|
||||
for(i=0; i<MAX_REF_FRAMES; i++) {
|
||||
for(j=0; j<MAX_REF_FRAMES; j++)
|
||||
|
||||
@@ -856,5 +856,3 @@ av_cold void ff_dwt_init(SnowDWTContext *c)
|
||||
if (HAVE_MMX)
|
||||
ff_dwt_init_x86(c);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -269,6 +269,7 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){
|
||||
int my_context= av_log2(2*FFABS(left->my - top->my));
|
||||
int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
|
||||
int ref, best_ref, ref_score, ref_mx, ref_my;
|
||||
int range = MAX_MV >> (1 + qpel);
|
||||
|
||||
av_assert0(sizeof(s->block_state) >= 256);
|
||||
if(s->keyframe){
|
||||
@@ -310,6 +311,11 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){
|
||||
c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
|
||||
c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
|
||||
|
||||
c->xmin = FFMAX(c->xmin,-range);
|
||||
c->xmax = FFMIN(c->xmax, range);
|
||||
c->ymin = FFMAX(c->ymin,-range);
|
||||
c->ymax = FFMIN(c->ymax, range);
|
||||
|
||||
if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
|
||||
if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
|
||||
if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
|
||||
|
||||
@@ -1439,6 +1439,9 @@ static int svq3_decode_frame(AVCodecContext *avctx, void *data,
|
||||
if (svq3_decode_slice_header(avctx))
|
||||
return -1;
|
||||
|
||||
if (avpkt->size < s->mb_width * s->mb_height / 8)
|
||||
return AVERROR_INVALIDDATA;
|
||||
|
||||
s->pict_type = s->slice_type;
|
||||
|
||||
if (s->pict_type != AV_PICTURE_TYPE_B)
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "libavutil/avassert.h"
|
||||
#include "libavutil/imgutils.h"
|
||||
#include "libavutil/internal.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
@@ -88,10 +89,11 @@ static int targa_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
|
||||
TargaContext *s = avctx->priv_data;
|
||||
int bpp, picsize, datasize = -1, ret, i;
|
||||
uint8_t *out;
|
||||
int maxpal = 32*32;
|
||||
|
||||
picsize = av_image_get_buffer_size(avctx->pix_fmt,
|
||||
avctx->width, avctx->height, 1);
|
||||
if ((ret = ff_alloc_packet2(avctx, pkt, picsize + 45, 0)) < 0)
|
||||
if ((ret = ff_alloc_packet2(avctx, pkt, picsize + 45 + maxpal, 0)) < 0)
|
||||
return ret;
|
||||
|
||||
/* zero out the header and only set applicable fields */
|
||||
@@ -124,6 +126,7 @@ static int targa_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
|
||||
AV_WL24(pkt->data + 18 + 3 * i, *(uint32_t *)(p->data[1] + i * 4));
|
||||
}
|
||||
out += 32 * pal_bpp; /* skip past the palette we just output */
|
||||
av_assert0(32 * pal_bpp <= maxpal);
|
||||
break;
|
||||
}
|
||||
case AV_PIX_FMT_GRAY8:
|
||||
|
||||
+18
-2
@@ -1300,9 +1300,13 @@ static int tiff_decode_tag(TiffContext *s, AVFrame *frame)
|
||||
s->is_thumbnail = (value != 0);
|
||||
break;
|
||||
case TIFF_WIDTH:
|
||||
if (value > INT_MAX)
|
||||
return AVERROR_INVALIDDATA;
|
||||
s->width = value;
|
||||
break;
|
||||
case TIFF_HEIGHT:
|
||||
if (value > INT_MAX)
|
||||
return AVERROR_INVALIDDATA;
|
||||
s->height = value;
|
||||
break;
|
||||
case TIFF_BPP:
|
||||
@@ -1434,12 +1438,18 @@ static int tiff_decode_tag(TiffContext *s, AVFrame *frame)
|
||||
s->tile_byte_counts_offset = off;
|
||||
break;
|
||||
case TIFF_TILE_LENGTH:
|
||||
if (value > INT_MAX)
|
||||
return AVERROR_INVALIDDATA;
|
||||
s->tile_length = value;
|
||||
break;
|
||||
case TIFF_TILE_WIDTH:
|
||||
if (value > INT_MAX)
|
||||
return AVERROR_INVALIDDATA;
|
||||
s->tile_width = value;
|
||||
break;
|
||||
case TIFF_PREDICTOR:
|
||||
if (value > INT_MAX)
|
||||
return AVERROR_INVALIDDATA;
|
||||
s->predictor = value;
|
||||
break;
|
||||
case TIFF_SUB_IFDS:
|
||||
@@ -1570,12 +1580,18 @@ static int tiff_decode_tag(TiffContext *s, AVFrame *frame)
|
||||
}
|
||||
break;
|
||||
case TIFF_T4OPTIONS:
|
||||
if (s->compr == TIFF_G3)
|
||||
if (s->compr == TIFF_G3) {
|
||||
if (value > INT_MAX)
|
||||
return AVERROR_INVALIDDATA;
|
||||
s->fax_opts = value;
|
||||
}
|
||||
break;
|
||||
case TIFF_T6OPTIONS:
|
||||
if (s->compr == TIFF_G4)
|
||||
if (s->compr == TIFF_G4) {
|
||||
if (value > INT_MAX)
|
||||
return AVERROR_INVALIDDATA;
|
||||
s->fax_opts = value;
|
||||
}
|
||||
break;
|
||||
#define ADD_METADATA(count, name, sep)\
|
||||
if ((ret = add_metadata(count, type, name, sep, s, frame)) < 0) {\
|
||||
|
||||
+4
-1
@@ -269,6 +269,9 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height,
|
||||
if (s->codec_id == AV_CODEC_ID_SVQ1) {
|
||||
w_align = 64;
|
||||
h_align = 64;
|
||||
} else if (s->codec_id == AV_CODEC_ID_SNOW) {
|
||||
w_align = 16;
|
||||
h_align = 16;
|
||||
}
|
||||
break;
|
||||
case AV_PIX_FMT_RGB555:
|
||||
@@ -1745,7 +1748,7 @@ static int get_audio_frame_duration(enum AVCodecID id, int sr, int ch, int ba,
|
||||
case AV_CODEC_ID_ADPCM_IMA_WAV:
|
||||
if (bps < 2 || bps > 5)
|
||||
return 0;
|
||||
tmp = blocks * (1LL + (ba - 4 * ch) / (bps * ch) * 8);
|
||||
tmp = blocks * (1LL + (ba - 4 * ch) / (bps * ch) * 8LL);
|
||||
break;
|
||||
case AV_CODEC_ID_ADPCM_IMA_DK3:
|
||||
tmp = blocks * (((ba - 16LL) * 2 / 3 * 4) / ch);
|
||||
|
||||
@@ -233,7 +233,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
|
||||
* - Compression mode (none/huff)
|
||||
* And write the flags.
|
||||
*/
|
||||
c->flags = (c->slices - 1) << 24;
|
||||
c->flags = (c->slices - 1U) << 24;
|
||||
c->flags |= 0 << 11; // bit field to signal interlaced encoding mode
|
||||
c->flags |= c->compression;
|
||||
|
||||
|
||||
@@ -2382,12 +2382,14 @@ av_cold int ff_vaapi_encode_close(AVCodecContext *avctx)
|
||||
av_buffer_pool_uninit(&ctx->output_buffer_pool);
|
||||
|
||||
if (ctx->va_context != VA_INVALID_ID) {
|
||||
vaDestroyContext(ctx->hwctx->display, ctx->va_context);
|
||||
if (ctx->hwctx)
|
||||
vaDestroyContext(ctx->hwctx->display, ctx->va_context);
|
||||
ctx->va_context = VA_INVALID_ID;
|
||||
}
|
||||
|
||||
if (ctx->va_config != VA_INVALID_ID) {
|
||||
vaDestroyConfig(ctx->hwctx->display, ctx->va_config);
|
||||
if (ctx->hwctx)
|
||||
vaDestroyConfig(ctx->hwctx->display, ctx->va_config);
|
||||
ctx->va_config = VA_INVALID_ID;
|
||||
}
|
||||
|
||||
|
||||
+40
-19
@@ -1313,6 +1313,7 @@ static int vc1_decode_p_mb(VC1Context *v)
|
||||
int dst_idx, off;
|
||||
int skipped, fourmv;
|
||||
int block_cbp = 0, pat, block_tt = 0, block_intra = 0;
|
||||
int ret;
|
||||
|
||||
mquant = v->pq; /* lossy initialization */
|
||||
|
||||
@@ -1371,8 +1372,10 @@ static int vc1_decode_p_mb(VC1Context *v)
|
||||
if (i == 1 || i == 3 || s->mb_x)
|
||||
v->c_avail = v->mb_type[0][s->block_index[i] - 1];
|
||||
|
||||
vc1_decode_intra_block(v, v->block[v->cur_blk_idx][block_map[i]], i, val, mquant,
|
||||
(i & 4) ? v->codingset2 : v->codingset);
|
||||
ret = vc1_decode_intra_block(v, v->block[v->cur_blk_idx][block_map[i]], i, val, mquant,
|
||||
(i & 4) ? v->codingset2 : v->codingset);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
|
||||
continue;
|
||||
v->vc1dsp.vc1_inv_trans_8x8(v->block[v->cur_blk_idx][block_map[i]]);
|
||||
@@ -1474,8 +1477,10 @@ static int vc1_decode_p_mb(VC1Context *v)
|
||||
if (i == 1 || i == 3 || s->mb_x)
|
||||
v->c_avail = v->mb_type[0][s->block_index[i] - 1];
|
||||
|
||||
vc1_decode_intra_block(v, v->block[v->cur_blk_idx][block_map[i]], i, is_coded[i], mquant,
|
||||
(i & 4) ? v->codingset2 : v->codingset);
|
||||
ret = vc1_decode_intra_block(v, v->block[v->cur_blk_idx][block_map[i]], i, is_coded[i], mquant,
|
||||
(i & 4) ? v->codingset2 : v->codingset);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
|
||||
continue;
|
||||
v->vc1dsp.vc1_inv_trans_8x8(v->block[v->cur_blk_idx][block_map[i]]);
|
||||
@@ -1546,6 +1551,7 @@ static int vc1_decode_p_mb_intfr(VC1Context *v)
|
||||
int block_cbp = 0, pat, block_tt = 0;
|
||||
int idx_mbmode = 0, mvbp;
|
||||
int fieldtx;
|
||||
int ret;
|
||||
|
||||
mquant = v->pq; /* Lossy initialization */
|
||||
|
||||
@@ -1618,8 +1624,10 @@ static int vc1_decode_p_mb_intfr(VC1Context *v)
|
||||
if (i == 1 || i == 3 || s->mb_x)
|
||||
v->c_avail = v->mb_type[0][s->block_index[i] - 1];
|
||||
|
||||
vc1_decode_intra_block(v, v->block[v->cur_blk_idx][block_map[i]], i, val, mquant,
|
||||
(i & 4) ? v->codingset2 : v->codingset);
|
||||
ret = vc1_decode_intra_block(v, v->block[v->cur_blk_idx][block_map[i]], i, val, mquant,
|
||||
(i & 4) ? v->codingset2 : v->codingset);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
|
||||
continue;
|
||||
v->vc1dsp.vc1_inv_trans_8x8(v->block[v->cur_blk_idx][block_map[i]]);
|
||||
@@ -1755,6 +1763,7 @@ static int vc1_decode_p_mb_intfi(VC1Context *v)
|
||||
int pred_flag = 0;
|
||||
int block_cbp = 0, pat, block_tt = 0;
|
||||
int idx_mbmode = 0;
|
||||
int ret;
|
||||
|
||||
mquant = v->pq; /* Lossy initialization */
|
||||
|
||||
@@ -1786,8 +1795,10 @@ static int vc1_decode_p_mb_intfi(VC1Context *v)
|
||||
if (i == 1 || i == 3 || s->mb_x)
|
||||
v->c_avail = v->mb_type[0][s->block_index[i] - 1];
|
||||
|
||||
vc1_decode_intra_block(v, v->block[v->cur_blk_idx][block_map[i]], i, val, mquant,
|
||||
(i & 4) ? v->codingset2 : v->codingset);
|
||||
ret = vc1_decode_intra_block(v, v->block[v->cur_blk_idx][block_map[i]], i, val, mquant,
|
||||
(i & 4) ? v->codingset2 : v->codingset);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
|
||||
continue;
|
||||
v->vc1dsp.vc1_inv_trans_8x8(v->block[v->cur_blk_idx][block_map[i]]);
|
||||
@@ -1878,6 +1889,7 @@ static int vc1_decode_b_mb(VC1Context *v)
|
||||
int skipped, direct;
|
||||
int dmv_x[2], dmv_y[2];
|
||||
int bmvtype = BMV_TYPE_BACKWARD;
|
||||
int ret;
|
||||
|
||||
mquant = v->pq; /* lossy initialization */
|
||||
s->mb_intra = 0;
|
||||
@@ -1990,8 +2002,10 @@ static int vc1_decode_b_mb(VC1Context *v)
|
||||
if (i == 1 || i == 3 || s->mb_x)
|
||||
v->c_avail = v->mb_type[0][s->block_index[i] - 1];
|
||||
|
||||
vc1_decode_intra_block(v, s->block[i], i, val, mquant,
|
||||
(i & 4) ? v->codingset2 : v->codingset);
|
||||
ret = vc1_decode_intra_block(v, s->block[i], i, val, mquant,
|
||||
(i & 4) ? v->codingset2 : v->codingset);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
|
||||
continue;
|
||||
v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
|
||||
@@ -2037,6 +2051,7 @@ static int vc1_decode_b_mb_intfi(VC1Context *v)
|
||||
int bmvtype = BMV_TYPE_BACKWARD;
|
||||
int block_cbp = 0, pat, block_tt = 0;
|
||||
int idx_mbmode;
|
||||
int ret;
|
||||
|
||||
mquant = v->pq; /* Lossy initialization */
|
||||
s->mb_intra = 0;
|
||||
@@ -2069,8 +2084,10 @@ static int vc1_decode_b_mb_intfi(VC1Context *v)
|
||||
if (i == 1 || i == 3 || s->mb_x)
|
||||
v->c_avail = v->mb_type[0][s->block_index[i] - 1];
|
||||
|
||||
vc1_decode_intra_block(v, s->block[i], i, val, mquant,
|
||||
(i & 4) ? v->codingset2 : v->codingset);
|
||||
ret = vc1_decode_intra_block(v, s->block[i], i, val, mquant,
|
||||
(i & 4) ? v->codingset2 : v->codingset);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
|
||||
continue;
|
||||
v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
|
||||
@@ -2207,6 +2224,7 @@ static int vc1_decode_b_mb_intfr(VC1Context *v)
|
||||
int stride_y, fieldtx;
|
||||
int bmvtype = BMV_TYPE_BACKWARD;
|
||||
int dir, dir2;
|
||||
int ret;
|
||||
|
||||
mquant = v->pq; /* Lossy initialization */
|
||||
s->mb_intra = 0;
|
||||
@@ -2263,8 +2281,10 @@ static int vc1_decode_b_mb_intfr(VC1Context *v)
|
||||
if (i == 1 || i == 3 || s->mb_x)
|
||||
v->c_avail = v->mb_type[0][s->block_index[i] - 1];
|
||||
|
||||
vc1_decode_intra_block(v, s->block[i], i, val, mquant,
|
||||
(i & 4) ? v->codingset2 : v->codingset);
|
||||
ret = vc1_decode_intra_block(v, s->block[i], i, val, mquant,
|
||||
(i & 4) ? v->codingset2 : v->codingset);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (CONFIG_GRAY && i > 3 && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
|
||||
continue;
|
||||
v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
|
||||
@@ -2808,6 +2828,7 @@ static void vc1_decode_p_blocks(VC1Context *v)
|
||||
{
|
||||
MpegEncContext *s = &v->s;
|
||||
int apply_loop_filter;
|
||||
int ret;
|
||||
|
||||
/* select coding mode used for VLC tables selection */
|
||||
switch (v->c_ac_table_index) {
|
||||
@@ -2850,22 +2871,22 @@ static void vc1_decode_p_blocks(VC1Context *v)
|
||||
}
|
||||
|
||||
if (v->fcm == ILACE_FIELD) {
|
||||
vc1_decode_p_mb_intfi(v);
|
||||
ret = vc1_decode_p_mb_intfi(v);
|
||||
if (apply_loop_filter)
|
||||
ff_vc1_p_loop_filter(v);
|
||||
} else if (v->fcm == ILACE_FRAME) {
|
||||
vc1_decode_p_mb_intfr(v);
|
||||
ret = vc1_decode_p_mb_intfr(v);
|
||||
if (apply_loop_filter)
|
||||
ff_vc1_p_intfr_loop_filter(v);
|
||||
} else {
|
||||
vc1_decode_p_mb(v);
|
||||
ret = vc1_decode_p_mb(v);
|
||||
if (apply_loop_filter)
|
||||
ff_vc1_p_loop_filter(v);
|
||||
}
|
||||
if (get_bits_left(&s->gb) < 0 || get_bits_count(&s->gb) < 0) {
|
||||
if (ret < 0 || get_bits_left(&s->gb) < 0 || get_bits_count(&s->gb) < 0) {
|
||||
// TODO: may need modification to handle slice coding
|
||||
ff_er_add_slice(&s->er, 0, s->start_mb_y, s->mb_x, s->mb_y, ER_MB_ERROR);
|
||||
av_log(s->avctx, AV_LOG_ERROR, "Bits overconsumption: %i > %i at %ix%i\n",
|
||||
av_log(s->avctx, AV_LOG_ERROR, "Error or Bits overconsumption: %i > %i at %ix%i\n",
|
||||
get_bits_count(&s->gb), s->gb.size_in_bits, s->mb_x, s->mb_y);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1125,10 +1125,7 @@ static av_always_inline void vc1_b_h_intfi_loop_filter(VC1Context *v, uint8_t *d
|
||||
dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
|
||||
|
||||
if (!(flags & RIGHT_EDGE) || !(block_num & 5)) {
|
||||
if (block_num > 3)
|
||||
v->vc1dsp.vc1_h_loop_filter8(dst + 8, linesize, pq);
|
||||
else
|
||||
v->vc1dsp.vc1_h_loop_filter8(dst + 8, linesize, pq);
|
||||
v->vc1dsp.vc1_h_loop_filter8(dst + 8, linesize, pq);
|
||||
}
|
||||
|
||||
tt = ttblk[0] >> (block_num * 4) & 0xf;
|
||||
|
||||
+3
-2
@@ -344,7 +344,7 @@ av_cold int ff_vc1_decode_init_alloc_tables(VC1Context *v)
|
||||
if (!v->block || !v->cbp_base)
|
||||
goto error;
|
||||
v->cbp = v->cbp_base + 2 * s->mb_stride;
|
||||
v->ttblk_base = av_malloc(sizeof(v->ttblk_base[0]) * 3 * s->mb_stride);
|
||||
v->ttblk_base = av_mallocz(sizeof(v->ttblk_base[0]) * 3 * s->mb_stride);
|
||||
if (!v->ttblk_base)
|
||||
goto error;
|
||||
v->ttblk = v->ttblk_base + 2 * s->mb_stride;
|
||||
@@ -358,7 +358,7 @@ av_cold int ff_vc1_decode_init_alloc_tables(VC1Context *v)
|
||||
v->luma_mv = v->luma_mv_base + 2 * s->mb_stride;
|
||||
|
||||
/* allocate block type info in that way so it could be used with s->block_index[] */
|
||||
v->mb_type_base = av_malloc(s->b8_stride * (mb_height * 2 + 1) + s->mb_stride * (mb_height + 1) * 2);
|
||||
v->mb_type_base = av_mallocz(s->b8_stride * (mb_height * 2 + 1) + s->mb_stride * (mb_height + 1) * 2);
|
||||
if (!v->mb_type_base)
|
||||
goto error;
|
||||
v->mb_type[0] = v->mb_type_base + s->b8_stride + 1;
|
||||
@@ -608,6 +608,7 @@ av_cold int ff_vc1_decode_end(AVCodecContext *avctx)
|
||||
av_freep(&v->hrd_rate);
|
||||
av_freep(&v->hrd_buffer);
|
||||
ff_mpv_common_end(&v->s);
|
||||
memset(v->s.block_index, 0, sizeof(v->s.block_index));
|
||||
av_freep(&v->mv_type_mb_plane);
|
||||
av_freep(&v->direct_mb_plane);
|
||||
av_freep(&v->forward_mb_plane);
|
||||
|
||||
+6
-3
@@ -183,7 +183,9 @@ typedef struct VC2EncContext {
|
||||
static av_always_inline void put_vc2_ue_uint(PutBitContext *pb, uint32_t val)
|
||||
{
|
||||
int i;
|
||||
int pbits = 0, bits = 0, topbit = 1, maxval = 1;
|
||||
int bits = 0;
|
||||
unsigned topbit = 1, maxval = 1;
|
||||
uint64_t pbits = 0;
|
||||
|
||||
if (!val++) {
|
||||
put_bits(pb, 1, 1);
|
||||
@@ -200,12 +202,13 @@ static av_always_inline void put_vc2_ue_uint(PutBitContext *pb, uint32_t val)
|
||||
|
||||
for (i = 0; i < bits; i++) {
|
||||
topbit >>= 1;
|
||||
av_assert2(pbits <= UINT64_MAX>>3);
|
||||
pbits <<= 2;
|
||||
if (val & topbit)
|
||||
pbits |= 0x1;
|
||||
}
|
||||
|
||||
put_bits(pb, bits*2 + 1, (pbits << 1) | 1);
|
||||
put_bits64(pb, bits*2 + 1, (pbits << 1) | 1);
|
||||
}
|
||||
|
||||
static av_always_inline int count_vc2_ue_uint(uint32_t val)
|
||||
@@ -983,7 +986,7 @@ static av_cold int vc2_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
|
||||
}
|
||||
|
||||
s->slice_min_bytes = s->slice_max_bytes - s->slice_max_bytes*(s->tolerance/100.0f);
|
||||
if (s->slice_min_bytes < 0)
|
||||
if (s->slice_min_bytes < 0 || s->slice_max_bytes > INT_MAX >> 3)
|
||||
return AVERROR(EINVAL);
|
||||
|
||||
ret = encode_frame(s, avpkt, frame, aux_data, header_size, s->interlaced);
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user