vf_transpose_vulkan: convert to compile-time SPIR-V generation

This commit is contained in:
Lynne
2026-02-10 20:51:38 +01:00
parent d0ee5d0556
commit 4061e3351f
4 changed files with 93 additions and 71 deletions
Vendored
+1 -1
View File
@@ -4283,7 +4283,7 @@ tonemap_opencl_filter_deps="opencl const_nan"
transpose_opencl_filter_deps="opencl"
transpose_vaapi_filter_deps="vaapi VAProcPipelineCaps_rotation_flags"
transpose_vt_filter_deps="videotoolbox VTPixelRotationSessionCreate"
transpose_vulkan_filter_deps="vulkan spirv_library"
transpose_vulkan_filter_deps="vulkan spirv_compiler"
unsharp_opencl_filter_deps="opencl"
uspp_filter_deps="gpl avcodec"
v360_vulkan_filter_deps="vulkan spirv_compiler"
+25 -70
View File
@@ -19,15 +19,16 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "libavutil/vulkan_spirv.h"
#include "vulkan_filter.h"
#include "filters.h"
#include "transpose.h"
#include "video.h"
extern const unsigned char ff_transpose_comp_spv_data[];
extern const unsigned int ff_transpose_comp_spv_len;
typedef struct TransposeVulkanContext {
FFVulkanContext vkctx;
@@ -43,22 +44,9 @@ typedef struct TransposeVulkanContext {
static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
{
int err;
uint8_t *spv_data;
size_t spv_len;
void *spv_opaque = NULL;
TransposeVulkanContext *s = ctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
FFVulkanShader *shd = &s->shd;
FFVkSPIRVCompiler *spv;
FFVulkanDescriptorSetBinding *desc;
spv = ff_vk_spirv_init();
if (!spv) {
av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
return AVERROR_EXTERNAL;
}
s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0);
if (!s->qf) {
@@ -68,70 +56,36 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
}
RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL));
RET(ff_vk_shader_init(vkctx, &s->shd, "transpose",
VK_SHADER_STAGE_COMPUTE_BIT,
NULL, 0,
32, 1, 1,
0));
desc = (FFVulkanDescriptorSetBinding []) {
{
.name = "input_images",
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT),
.mem_quali = "readonly",
.dimensions = 2,
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
ff_vk_shader_load(&s->shd, VK_SHADER_STAGE_COMPUTE_BIT, NULL,
(uint32_t []) { 32, 1, planes }, 0);
ff_vk_shader_add_push_const(&s->shd, 0, sizeof(int),
VK_SHADER_STAGE_COMPUTE_BIT);
const FFVulkanDescriptorSetBinding desc[] = {
{ /* input_img */
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.elems = planes,
},
{
.name = "output_images",
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT),
.mem_quali = "writeonly",
.dimensions = 2,
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
{ /* output_img */
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.elems = planes,
},
};
ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0);
RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0));
GLSLC(0, void main() );
GLSLC(0, { );
GLSLC(1, ivec2 size; );
GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
for (int i = 0; i < planes; i++) {
GLSLC(0, );
GLSLF(1, size = imageSize(output_images[%i]); ,i);
GLSLC(1, if (IS_WITHIN(pos, size)) { );
if (s->dir == TRANSPOSE_CCLOCK)
GLSLF(2, vec4 res = imageLoad(input_images[%i], ivec2(size.y - pos.y, pos.x)); ,i);
else if (s->dir == TRANSPOSE_CLOCK_FLIP || s->dir == TRANSPOSE_CLOCK) {
GLSLF(2, vec4 res = imageLoad(input_images[%i], ivec2(size.yx - pos.yx)); ,i);
if (s->dir == TRANSPOSE_CLOCK)
GLSLC(2, pos = ivec2(pos.x, size.y - pos.y); );
} else
GLSLF(2, vec4 res = imageLoad(input_images[%i], pos.yx); ,i);
GLSLF(2, imageStore(output_images[%i], pos, res); ,i);
GLSLC(1, } );
}
GLSLC(0, } );
RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main",
&spv_opaque));
RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main"));
RET(ff_vk_shader_link(vkctx, &s->shd,
ff_transpose_comp_spv_data,
ff_transpose_comp_spv_len, "main"));
RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd));
s->initialized = 1;
fail:
if (spv_opaque)
spv->free_shader(spv, &spv_opaque);
if (spv)
spv->uninit(&spv);
return err;
}
@@ -156,7 +110,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
RET(init_filter(ctx, in));
RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->shd, out, in,
VK_NULL_HANDLE, 1, NULL, 0));
VK_NULL_HANDLE, 1, &s->dir, sizeof(int)));
RET(av_frame_copy_props(out, in));
@@ -223,7 +177,8 @@ static int config_props_output(AVFilterLink *outlink)
}
#define OFFSET(x) offsetof(TransposeVulkanContext, x)
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM | \
AV_OPT_FLAG_RUNTIME_PARAM)
static const AVOption transpose_vulkan_options[] = {
{ "dir", "set transpose direction", OFFSET(dir), AV_OPT_TYPE_INT, { .i64 = TRANSPOSE_CCLOCK_FLIP }, 0, 7, FLAGS, .unit = "dir" },
+1
View File
@@ -5,6 +5,7 @@ OBJS-$(CONFIG_AVGBLUR_VULKAN_FILTER) += vulkan/avgblur.comp.spv.o
OBJS-$(CONFIG_BWDIF_VULKAN_FILTER) += vulkan/bwdif.comp.spv.o
OBJS-$(CONFIG_SCALE_VULKAN_FILTER) += vulkan/debayer.comp.spv.o
OBJS-$(CONFIG_FLIP_VULKAN_FILTER) += vulkan/flip.comp.spv.o
OBJS-$(CONFIG_TRANSPOSE_VULKAN_FILTER) += vulkan/transpose.comp.spv.o
OBJS-$(CONFIG_V360_VULKAN_FILTER) += vulkan/v360.comp.spv.o
OBJS-$(CONFIG_INTERLACE_VULKAN_FILTER) += vulkan/interlace.comp.spv.o
OBJS-$(CONFIG_XFADE_VULKAN_FILTER) += vulkan/xfade.comp.spv.o
+66
View File
@@ -0,0 +1,66 @@
/*
* Copyright (c) 2021 Wu Jianhua <jianhua.wu@intel.com>
* Copyright (c) 2026 Lynne <dev@lynne.ee>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* Compute shader for the Vulkan transpose filter. */
#pragma shader_stage(compute)

/* Lets the storage images below be declared without a format qualifier. */
#extension GL_EXT_shader_image_load_formatted : require
/* Needed for the `scalar` layout of the push-constant block. */
#extension GL_EXT_scalar_block_layout : require
/* Provides nonuniformEXT(), used by main() to index the image arrays. */
#extension GL_EXT_nonuniform_qualifier : require

/*
 * The workgroup size is not fixed here: it is taken from specialization
 * constants 253/254/255.
 * NOTE(review): presumably filled in by the host when the shader is loaded
 * (x, y, number of planes) - confirm against the C side.
 */
layout (local_size_x_id = 253, local_size_y_id = 254, local_size_z_id = 255) in;

/* Per-plane storage images: binding 0 is read from, binding 1 is written to. */
layout (set = 0, binding = 0) uniform readonly image2D input_img[];
layout (set = 0, binding = 1) uniform writeonly image2D output_img[];

/*
 * Values accepted in the `dir` push constant.
 * NOTE(review): presumably these mirror the direction enum used by the C
 * filter (transpose.h) and must be kept in sync with it - verify.
 */
#define TRANSPOSE_CCLOCK_FLIP 0
#define TRANSPOSE_CLOCK 1
#define TRANSPOSE_CCLOCK 2
#define TRANSPOSE_CLOCK_FLIP 3
#define TRANSPOSE_REVERSAL 4
#define TRANSPOSE_HFLIP 5
#define TRANSPOSE_VFLIP 6

/* A single int selecting which of the TRANSPOSE_* directions main() applies. */
layout (push_constant, scalar) uniform pushConstants {
int dir;
};
/*
 * Transposes one plane of the input image into the output image.
 *
 * Every invocation produces exactly one output texel (a gather): it takes its
 * own output coordinate, derives the input coordinate that maps onto it for
 * the requested direction, and copies that texel. Working from the output
 * side guarantees that each in-range output texel is written exactly once and
 * that no store can land outside the image.
 *
 * With (w, h) the input size, the output is (h, w) and the directions mean:
 *   TRANSPOSE_CCLOCK_FLIP: in(x, y) -> out(y,         x)
 *   TRANSPOSE_CLOCK:       in(x, y) -> out(h - 1 - y, x)
 *   TRANSPOSE_CCLOCK:      in(x, y) -> out(y,         w - 1 - x)
 *   TRANSPOSE_CLOCK_FLIP:  in(x, y) -> out(h - 1 - y, w - 1 - x)
 * Any other value of dir falls back to the plain transpose.
 */
void main()
{
    /* NOTE(review): assumes local_size_z equals the plane count, so the local
     * z index selects the plane - confirm against the host-side setup. */
    uint plane = gl_LocalInvocationID.z;

    /* pos is an OUTPUT coordinate; guard against the output extent. */
    ivec2 pos  = ivec2(gl_GlobalInvocationID.xy);
    ivec2 size = imageSize(output_img[nonuniformEXT(plane)]);
    if (any(greaterThanEqual(pos, size)))
        return;

    /* Highest valid output index per axis. Mirroring must use size - 1 - p:
     * plain size - p yields 1..size, which skips index 0 and steps one texel
     * past the edge. */
    ivec2 last = size - ivec2(1);

    /* Input coordinate to fetch. Each direction is computed independently so
     * that CLOCK and CLOCK_FLIP cannot collapse into the same mapping. */
    ivec2 src;
    switch (dir) {
    case TRANSPOSE_CLOCK:      src = ivec2(pos.y, last.x - pos.x); break;
    case TRANSPOSE_CCLOCK:     src = ivec2(last.y - pos.y, pos.x); break;
    case TRANSPOSE_CLOCK_FLIP: src = last.yx - pos.yx;             break;
    default:                   src = pos.yx;                       break;
    }

    vec4 res = imageLoad(input_img[nonuniformEXT(plane)], src);
    imageStore(output_img[nonuniformEXT(plane)], pos, res);
}