vf_transpose_vulkan: convert to compile-time SPIR-V generation
This commit is contained in:
@@ -4283,7 +4283,7 @@ tonemap_opencl_filter_deps="opencl const_nan"
|
||||
transpose_opencl_filter_deps="opencl"
|
||||
transpose_vaapi_filter_deps="vaapi VAProcPipelineCaps_rotation_flags"
|
||||
transpose_vt_filter_deps="videotoolbox VTPixelRotationSessionCreate"
|
||||
transpose_vulkan_filter_deps="vulkan spirv_library"
|
||||
transpose_vulkan_filter_deps="vulkan spirv_compiler"
|
||||
unsharp_opencl_filter_deps="opencl"
|
||||
uspp_filter_deps="gpl avcodec"
|
||||
v360_vulkan_filter_deps="vulkan spirv_compiler"
|
||||
|
||||
@@ -19,15 +19,16 @@
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/random_seed.h"
|
||||
#include "libavutil/opt.h"
|
||||
#include "libavutil/vulkan_spirv.h"
|
||||
#include "vulkan_filter.h"
|
||||
|
||||
#include "filters.h"
|
||||
#include "transpose.h"
|
||||
#include "video.h"
|
||||
|
||||
extern const unsigned char ff_transpose_comp_spv_data[];
|
||||
extern const unsigned int ff_transpose_comp_spv_len;
|
||||
|
||||
typedef struct TransposeVulkanContext {
|
||||
FFVulkanContext vkctx;
|
||||
|
||||
@@ -43,22 +44,9 @@ typedef struct TransposeVulkanContext {
|
||||
static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
|
||||
{
|
||||
int err;
|
||||
uint8_t *spv_data;
|
||||
size_t spv_len;
|
||||
void *spv_opaque = NULL;
|
||||
TransposeVulkanContext *s = ctx->priv;
|
||||
FFVulkanContext *vkctx = &s->vkctx;
|
||||
|
||||
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
|
||||
FFVulkanShader *shd = &s->shd;
|
||||
FFVkSPIRVCompiler *spv;
|
||||
FFVulkanDescriptorSetBinding *desc;
|
||||
|
||||
spv = ff_vk_spirv_init();
|
||||
if (!spv) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
|
||||
return AVERROR_EXTERNAL;
|
||||
}
|
||||
|
||||
s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0);
|
||||
if (!s->qf) {
|
||||
@@ -68,70 +56,36 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
|
||||
}
|
||||
|
||||
RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL));
|
||||
RET(ff_vk_shader_init(vkctx, &s->shd, "transpose",
|
||||
VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
NULL, 0,
|
||||
32, 1, 1,
|
||||
0));
|
||||
|
||||
desc = (FFVulkanDescriptorSetBinding []) {
|
||||
{
|
||||
.name = "input_images",
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT),
|
||||
.mem_quali = "readonly",
|
||||
.dimensions = 2,
|
||||
.elems = planes,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
ff_vk_shader_load(&s->shd, VK_SHADER_STAGE_COMPUTE_BIT, NULL,
|
||||
(uint32_t []) { 32, 1, planes }, 0);
|
||||
|
||||
ff_vk_shader_add_push_const(&s->shd, 0, sizeof(int),
|
||||
VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
|
||||
const FFVulkanDescriptorSetBinding desc[] = {
|
||||
{ /* input_img */
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.elems = planes,
|
||||
},
|
||||
{
|
||||
.name = "output_images",
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT),
|
||||
.mem_quali = "writeonly",
|
||||
.dimensions = 2,
|
||||
.elems = planes,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
{ /* output_img */
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.elems = planes,
|
||||
},
|
||||
};
|
||||
ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0);
|
||||
|
||||
RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0));
|
||||
|
||||
GLSLC(0, void main() );
|
||||
GLSLC(0, { );
|
||||
GLSLC(1, ivec2 size; );
|
||||
GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
|
||||
for (int i = 0; i < planes; i++) {
|
||||
GLSLC(0, );
|
||||
GLSLF(1, size = imageSize(output_images[%i]); ,i);
|
||||
GLSLC(1, if (IS_WITHIN(pos, size)) { );
|
||||
if (s->dir == TRANSPOSE_CCLOCK)
|
||||
GLSLF(2, vec4 res = imageLoad(input_images[%i], ivec2(size.y - pos.y, pos.x)); ,i);
|
||||
else if (s->dir == TRANSPOSE_CLOCK_FLIP || s->dir == TRANSPOSE_CLOCK) {
|
||||
GLSLF(2, vec4 res = imageLoad(input_images[%i], ivec2(size.yx - pos.yx)); ,i);
|
||||
if (s->dir == TRANSPOSE_CLOCK)
|
||||
GLSLC(2, pos = ivec2(pos.x, size.y - pos.y); );
|
||||
} else
|
||||
GLSLF(2, vec4 res = imageLoad(input_images[%i], pos.yx); ,i);
|
||||
GLSLF(2, imageStore(output_images[%i], pos, res); ,i);
|
||||
GLSLC(1, } );
|
||||
}
|
||||
GLSLC(0, } );
|
||||
|
||||
RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main",
|
||||
&spv_opaque));
|
||||
RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main"));
|
||||
RET(ff_vk_shader_link(vkctx, &s->shd,
|
||||
ff_transpose_comp_spv_data,
|
||||
ff_transpose_comp_spv_len, "main"));
|
||||
|
||||
RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd));
|
||||
|
||||
s->initialized = 1;
|
||||
|
||||
fail:
|
||||
if (spv_opaque)
|
||||
spv->free_shader(spv, &spv_opaque);
|
||||
if (spv)
|
||||
spv->uninit(&spv);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -156,7 +110,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
|
||||
RET(init_filter(ctx, in));
|
||||
|
||||
RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->shd, out, in,
|
||||
VK_NULL_HANDLE, 1, NULL, 0));
|
||||
VK_NULL_HANDLE, 1, &s->dir, sizeof(int)));
|
||||
|
||||
RET(av_frame_copy_props(out, in));
|
||||
|
||||
@@ -223,7 +177,8 @@ static int config_props_output(AVFilterLink *outlink)
|
||||
}
|
||||
|
||||
#define OFFSET(x) offsetof(TransposeVulkanContext, x)
|
||||
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
|
||||
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM | \
|
||||
AV_OPT_FLAG_RUNTIME_PARAM)
|
||||
|
||||
static const AVOption transpose_vulkan_options[] = {
|
||||
{ "dir", "set transpose direction", OFFSET(dir), AV_OPT_TYPE_INT, { .i64 = TRANSPOSE_CCLOCK_FLIP }, 0, 7, FLAGS, .unit = "dir" },
|
||||
|
||||
@@ -5,6 +5,7 @@ OBJS-$(CONFIG_AVGBLUR_VULKAN_FILTER) += vulkan/avgblur.comp.spv.o
|
||||
OBJS-$(CONFIG_BWDIF_VULKAN_FILTER) += vulkan/bwdif.comp.spv.o
|
||||
OBJS-$(CONFIG_SCALE_VULKAN_FILTER) += vulkan/debayer.comp.spv.o
|
||||
OBJS-$(CONFIG_FLIP_VULKAN_FILTER) += vulkan/flip.comp.spv.o
|
||||
OBJS-$(CONFIG_TRANSPOSE_VULKAN_FILTER) += vulkan/transpose.comp.spv.o
|
||||
OBJS-$(CONFIG_V360_VULKAN_FILTER) += vulkan/v360.comp.spv.o
|
||||
OBJS-$(CONFIG_INTERLACE_VULKAN_FILTER) += vulkan/interlace.comp.spv.o
|
||||
OBJS-$(CONFIG_XFADE_VULKAN_FILTER) += vulkan/xfade.comp.spv.o
|
||||
|
||||
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Wu Jianhua <jianhua.wu@intel.com>
|
||||
* Copyright (c) 2026 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#pragma shader_stage(compute)
|
||||
|
||||
#extension GL_EXT_shader_image_load_formatted : require
|
||||
#extension GL_EXT_scalar_block_layout : require
|
||||
#extension GL_EXT_nonuniform_qualifier : require
|
||||
|
||||
layout (local_size_x_id = 253, local_size_y_id = 254, local_size_z_id = 255) in;
|
||||
|
||||
layout (set = 0, binding = 0) uniform readonly image2D input_img[];
|
||||
layout (set = 0, binding = 1) uniform writeonly image2D output_img[];
|
||||
|
||||
#define TRANSPOSE_CCLOCK_FLIP 0
|
||||
#define TRANSPOSE_CLOCK 1
|
||||
#define TRANSPOSE_CCLOCK 2
|
||||
#define TRANSPOSE_CLOCK_FLIP 3
|
||||
#define TRANSPOSE_REVERSAL 4
|
||||
#define TRANSPOSE_HFLIP 5
|
||||
#define TRANSPOSE_VFLIP 6
|
||||
|
||||
/* Push-constant block with a single int member, dir. main() compares it
 * against the TRANSPOSE_* defines above to select the coordinate mapping. */
layout (push_constant, scalar) uniform pushConstants {
|
||||
int dir;
|
||||
};
|
||||
|
||||
/* Compute entry point: each invocation loads one texel from input_img[] and
 * stores it to output_img[] at a coordinate derived from dir. */
void main()
|
||||
{
|
||||
/* Start from the invocation's global x/y coordinate. */
ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
|
||||
/* For these three directions the x and y components are swapped before the
 * bounds check below. */
if (dir == TRANSPOSE_CCLOCK || dir == TRANSPOSE_CLOCK || dir == TRANSPOSE_CLOCK_FLIP)
|
||||
pos = pos.yx;
|
||||
|
||||
/* The image array is indexed with gl_LocalInvocationID.z (presumably the
 * plane index; confirm against the host-side dispatch size). */
ivec2 size = imageSize(input_img[nonuniformEXT(gl_LocalInvocationID.z)]);
|
||||
/* Drop invocations whose read position is outside the input image. */
if (any(greaterThanEqual(pos, size)))
|
||||
return;
|
||||
|
||||
/* dst is the store coordinate passed to imageStore() below.
 * NOTE(review): after the bounds check pos is in [0, size), so
 * size.y - pos.y and size.yx - pos.yx lie in [1, size]; confirm whether a
 * "- 1" is intended for these mirrored coordinates. */
ivec2 dst;
|
||||
switch (dir) {
|
||||
case TRANSPOSE_CCLOCK: dst = ivec2(size.y - pos.y, pos.x); break;
|
||||
case TRANSPOSE_CLOCK: pos = ivec2(pos.x, size.y - pos.y); /* fallthrough: the modified pos is also used by the imageLoad() below */
|
||||
case TRANSPOSE_CLOCK_FLIP: dst = ivec2(size.yx - pos.yx); break;
|
||||
/* Every other dir value stores to the swapped coordinate. */
default: dst = pos.yx; break;
|
||||
}
|
||||
|
||||
/* Read the source texel at pos (possibly rewritten by the switch above). */
vec4 res = imageLoad(input_img[nonuniformEXT(gl_LocalInvocationID.z)], pos);
|
||||
|
||||
/* Write it to the same array index of the output at dst. */
imageStore(output_img[nonuniformEXT(gl_LocalInvocationID.z)], dst, res);
|
||||
}
|
||||
Reference in New Issue
Block a user