Improved Threading, better Frame Submission and more

The encoder is now capable of delayed or immediate threading (using fixed latency) which allows for a much better performance and works better with older VFW encoders that assume they are being used for transcoding instead of live-encoding.

In addition to the above, the MPEG-2 rewrite code for Matrox MPEG-2 encoders is completely done now and will fix up the bitstream Matroxs encoder sends out to be mostly correct, allowing for better seeking.

The last change this encompasses is the removal of the need to flip a RGBA frame to BGRA. This doesn't break any support and should work better with what we have.

This exposed several bugs in OBS Studio:
- Color Range, Space & Format are ignored by OBS Studio if set by the encoder.
- The ffmpeg-mux process freezes when writing the header for a correct MPEG-2 stream that doesn't match what OBS thinks it should be. ffmpeg itself has no issues writing this header, so this is an OBS Studio issue.

And exposed the following bugs in Matrox VFW:
- The MPEG-2 bitstream is written as interlaced Top-Field-First instead of Progressive - the content itself is Progressive.
- The encoder ignores FastCompress mode and just behaves identically in either mode.
- Some parameters that should be set are not being set by Matrox (extended Framerate for example).
This commit is contained in:
Xaymar
2017-10-21 18:49:36 +02:00
parent b10e3cbe9c
commit aeaa8dd90e
4 changed files with 596 additions and 162 deletions
+1 -1
View File
@@ -5,7 +5,7 @@ PROJECT(enc-vfw)
# Version
################################################################################
SET(enc-vfw_VERSION_MAJOR 0)
SET(enc-vfw_VERSION_MINOR 1)
SET(enc-vfw_VERSION_MINOR 2)
SET(enc-vfw_VERSION_PATCH 0)
SET(enc-vfw_VERSION_BUILD 0)
#configure_file(
+26 -1
View File
@@ -5,6 +5,10 @@
#include <string>
#include <vector>
#include <fstream>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <queue>
// VFW
#define COMPMAN
@@ -68,6 +72,12 @@ namespace VFW {
static void get_video_info(void *data, struct video_scale_info *info);
void get_video_info(struct video_scale_info *info);
static void threadMain(void *data, int32_t flag);
void threadLocal(int32_t flag);
void preProcessLocal(std::unique_lock<std::mutex>& ul);
void encodeLocal(std::unique_lock<std::mutex>& ul);
void postProcessLocal(std::unique_lock<std::mutex>& ul);
private:
VFW::Info* myInfo;
HIC hIC;
@@ -88,12 +98,27 @@ namespace VFW {
m_width, m_height,
m_fpsNum, m_fpsDen,
m_keyframeInterval,
m_bitrate, m_quality;
m_bitrate, m_quality,
m_latency, m_maxQueueSize;
bool
m_useNormalCompress,
m_useTemporalFlag,
m_useBitrateFlag,
m_useQualityFlag,
m_forceKeyframes;
struct thread_data {
std::thread worker;
std::mutex lock;
std::condition_variable cv;
// Data Vector, Frame, Keyframe
std::queue<std::tuple<std::shared_ptr<std::vector<char>>, int64_t, bool>> data;
} m_preProcessData,
m_encodeData,
m_postProcessData;
std::mutex m_finalPacketsLock;
std::queue<std::tuple<std::shared_ptr<std::vector<char>>, int64_t, bool>> m_finalPackets;
bool m_threadShutdown;
std::shared_ptr<std::vector<char>> m_donotuse_datastor;
};
};
+20 -16
View File
@@ -20,26 +20,30 @@
#endif
// Plugin
#define PLUGIN_NAME "Video For Windows"
#define PLUGIN_NAME "Video For Windows"
#include "Version.h"
// Logging
#define PLOG(level, ...) blog(level, "[VFW] " __VA_ARGS__);
#define PLOG_ERROR(...) PLOG(LOG_ERROR, __VA_ARGS__)
#define PLOG_WARNING(...) PLOG(LOG_WARNING, __VA_ARGS__)
#define PLOG_INFO(...) PLOG(LOG_INFO, __VA_ARGS__)
#define PLOG_DEBUG(...) PLOG(LOG_DEBUG, __VA_ARGS__)
#define PLOG(level, ...) blog(level, "[VFW] " __VA_ARGS__);
#define PLOG_ERROR(...) PLOG(LOG_ERROR, __VA_ARGS__)
#define PLOG_WARNING(...) PLOG(LOG_WARNING, __VA_ARGS__)
#define PLOG_INFO(...) PLOG(LOG_INFO, __VA_ARGS__)
#define PLOG_DEBUG(...) PLOG(LOG_DEBUG, __VA_ARGS__)
// Properties
#define PROP_BITRATE "Bitrate"
#define PROP_QUALITY "Quality"
#define PROP_INTERVAL_TYPE "IntervalType"
#define PROP_KEYFRAME_INTERVAL "KeyframeInterval"
#define PROP_FORCE_KEYFRAMES "ForceKeyframes"
#define PROP_CONFIGURE "Configure"
#define PROP_BITRATE "Bitrate"
#define PROP_QUALITY "Quality"
#define PROP_INTERVAL_TYPE "IntervalType"
#define PROP_KEYFRAME_INTERVAL "KeyframeInterval"
#define PROP_KEYFRAME_INTERVAL2 "KeyframeInterval2"
#define PROP_FORCE_KEYFRAMES "ForceKeyframes"
#define PROP_MODE "Mode"
#define PROP_MODE_NORMAL "Mode.Normal"
#define PROP_MODE_TEMPORAL "Mode.Temporal"
#define PROP_MODE_SEQUENTIAL "Mode.Sequential"
#define PROP_CONFIGURE "Configure"
#define PROP_MODE_NORMAL "Mode.Normal"
#define PROP_MODE_TEMPORAL "Mode.Temporal"
#define PROP_MODE_SEQUENTIAL "Mode.Sequential"
#define PROP_ICMODE "ICMode"
#define PROP_ICMODE_COMPRESS "ICMode.Normal"
#define PROP_ICMODE_FASTCOMPRESS "ICMode.Fast"
#define PROP_LATENCY "Latency"
#define PROP_ABOUT "About"
+546 -141
View File
@@ -1,15 +1,19 @@
#include "enc-vfw.h"
#include "libobs/obs-encoder.h"
#include <chrono>
#include <list>
#include <tuple>
#include <vector>
#include <map>
#include <sstream>
#include <emmintrin.h>
#include <sstream>
std::map<std::string, VFW::Info*> _IdToInfo;
#define snprintf sprintf_s
static const size_t preprocessthreads = 4;
std::vector<std::pair<const char*, const char*>> codecCorrections = {
// Cinepak Codec
@@ -44,6 +48,9 @@ std::vector<std::pair<const char*, const char*>> codecCorrections = {
{ "dv50", "dvvideo" }, // Matrox DVCPRO50
};
std::map<uint64_t, std::tuple<uint8_t, uint8_t, uint8_t>> mpeg2hertz;
const uint64_t mpeg2hertz_mult = 0xFFFFFFFF;
std::string FourCCFromInt32(DWORD& fccHandler) {
return std::string(reinterpret_cast<char*>(&fccHandler), 4);
}
@@ -85,6 +92,43 @@ std::string FormattedICCError(LRESULT error) {
}
bool VFW::Initialize() {
// Initialize MPEG-2 Rewriting Map
std::pair<uint8_t, double_t> native_hertz[] = {
std::make_pair(8, 60.0),
std::make_pair(7, (60000.0 / 1001.0)),
std::make_pair(6, 50.0),
std::make_pair(5, 30.0),
std::make_pair(4, (30000.0 / 1001.0)),
std::make_pair(3, 25.0),
std::make_pair(2, 24.0),
std::make_pair(1, (24000.0 / 1001.0)),
};
for (auto kv : native_hertz) {
PLOG_DEBUG("(MPEG-2 Rewrite) Native Framerate: %f", kv.second);
mpeg2hertz.insert(std::make_pair(uint64_t(kv.second * mpeg2hertz_mult),
std::make_tuple(kv.first, 0, 0)));
}
for (auto kv : native_hertz) {
std::stringstream buf;
for (uint8_t num = 0; num < (1 << 2); num++) {
for (uint8_t den = 0; den < (1 << 5); den++) {
if (num == den)
continue; // Don't need the 1:1 ones >_>
double_t fps = kv.second * (double_t(num + 1) / double_t(den + 1));
uint64_t key = uint64_t(fps * mpeg2hertz_mult);
if (mpeg2hertz.count(key)) {
continue; // Duplicate.
}
mpeg2hertz.insert(std::make_pair(key, std::make_tuple(kv.first, num, den)));
buf << fps << " (" << kv.second << " * " << num + 1 << " / " << den + 1 << "), ";
}
}
PLOG_DEBUG("(MPEG-2 Rewrite) Extended Framerates for native %f: %s", kv.second, buf.str().c_str());
}
// Initialize all VFW Encoders (we can only use one anyway)
ICINFO icinfo;
std::memset(&icinfo, 0, sizeof(ICINFO));
@@ -174,7 +218,13 @@ const char* VFW::Encoder::get_name(void* type_data) {
void VFW::Encoder::get_defaults(obs_data_t *settings) {
obs_data_set_default_int(settings, PROP_BITRATE, 0);
obs_data_set_default_double(settings, PROP_QUALITY, 100.0);
obs_data_set_default_double(settings, PROP_KEYFRAME_INTERVAL, 0.0);
obs_data_set_default_int(settings, PROP_INTERVAL_TYPE, 0);
obs_data_set_default_double(settings, PROP_KEYFRAME_INTERVAL, 1.0);
obs_data_set_default_int(settings, PROP_KEYFRAME_INTERVAL2, 30);
obs_data_set_default_bool(settings, PROP_FORCE_KEYFRAMES, true);
obs_data_set_default_string(settings, PROP_MODE, PROP_MODE_SEQUENTIAL);
obs_data_set_default_string(settings, PROP_ICMODE, PROP_ICMODE_FASTCOMPRESS);
obs_data_set_default_int(settings, PROP_LATENCY, 3);
}
obs_properties_t* VFW::Encoder::get_properties(void *data) {
@@ -184,8 +234,12 @@ obs_properties_t* VFW::Encoder::get_properties(void *data) {
obs_properties_set_param(pr, data, nullptr);
obs_property_t* p;
p = obs_properties_add_int_slider(pr, PROP_BITRATE, "Bitrate", 0, 300000, 1);
p = obs_properties_add_button(pr, PROP_CONFIGURE, "Configure", cb_configure);
obs_property_set_visible(p, info->hasConfigure);
p = obs_properties_add_int_slider(pr, PROP_BITRATE, "Bitrate", 0, 1000000, 1);
obs_property_set_visible(p, ((info->icInfo2.dwFlags & VIDCF_CRUNCH) != 0));
p = obs_properties_add_float_slider(pr, PROP_QUALITY, "Quality", 1, 100, 0.01);
obs_property_set_visible(p, ((info->icInfo2.dwFlags & VIDCF_QUALITY) != 0));
@@ -193,7 +247,9 @@ obs_properties_t* VFW::Encoder::get_properties(void *data) {
obs_property_list_add_int(p, "Seconds", 0);
obs_property_list_add_int(p, "Frames", 1);
obs_property_set_modified_callback(p, cb_modified);
p = obs_properties_add_float(pr, PROP_KEYFRAME_INTERVAL, "Keyframe Interval", 0.00, 30.00, 0.01);
p = obs_properties_add_int(pr, PROP_KEYFRAME_INTERVAL2, "Keyframe Interval", 0, 300, 1);
p = obs_properties_add_bool(pr, PROP_FORCE_KEYFRAMES, "Force Keyframes");
p = obs_properties_add_list(pr, PROP_MODE, "Mode", OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
@@ -202,8 +258,12 @@ obs_properties_t* VFW::Encoder::get_properties(void *data) {
obs_property_list_add_string(p, "Temporal", PROP_MODE_TEMPORAL);
obs_property_list_add_string(p, "Sequential", PROP_MODE_SEQUENTIAL);
p = obs_properties_add_button(pr, PROP_CONFIGURE, "Configure", cb_configure);
obs_property_set_visible(p, info->hasConfigure);
p = obs_properties_add_list(pr, PROP_ICMODE, "Compress Mode", OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
obs_property_list_add_string(p, "Normal", PROP_ICMODE_COMPRESS);
obs_property_list_add_string(p, "Fast", PROP_ICMODE_FASTCOMPRESS);
p = obs_properties_add_int_slider(pr, PROP_LATENCY, "Frame Latency", 0, 10, 1);
p = obs_properties_add_button(pr, PROP_ABOUT, "About", cb_about);
obs_property_set_visible(p, info->hasAbout);
@@ -217,7 +277,12 @@ bool VFW::Encoder::cb_configure(obs_properties_t *pr, obs_property_t *p, void *d
VFW::Info* info = static_cast<VFW::Info*>(obs_properties_get_param(pr));
HIC hIC = ICOpen(info->icInfo.fccType, info->icInfo.fccHandler, ICMODE_FASTCOMPRESS);
HIC hIC = ICOpen(info->icInfo.fccType, info->icInfo.fccHandler, ICMODE_COMPRESS);
if (hIC == 0)
hIC = ICOpen(info->icInfo.fccType, info->icInfo.fccHandler, ICMODE_FASTCOMPRESS);
if (hIC == 0)
return false;
if (info->stateInfo.size() > 0) {
LRESULT err = ICSetState(hIC, info->stateInfo.data(), info->stateInfo.size());
if (err != ICERR_OK) {
@@ -248,26 +313,21 @@ bool VFW::Encoder::cb_about(obs_properties_t *pr, obs_property_t *p, void *data)
UNREFERENCED_PARAMETER(data);
VFW::Info* info = static_cast<VFW::Info*>(obs_properties_get_param(pr));
HIC hIC = ICOpen(info->icInfo.fccType, info->icInfo.fccHandler, ICMODE_FASTCOMPRESS);
HIC hIC = ICOpen(info->icInfo.fccType, info->icInfo.fccHandler, ICMODE_COMPRESS);
if (hIC == 0)
hIC = ICOpen(info->icInfo.fccType, info->icInfo.fccHandler, ICMODE_FASTCOMPRESS);
if (hIC == 0)
return false;
ICAbout(hIC, GetDesktopWindow());
ICClose(hIC);
return false;
}
bool VFW::Encoder::cb_modified(obs_properties_t *pr, obs_property_t *p, obs_data_t *data) {
if (strcmp(obs_property_name(p), PROP_INTERVAL_TYPE) == 0) {
int64_t v = obs_data_get_int(data, PROP_INTERVAL_TYPE);
switch (v) {
case 0:
obs_property_int_set_limits(obs_properties_get(pr, PROP_KEYFRAME_INTERVAL), 0.00, 30.00, 0.01);
break;
case 1:
obs_property_int_set_limits(obs_properties_get(pr, PROP_KEYFRAME_INTERVAL), 0, 300, 1);
break;
}
}
bool VFW::Encoder::cb_modified(obs_properties_t *pr, obs_property_t *, obs_data_t *data) {
int64_t v = obs_data_get_int(data, PROP_INTERVAL_TYPE);
obs_property_set_visible(obs_properties_get(pr, PROP_KEYFRAME_INTERVAL), v == 0);
obs_property_set_visible(obs_properties_get(pr, PROP_KEYFRAME_INTERVAL2), v == 1);
return true;
}
@@ -296,16 +356,18 @@ VFW::Encoder::Encoder(obs_data_t *settings, obs_encoder_t *encoder) {
case 0:
m_keyframeInterval = max(uint32_t(
factor * obs_data_get_double(settings, PROP_KEYFRAME_INTERVAL)
), 0);
), 0);
break;
case 1:
m_keyframeInterval =
(uint32_t)obs_data_get_double(settings, PROP_KEYFRAME_INTERVAL);
(uint32_t)obs_data_get_int(settings, PROP_KEYFRAME_INTERVAL2);
break;
}
m_forceKeyframes = obs_data_get_bool(settings, PROP_FORCE_KEYFRAMES);
m_bitrate = uint32_t(obs_data_get_int(settings, PROP_BITRATE));
m_quality = uint32_t(obs_data_get_double(settings, PROP_QUALITY) * 100);
m_latency = uint32_t(obs_data_get_int(settings, PROP_LATENCY));
m_maxQueueSize = (m_latency + 1) * 2;
PLOG_INFO("<%s> Initializing... ("
"Resolution: %" PRIu32 "x%" PRIu32 ", "
@@ -313,23 +375,42 @@ VFW::Encoder::Encoder(obs_data_t *settings, obs_encoder_t *encoder) {
"Bitrate: %" PRIu32 ", "
"Quality: %0.2f%%, "
"Keyframe Interval: %" PRIu32 " (%s), "
"Mode: %s"
"Mode: %s, "
"Compress Mode: %s"
")",
myInfo->Name.c_str(),
m_width, m_height,
m_fpsNum, m_fpsDen, (double_t)m_fpsNum / (double_t)m_fpsDen,
m_bitrate, m_quality,
m_keyframeInterval, m_forceKeyframes ? "Enforced" : "Standard",
obs_data_get_string(settings, PROP_MODE));
obs_data_get_string(settings, PROP_MODE),
obs_data_get_string(settings, PROP_ICMODE));
hIC = ICOpen(myInfo->icInfo.fccType, myInfo->icInfo.fccHandler, ICMODE_FASTCOMPRESS);
if (!hIC) {
PLOG_ERROR("<%s> Failed to initialize.",
myInfo->Name.c_str());
throw std::exception();
UINT mainIC = ICMODE_FASTCOMPRESS;
const char* mainICs = "Fast";
UINT backupIC = ICMODE_COMPRESS;
const char* backupICs = "Normal";
if (strcmp(obs_data_get_string(settings, PROP_ICMODE), PROP_ICMODE_COMPRESS) == 0) {
std::swap(mainIC, backupIC);
std::swap(mainICs, backupICs);
}
hIC = ICOpen(myInfo->icInfo.fccType, myInfo->icInfo.fccHandler, mainIC);
if (hIC == 0) {
PLOG_WARNING(
"<%s> Failed to initialize with %s compression mode, "
"falling back to %s compression mode...",
myInfo->Name.c_str(), mainICs, backupICs);
hIC = ICOpen(myInfo->icInfo.fccType, myInfo->icInfo.fccHandler, backupIC);
if (hIC == 0) {
PLOG_ERROR("<%s> Failed to initialize.",
myInfo->Name.c_str());
throw std::exception();
}
} else {
PLOG_DEBUG("<%s> Initialized, setting up.",
myInfo->Name.c_str());
PLOG_DEBUG("<%s> Initialized with %s compression mode, setting up...",
myInfo->Name.c_str(), mainICs);
}
// Store temporary flags
@@ -350,7 +431,7 @@ VFW::Encoder::Encoder(obs_data_t *settings, obs_encoder_t *encoder) {
// Load State from memory.
if (myInfo->stateInfo.size() > 0) {
LRESULT err = ICSetState(hIC, myInfo->stateInfo.data(), myInfo->stateInfo.size());
err = ICSetState(hIC, myInfo->stateInfo.data(), myInfo->stateInfo.size());
if (err != ICERR_OK) {
PLOG_ERROR("Failed to set state before encoding: %s.",
FormattedICCError(err).c_str());
@@ -359,7 +440,7 @@ VFW::Encoder::Encoder(obs_data_t *settings, obs_encoder_t *encoder) {
ICSetState(hIC, NULL, 0);
}
#pragma region Get Bitmap Information
#pragma region Get Bitmap Information
m_bufferInputBitmapInfo.resize(sizeof(BITMAPINFOHEADER));
std::memset(m_bufferInputBitmapInfo.data(), 0, m_bufferInputBitmapInfo.size());
m_inputBitmapInfo = reinterpret_cast<BITMAPINFO*>(m_bufferInputBitmapInfo.data());
@@ -393,7 +474,7 @@ VFW::Encoder::Encoder(obs_data_t *settings, obs_encoder_t *encoder) {
FormattedICCError(err).c_str());
throw std::exception();
}
#pragma endregion Get Bitmap Information
#pragma endregion Get Bitmap Information
// Prepare Input Buffers
size_t alignedWidth = (m_width / 16 + 1) * 16;
@@ -403,7 +484,7 @@ VFW::Encoder::Encoder(obs_data_t *settings, obs_encoder_t *encoder) {
// Begin Compression
if (m_useNormalCompress) {
LRESULT err = ICCompressBegin(hIC, m_inputBitmapInfo, m_outputBitmapInfo);
err = ICCompressBegin(hIC, m_inputBitmapInfo, m_outputBitmapInfo);
if (err != ICERR_OK) {
PLOG_ERROR("Unable to begin encoding: %s.", FormattedICCError(err).c_str());
throw std::runtime_error(FormattedICCError(err));
@@ -429,6 +510,12 @@ VFW::Encoder::Encoder(obs_data_t *settings, obs_encoder_t *encoder) {
}
}
// Thread stuff. These can't fail in most situations.
m_threadShutdown = false;
m_preProcessData.worker = std::thread(threadMain, this, 0);
m_encodeData.worker = std::thread(threadMain, this, 1);
m_postProcessData.worker = std::thread(threadMain, this, 2);
PLOG_INFO("<%s> Started.",
myInfo->Name.c_str());
}
@@ -438,6 +525,14 @@ void VFW::Encoder::destroy(void* data) {
}
VFW::Encoder::~Encoder() {
m_threadShutdown = true;
m_preProcessData.cv.notify_all();
m_preProcessData.worker.join();
m_encodeData.cv.notify_all();
m_encodeData.worker.join();
m_postProcessData.cv.notify_all();
m_postProcessData.worker.join();
if (m_useNormalCompress) {
ICCompressEnd(hIC);
} else {
@@ -453,121 +548,54 @@ bool VFW::Encoder::encode(void *data, struct encoder_frame *frame, struct encode
}
bool VFW::Encoder::encode(struct encoder_frame *frame, struct encoder_packet *packet, bool *received_packet) {
// Vertically invert Image for some reason.
auto tbegin = std::chrono::high_resolution_clock::now();
size_t maxX = (m_width * 4) / 16;
if (maxX <= 0)
maxX = 1;
namespace sc = std::chrono;
using schrc = std::chrono::high_resolution_clock;
// xF xE xD xC xB xA x9 x8 x7 x6 x5 x4 x3 x2 x1 x0
// A B G R A B G R A B G R A B G R
// Swizzle Mask (128-Bit swapping, could do higher but AMD decided to say fuck it)
/*
0xF 0xE 0xD 0xC
0xB 0xA 0x9 0x8
0x7 0x6 0x5 0x4
0x3 0x2 0x1 0x0
*/
__m128i swizzle = _mm_set_epi8(
0xF, 0xC, 0xD, 0xE,
0xB, 0x8, 0x9, 0xA,
0x7, 0x4, 0x5, 0x6,
0x3, 0x0, 0x1, 0x2);
const size_t lineSize = m_width * 4;
for (size_t y = 0; y < m_height; ++y) {
__m128i* pIn = reinterpret_cast<__m128i*>(frame->data[0] + (y * lineSize));
__m128i* pOut = reinterpret_cast<__m128i*>(m_bufferInput.data() + ((m_height - 1 - y) * lineSize));
for (size_t x = 0; x < maxX; ++x) {
__m128i in = _mm_loadu_si128(pIn + x);
__m128i out = _mm_shuffle_epi8(in, swizzle);
_mm_storeu_si128(pOut + x, out);
}
size_t queueSize = 0;
{
std::unique_lock<std::mutex> ulock(m_finalPacketsLock);
queueSize = m_finalPackets.size();
}
//for (size_t y = 0; y < m_height; ++y) {
// uint8_t* lineOut = reinterpret_cast<uint8_t*>(m_bufferInput.data()) + (lineOutSize * (m_height - y - 1));
// for (size_t x = 0; x < m_width; ++x) {
// lineOut[x * 4] = lineIn[x * 4 + 2];
// lineOut[x * 4 + 1] = lineIn[x * 4 + 1];
// lineOut[x * 4 + 2] = lineIn[x * 4];
// lineOut[x * 4 + 3] = lineIn[x * 4 + 3];
// }
//}
bool isKeyframe = false;
bool makeKeyframe = (m_keyframeInterval > 0) && ((frame->pts % m_keyframeInterval) == 0);
*received_packet = false;
if (m_useNormalCompress) {
DWORD dwFlags, cwCompFlags;
PLOG_DEBUG("<%s:Normal> PTS: %" PRIu32 ", Keyframe: %s",
myInfo->Name.c_str(), frame->pts, makeKeyframe ? "Yes" : "No");
LRESULT err = ICCompress(hIC,
makeKeyframe ? ICCOMPRESS_KEYFRAME : 0,
&(m_outputBitmapInfo->bmiHeader), m_bufferOutput.data(),
&(m_inputBitmapInfo->bmiHeader), m_bufferInput.data(),
&dwFlags, &cwCompFlags,
(LONG)frame->pts,
m_useBitrateFlag ? m_bitrate : 0,
m_useQualityFlag ? m_quality : 0,
!makeKeyframe && m_useTemporalFlag ? &(m_prevInputBitmapInfo->bmiHeader) : NULL,
!makeKeyframe && m_useTemporalFlag ? m_bufferPrevInput.data() : NULL);
if (err != ICERR_OK) {
PLOG_ERROR("Unable to encode: %s.", FormattedICCError(err).c_str());
return false;
bool submittedFrame = false;
long long maxTime = size_t((double_t(m_fpsNum) / double_t(m_fpsDen)) * 1000000000);
while (((*received_packet == false && queueSize >= m_latency) || (submittedFrame == false))
&& (sc::nanoseconds((schrc::now() - tbegin)).count() < maxTime)) {
// Submit frame to PreProcessor
if (!submittedFrame) {
std::unique_lock<std::mutex> ulock(m_preProcessData.lock);
if (m_preProcessData.data.size() < m_maxQueueSize) {
m_preProcessData.data.push(std::make_tuple(
std::make_shared<std::vector<char>>(frame->data[0], frame->data[0] + (frame->linesize[0] * this->m_height)),
frame->pts,
false));
submittedFrame = true;
m_preProcessData.cv.notify_all();
}
}
// Swap Buffers
m_bufferPrevInput.swap(m_bufferInput);
// Store some information we need right now.
packet->size = m_outputBitmapInfo->bmiHeader.biSizeImage;
isKeyframe = (cwCompFlags & AVIIF_KEYFRAME) != 0;
PLOG_DEBUG("<%s:Normal> PTS: %" PRIu32 ", Keyframe: %s, Size: %" PRIu32,
myInfo->Name.c_str(), frame->pts, isKeyframe ? "Yes" : "No", packet->size);
} else {
BOOL keyframe; LONG plSize = (LONG)m_bufferInput.size();
PLOG_DEBUG("<%s:Sequential> PTS: %" PRIu32 ", Keyframe: %s",
myInfo->Name.c_str(), frame->pts, makeKeyframe ? "Yes" : "No");
LPVOID fptr = ICSeqCompressFrame(
&cv,
makeKeyframe ? 1 : 0,
reinterpret_cast<LPVOID>(m_bufferInput.data()),
&keyframe,
&plSize);
if (fptr == NULL) {
PLOG_ERROR("Unable to encode.");
return false;
if (!*received_packet) {
std::unique_lock<std::mutex> ulock(m_finalPacketsLock);
if (m_finalPackets.size() > m_latency) {
auto front = m_finalPackets.front();
m_donotuse_datastor = std::get<0>(front);
packet->type = OBS_ENCODER_VIDEO;
packet->data = reinterpret_cast<uint8_t*>(m_donotuse_datastor->data());
packet->size = m_donotuse_datastor->size();
packet->pts = packet->dts = std::get<1>(front);
packet->keyframe = std::get<2>(front);
*received_packet = true;
m_finalPackets.pop();
PLOG_DEBUG("<%s> PTS: %" PRIu32 ", DTS: %" PRIu32 ", Keyframe: %s, Size: %" PRIu32,
myInfo->Name.c_str(), packet->pts, packet->dts, packet->keyframe ? "Yes" : "No", packet->size);
}
}
if (plSize > m_bufferOutput.size())
m_bufferOutput.resize(plSize);
std::memcpy(m_bufferOutput.data(), fptr, plSize);
packet->size = plSize;
isKeyframe = keyframe != 0;
PLOG_DEBUG("<%s:Sequential> PTS: %" PRIu32 ", Keyframe: %s, Size: %" PRIu32,
myInfo->Name.c_str(), frame->pts, isKeyframe ? "Yes" : "No", packet->size);
return true;
std::this_thread::sleep_for(sc::milliseconds(1));
}
*received_packet = true;
packet->type = OBS_ENCODER_VIDEO;
packet->data = reinterpret_cast<uint8_t*>(m_bufferOutput.data());
packet->keyframe = m_forceKeyframes ? makeKeyframe || isKeyframe : isKeyframe;
packet->pts = frame->pts;
packet->dts = frame->pts;
PLOG_DEBUG("<%s> PTS: %" PRIu32 ", DTS: %" PRIu32 ", Keyframe: %s, Size: %" PRIu32,
myInfo->Name.c_str(), packet->pts, packet->dts, packet->keyframe ? "Yes" : "No", packet->size);
return true;
}
@@ -607,7 +635,384 @@ void VFW::Encoder::get_video_info(void *data, struct video_scale_info *info) {
}
void VFW::Encoder::get_video_info(struct video_scale_info *info) {
info->format = VIDEO_FORMAT_RGBA;
info->format = VIDEO_FORMAT_BGRA;
info->range = VIDEO_RANGE_FULL;
info->colorspace = VIDEO_CS_DEFAULT;
info->colorspace = VIDEO_CS_709;
}
void VFW::Encoder::threadMain(void *data, int32_t flag) {
reinterpret_cast<Encoder*>(data)->threadLocal(flag);
}
void VFW::Encoder::threadLocal(int32_t flag) {
thread_data* td = &m_preProcessData;
if (flag == 0) {
td = &m_preProcessData;
} else if (flag == 1) {
td = &m_encodeData;
} else if (flag == 2) {
td = &m_postProcessData;
}
std::unique_lock<std::mutex> ulock(td->lock);
while (!m_threadShutdown) {
td->cv.wait(ulock, [this, td] {
return m_threadShutdown || td->data.size() > 0;
});
if (m_threadShutdown)
break;
if (flag == 0) {
preProcessLocal(ulock);
} else if (flag == 1) {
encodeLocal(ulock);
} else if (flag == 2) {
postProcessLocal(ulock);
}
}
}
void VFW::Encoder::preProcessLocal(std::unique_lock<std::mutex>& ul) {
auto total_start = std::chrono::high_resolution_clock::now();
auto kv = m_preProcessData.data.front();
ul.unlock();
auto invert_start = std::chrono::high_resolution_clock::now();
std::shared_ptr<std::vector<char>> inbuf = std::get<0>(kv);
std::shared_ptr<std::vector<char>> outbuf = inbuf;// std::make_shared<std::vector<char>>(inbuf->size());
size_t halfHeight = m_height / 2;
size_t lineSize = inbuf->size() / m_height;
std::vector<char> tempBuf(lineSize);
for (size_t line = 0; line < halfHeight; line++) {
size_t front = line * lineSize;
size_t back = (m_height - line - 1) * lineSize;
std::memcpy(tempBuf.data(), inbuf->data() + front, lineSize);
std::memcpy(outbuf->data() + front, inbuf->data() + back, lineSize);
std::memcpy(outbuf->data() + back, tempBuf.data(), lineSize);
}
auto invert_end = std::chrono::high_resolution_clock::now();
auto wait_start = std::chrono::high_resolution_clock::now();
// Do not fill queue if it is > latency.
size_t queueSize = m_maxQueueSize;
while (queueSize >= m_maxQueueSize) {
{
std::unique_lock<std::mutex> elock(m_encodeData.lock);
queueSize = m_encodeData.data.size();
}
if (queueSize >= m_maxQueueSize)
std::this_thread::sleep_for(std::chrono::milliseconds(1));
}
auto wait_end = std::chrono::high_resolution_clock::now();
auto queue_start = std::chrono::high_resolution_clock::now();
{
std::unique_lock<std::mutex> plock(m_preProcessData.lock);
std::unique_lock<std::mutex> elock(m_encodeData.lock);
m_encodeData.data.push(std::make_tuple(outbuf, std::get<1>(kv), std::get<2>(kv)));
m_encodeData.cv.notify_all();
m_preProcessData.data.pop();
}
auto queue_end = std::chrono::high_resolution_clock::now();
ul.lock();
auto total_end = std::chrono::high_resolution_clock::now();
auto time_total = std::chrono::duration_cast<std::chrono::nanoseconds>(total_end - total_start);
auto time_invert = std::chrono::duration_cast<std::chrono::nanoseconds>(invert_end - invert_start);
auto time_wait = std::chrono::duration_cast<std::chrono::nanoseconds>(wait_end - wait_start);
auto time_queue = std::chrono::duration_cast<std::chrono::nanoseconds>(queue_end - queue_start);
PLOG_INFO("[Thread PrePro] Frame %" PRId64 ": "
"Total: %" PRId64 "ns, "
"Invert: %" PRId64 "ns, "
"Wait: %" PRId64 "ns, "
"Queue: %" PRId64 "ns",
std::get<1>(kv),
time_total.count(),
time_invert.count(),
time_wait.count(),
time_queue.count());
}
void VFW::Encoder::encodeLocal(std::unique_lock<std::mutex>& ul) {
auto total_start = std::chrono::high_resolution_clock::now();
auto kv = m_encodeData.data.front();
ul.unlock();
auto encode_start = std::chrono::high_resolution_clock::now();
bool isKeyframe = false;
bool makeKeyframe = (m_keyframeInterval > 0) && ((std::get<1>(kv) % m_keyframeInterval) == 0);
std::shared_ptr<std::vector<char>> inbuf = std::get<0>(kv);
std::shared_ptr<std::vector<char>> outbuf = std::make_shared<std::vector<char>>(m_bufferOutput.size());
if (m_useNormalCompress) {
DWORD dwFlags = 0, cwCompFlags = 0;
PLOG_DEBUG("<%s:Normal> PTS: %" PRIu32 ", Keyframe: %s", myInfo->Name.c_str(), std::get<1>(kv), makeKeyframe ? "Yes" : "No");
LRESULT err = ICCompress(hIC,
makeKeyframe ? ICCOMPRESS_KEYFRAME : 0,
&(m_outputBitmapInfo->bmiHeader), outbuf->data(),
&(m_inputBitmapInfo->bmiHeader), inbuf->data(),
&dwFlags, &cwCompFlags,
(LONG)std::get<1>(kv),
m_useBitrateFlag ? m_bitrate : 0,
m_useQualityFlag ? m_quality : 0,
!makeKeyframe && m_useTemporalFlag ? &(m_prevInputBitmapInfo->bmiHeader) : NULL,
!makeKeyframe && m_useTemporalFlag ? m_bufferPrevInput.data() : NULL);
if (err == ICERR_OK) {
outbuf->resize(m_outputBitmapInfo->bmiHeader.biSizeImage);
//std::memcpy(outbuf->data(), m_bufferOutput.data(), outbuf->size());
isKeyframe = (cwCompFlags & AVIIF_KEYFRAME) != 0;
// Swap Buffers
m_bufferPrevInput.swap(m_bufferInput);
PLOG_DEBUG("<%s:Normal> PTS: %" PRIu32 ", Keyframe: %s, Size: %" PRIu32,
myInfo->Name.c_str(), std::get<1>(kv), isKeyframe ? "Yes" : "No", outbuf->size());
} else {
PLOG_ERROR("Unable to encode: %s.", FormattedICCError(err).c_str());
}
} else {
BOOL keyframe; LONG plSize = (LONG)inbuf->size();
PLOG_DEBUG("<%s:Sequential> PTS: %" PRIu32 ", Keyframe: %s",
myInfo->Name.c_str(), std::get<1>(kv), makeKeyframe ? "Yes" : "No");
LPVOID fptr = ICSeqCompressFrame(
&cv,
makeKeyframe ? 1 : 0,
reinterpret_cast<LPVOID>(inbuf->data()),
&keyframe,
&plSize);
if (fptr == NULL) {
PLOG_ERROR("Unable to encode.");
} else {
outbuf->resize(plSize);
std::memcpy(outbuf->data(), fptr, outbuf->size());
isKeyframe = keyframe != 0;
PLOG_DEBUG("<%s:Sequential> PTS: %" PRIu32 ", Keyframe: %s, Size: %" PRIu32,
myInfo->Name.c_str(), std::get<1>(kv), isKeyframe ? "Yes" : "No", outbuf->size());
}
}
isKeyframe = m_forceKeyframes ? makeKeyframe || isKeyframe : isKeyframe;
auto encode_end = std::chrono::high_resolution_clock::now();
auto wait_start = std::chrono::high_resolution_clock::now();
// Do not fill queue if it is > latency.
size_t queueSize = m_maxQueueSize;
while (queueSize >= m_maxQueueSize) {
{
std::unique_lock<std::mutex> elock(m_postProcessData.lock);
queueSize = m_postProcessData.data.size();
}
std::this_thread::sleep_for(std::chrono::milliseconds(1));
}
auto wait_end = std::chrono::high_resolution_clock::now();
auto queue_start = std::chrono::high_resolution_clock::now();
{
std::unique_lock<std::mutex> elock(m_encodeData.lock);
std::unique_lock<std::mutex> plock(m_postProcessData.lock);
m_postProcessData.data.push(std::make_tuple(outbuf, std::get<1>(kv), isKeyframe));
m_postProcessData.cv.notify_all();
m_encodeData.data.pop();
}
auto queue_end = std::chrono::high_resolution_clock::now();
ul.lock();
auto total_end = std::chrono::high_resolution_clock::now();
auto time_total = std::chrono::duration_cast<std::chrono::nanoseconds>(total_end - total_start);
auto time_encode = std::chrono::duration_cast<std::chrono::nanoseconds>(encode_end - encode_start);
auto time_wait = std::chrono::duration_cast<std::chrono::nanoseconds>(wait_end - wait_start);
auto time_queue = std::chrono::duration_cast<std::chrono::nanoseconds>(queue_end - queue_start);
PLOG_INFO("[Thread Encode] Frame %" PRId64 ": "
"Total: %" PRId64 "ns, "
"Encode: %" PRId64 "ns, "
"Wait: %" PRId64 "ns, "
"Queue: %" PRId64 "ns",
std::get<1>(kv),
time_total.count(),
time_encode.count(),
time_wait.count(),
time_queue.count());
}
void MatroxM2VBitstreamFixer(std::shared_ptr<std::vector<char>>& ptr, std::pair<uint32_t, uint32_t> framerate) {
// Matrox developers are idiots. Their MPEG-2 codec flags the content
// as interlaced top-field top-displayed, but in reality there is a
// progressive frame there. But that isn't the only issue.
// They also have structures in the stream that are larger than the
// standard allows for, or even invalid user data (all 0s). It's just
// a big bunch of "How did this ever work?" ...
// Find best match FPS
double_t sourceHertz = double_t(framerate.first) / double_t(framerate.second);
uint64_t sourceKey = uint64_t(sourceHertz * mpeg2hertz_mult);
std::pair<uint64_t, std::tuple<uint8_t, uint8_t, uint8_t>> bestMatch;
uint64_t bestMatchDiff = UINT64_MAX;
for (auto kv : mpeg2hertz) {
uint64_t diff = uint64_t(abs(int64_t(sourceKey) - int64_t(kv.first)));
if (diff < bestMatchDiff) {
bestMatch = kv;
bestMatchDiff = diff;
}
}
#ifdef _DEBUG
PLOG_DEBUG("(MPEG-2 Rewrite) Best Match for Content: %f (Diff: %llu idx: %i, extn: %i, extd: %i)",
double_t(bestMatch.first / mpeg2hertz_mult),
bestMatchDiff,
std::get<0>(bestMatch.second),
std::get<1>(bestMatch.second),
std::get<2>(bestMatch.second));
#endif
std::vector<char>* buffer = ptr.get();
// Rewrite stream
size_t streamPosition = 0, streamSize = buffer->size();
while ((streamPosition < streamSize) && ((streamSize - streamPosition) >= 4)) {
uint8_t blockId = (uint8_t)(*buffer)[streamPosition + 3];
streamPosition += 4;
switch (blockId) {
case 0xB3: // Sequence Header
{
#ifdef _DEBUG
PLOG_DEBUG("(MPEG-2 Rewrite) Sequence Header at %" PRIu64, streamPosition);
#endif
// Rewrite Framerate
char b = (*buffer)[streamPosition + 3];
char fpsflag = std::get<0>(bestMatch.second);
(*buffer)[streamPosition + 3] = (b & 0xF0) + (fpsflag & 0x0F);
streamPosition += 8;
break;
}
case 0xB5:
{
// streamPosition += 1;
char type = ((*buffer)[streamPosition] & 0xF0) >> 4;
switch (type) {
case 0b0001:
{ // Sequence Extension (Progressive, FPS, ChromaFormat possible)
#ifdef _DEBUG
PLOG_DEBUG("(MPEG-2 Rewrite) Sequence Extension at %" PRIu64, streamPosition);
#endif
(*buffer)[streamPosition + 1] |= 1 << 3; // Flag Progressive
(*buffer)[streamPosition + 5] = // Rewrite FPS Ext
((*buffer)[streamPosition + 5] & 0x80)
| ((std::get<1>(bestMatch.second) & 0x3) << 5)
| ((std::get<2>(bestMatch.second) & 0x1F));
streamPosition += 6;
break;
}
case 0b0010:
{
#ifdef _DEBUG
PLOG_DEBUG("(MPEG-2 Rewrite) Sequence Display Extension at %" PRIu64, streamPosition);
#endif
if ((*buffer)[streamPosition] & 0b1) {
streamPosition += 8;
} else {
streamPosition += 5;
}
}
break;
case 0b1000:
{ // Chroma Stuff?
#ifdef _DEBUG
PLOG_DEBUG("(MPEG-2 Rewrite) Picture Coding Extension at %" PRIu64, streamPosition);
#endif
// Picture Structure
(*buffer)[streamPosition + 2] |= 0x3; // Full Frame
(*buffer)[streamPosition + 3] &= ~(1 << 7); // top field first
(*buffer)[streamPosition + 3] &= ~(1 << 1); // repeat first field
(*buffer)[streamPosition + 4] |= 1 << 7; // progressive
if ((*buffer)[streamPosition + 4] & 0b1000000) {
streamPosition += 7;
} else {
streamPosition += 5;
}
break;
}
#ifdef _DEBUG
default:
PLOG_DEBUG("(MPEG-2 Rewrite) Unknown Extension %" PRIx8 " at %" PRIu64, type, streamPosition);
break;
#endif
}
break;
}
}
// Seek to a valid position.
while ((streamPosition < buffer->size()) && ((buffer->size() - streamPosition) >= 4)
&& (
((*buffer)[streamPosition] != 0)
|| ((*buffer)[streamPosition + 1] != 0)
|| ((*buffer)[streamPosition + 2] != 1)
)) {
++streamPosition;
}
}
}
void VFW::Encoder::postProcessLocal(std::unique_lock<std::mutex>& ul) {
auto total_start = std::chrono::high_resolution_clock::now();
auto kv = m_postProcessData.data.front();
ul.unlock();
auto bitstream_start = std::chrono::high_resolution_clock::now();
if ((myInfo->Id == "mvcVfwMpeg2-mmes")
|| (myInfo->Id == "mvcVfwMpeg2Alpha-m704")
|| (myInfo->Id == "mvcVfwMpeg2HD-m701")
|| (myInfo->Id == "mvcVfwMpeg2Alpha-m705")) {
MatroxM2VBitstreamFixer(std::get<0>(kv), std::make_pair(m_fpsNum, m_fpsDen));
}
auto bitstream_end = std::chrono::high_resolution_clock::now();
auto wait_start = std::chrono::high_resolution_clock::now();
// Do not fill queue if it is > latency.
size_t queueSize = m_maxQueueSize;
while (queueSize >= m_maxQueueSize) {
{
std::unique_lock<std::mutex> flock(m_finalPacketsLock);
queueSize = m_finalPackets.size();
}
std::this_thread::sleep_for(std::chrono::milliseconds(1));
}
auto wait_end = std::chrono::high_resolution_clock::now();
auto queue_start = std::chrono::high_resolution_clock::now();
{
std::unique_lock<std::mutex> plock(m_postProcessData.lock);
std::unique_lock<std::mutex> flock(m_finalPacketsLock);
m_finalPackets.push(kv);
m_postProcessData.data.pop();
}
auto queue_end = std::chrono::high_resolution_clock::now();
ul.lock();
auto total_end = std::chrono::high_resolution_clock::now();
auto time_total = std::chrono::duration_cast<std::chrono::nanoseconds>(total_end - total_start);
auto time_bitstream = std::chrono::duration_cast<std::chrono::nanoseconds>(bitstream_end - bitstream_start);
auto time_wait = std::chrono::duration_cast<std::chrono::nanoseconds>(wait_end - wait_start);
auto time_queue = std::chrono::duration_cast<std::chrono::nanoseconds>(queue_end - queue_start);
PLOG_INFO("[Thread PostPr] Frame %" PRId64 ": "
"Total: %" PRId64 "ns, "
"Bitstream: %" PRId64 "ns, "
"Wait: %" PRId64 "ns, "
"Queue: %" PRId64 "ns",
std::get<1>(kv),
time_total.count(),
time_bitstream.count(),
time_wait.count(),
time_queue.count());
}