diff --git a/CMakeLists.txt b/CMakeLists.txt index 699b136..b7a06f7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ PROJECT(enc-vfw) # Version ################################################################################ SET(enc-vfw_VERSION_MAJOR 0) -SET(enc-vfw_VERSION_MINOR 1) +SET(enc-vfw_VERSION_MINOR 2) SET(enc-vfw_VERSION_PATCH 0) SET(enc-vfw_VERSION_BUILD 0) #configure_file( diff --git a/Include/enc-vfw.h b/Include/enc-vfw.h index efd4db8..0daf2d7 100644 --- a/Include/enc-vfw.h +++ b/Include/enc-vfw.h @@ -5,6 +5,10 @@ #include #include #include +#include +#include +#include +#include // VFW #define COMPMAN @@ -68,6 +72,12 @@ namespace VFW { static void get_video_info(void *data, struct video_scale_info *info); void get_video_info(struct video_scale_info *info); + static void threadMain(void *data, int32_t flag); + void threadLocal(int32_t flag); + void preProcessLocal(std::unique_lock& ul); + void encodeLocal(std::unique_lock& ul); + void postProcessLocal(std::unique_lock& ul); + private: VFW::Info* myInfo; HIC hIC; @@ -88,12 +98,27 @@ namespace VFW { m_width, m_height, m_fpsNum, m_fpsDen, m_keyframeInterval, - m_bitrate, m_quality; - bool - m_useNormalCompress, + m_bitrate, m_quality, + m_latency, m_maxQueueSize; + bool + m_useNormalCompress, m_useTemporalFlag, m_useBitrateFlag, m_useQualityFlag, m_forceKeyframes; + + struct thread_data { + std::thread worker; + std::mutex lock; + std::condition_variable cv; + // Data Vector, Frame, Keyframe + std::queue>, int64_t, bool>> data; + } m_preProcessData, + m_encodeData, + m_postProcessData; + std::mutex m_finalPacketsLock; + std::queue>, int64_t, bool>> m_finalPackets; + bool m_threadShutdown; + std::shared_ptr> m_donotuse_datastor; }; }; diff --git a/Include/plugin.h b/Include/plugin.h index 0bff0b6..d3740d1 100644 --- a/Include/plugin.h +++ b/Include/plugin.h @@ -20,26 +20,30 @@ #endif // Plugin -#define PLUGIN_NAME "Video For Windows" +#define PLUGIN_NAME "Video For Windows" #include "Version.h" // Logging -#define PLOG(level, ...) blog(level, "[VFW] " __VA_ARGS__); -#define PLOG_ERROR(...) PLOG(LOG_ERROR, __VA_ARGS__) -#define PLOG_WARNING(...) PLOG(LOG_WARNING, __VA_ARGS__) -#define PLOG_INFO(...) PLOG(LOG_INFO, __VA_ARGS__) -#define PLOG_DEBUG(...) PLOG(LOG_DEBUG, __VA_ARGS__) +#define PLOG(level, ...) blog(level, "[VFW] " __VA_ARGS__); +#define PLOG_ERROR(...) PLOG(LOG_ERROR, __VA_ARGS__) +#define PLOG_WARNING(...) PLOG(LOG_WARNING, __VA_ARGS__) +#define PLOG_INFO(...) PLOG(LOG_INFO, __VA_ARGS__) +#define PLOG_DEBUG(...) PLOG(LOG_DEBUG, __VA_ARGS__) // Properties -#define PROP_BITRATE "Bitrate" -#define PROP_QUALITY "Quality" -#define PROP_INTERVAL_TYPE "IntervalType" -#define PROP_KEYFRAME_INTERVAL "KeyframeInterval" -#define PROP_FORCE_KEYFRAMES "ForceKeyframes" +#define PROP_CONFIGURE "Configure" +#define PROP_BITRATE "Bitrate" +#define PROP_QUALITY "Quality" +#define PROP_INTERVAL_TYPE "IntervalType" +#define PROP_KEYFRAME_INTERVAL "KeyframeInterval" +#define PROP_KEYFRAME_INTERVAL2 "KeyframeInterval2" +#define PROP_FORCE_KEYFRAMES "ForceKeyframes" #define PROP_MODE "Mode" -#define PROP_MODE_NORMAL "Mode.Normal" -#define PROP_MODE_TEMPORAL "Mode.Temporal" -#define PROP_MODE_SEQUENTIAL "Mode.Sequential" - -#define PROP_CONFIGURE "Configure" +#define PROP_MODE_NORMAL "Mode.Normal" +#define PROP_MODE_TEMPORAL "Mode.Temporal" +#define PROP_MODE_SEQUENTIAL "Mode.Sequential" +#define PROP_ICMODE "ICMode" +#define PROP_ICMODE_COMPRESS "ICMode.Normal" +#define PROP_ICMODE_FASTCOMPRESS "ICMode.Fast" +#define PROP_LATENCY "Latency" #define PROP_ABOUT "About" \ No newline at end of file diff --git a/Source/enc-vfw.cpp b/Source/enc-vfw.cpp index 3775844..c7ef765 100644 --- a/Source/enc-vfw.cpp +++ b/Source/enc-vfw.cpp @@ -1,15 +1,19 @@ #include "enc-vfw.h" #include "libobs/obs-encoder.h" +#include #include +#include #include #include #include #include +#include std::map _IdToInfo; #define snprintf sprintf_s +static const size_t preprocessthreads = 4; std::vector> codecCorrections = { // Cinepak Codec @@ -44,6 +48,9 @@ std::vector> codecCorrections = { { "dv50", "dvvideo" }, // Matrox DVCPRO50 }; +std::map> mpeg2hertz; +const uint64_t mpeg2hertz_mult = 0xFFFFFFFF; + std::string FourCCFromInt32(DWORD& fccHandler) { return std::string(reinterpret_cast(&fccHandler), 4); } @@ -85,6 +92,43 @@ std::string FormattedICCError(LRESULT error) { } bool VFW::Initialize() { + // Initialize MPEG-2 Rewriting Map + std::pair native_hertz[] = { + std::make_pair(8, 60.0), + std::make_pair(7, (60000.0 / 1001.0)), + std::make_pair(6, 50.0), + std::make_pair(5, 30.0), + std::make_pair(4, (30000.0 / 1001.0)), + std::make_pair(3, 25.0), + std::make_pair(2, 24.0), + std::make_pair(1, (24000.0 / 1001.0)), + }; + for (auto kv : native_hertz) { + PLOG_DEBUG("(MPEG-2 Rewrite) Native Framerate: %f", kv.second); + mpeg2hertz.insert(std::make_pair(uint64_t(kv.second * mpeg2hertz_mult), + std::make_tuple(kv.first, 0, 0))); + + } + for (auto kv : native_hertz) { + std::stringstream buf; + for (uint8_t num = 0; num < (1 << 2); num++) { + for (uint8_t den = 0; den < (1 << 5); den++) { + if (num == den) + continue; // Don't need the 1:1 ones >_> + + double_t fps = kv.second * (double_t(num + 1) / double_t(den + 1)); + uint64_t key = uint64_t(fps * mpeg2hertz_mult); + if (mpeg2hertz.count(key)) { + continue; // Duplicate. + } + + mpeg2hertz.insert(std::make_pair(key, std::make_tuple(kv.first, num, den))); + buf << fps << " (" << kv.second << " * " << num + 1 << " / " << den + 1 << "), "; + } + } + PLOG_DEBUG("(MPEG-2 Rewrite) Extended Framerates for native %f: %s", kv.second, buf.str().c_str()); + } + // Initialize all VFW Encoders (we can only use one anyway) ICINFO icinfo; std::memset(&icinfo, 0, sizeof(ICINFO)); @@ -174,7 +218,13 @@ const char* VFW::Encoder::get_name(void* type_data) { void VFW::Encoder::get_defaults(obs_data_t *settings) { obs_data_set_default_int(settings, PROP_BITRATE, 0); obs_data_set_default_double(settings, PROP_QUALITY, 100.0); - obs_data_set_default_double(settings, PROP_KEYFRAME_INTERVAL, 0.0); + obs_data_set_default_int(settings, PROP_INTERVAL_TYPE, 0); + obs_data_set_default_double(settings, PROP_KEYFRAME_INTERVAL, 1.0); + obs_data_set_default_int(settings, PROP_KEYFRAME_INTERVAL2, 30); + obs_data_set_default_bool(settings, PROP_FORCE_KEYFRAMES, true); + obs_data_set_default_string(settings, PROP_MODE, PROP_MODE_SEQUENTIAL); + obs_data_set_default_string(settings, PROP_ICMODE, PROP_ICMODE_FASTCOMPRESS); + obs_data_set_default_int(settings, PROP_LATENCY, 3); } obs_properties_t* VFW::Encoder::get_properties(void *data) { @@ -184,8 +234,12 @@ obs_properties_t* VFW::Encoder::get_properties(void *data) { obs_properties_set_param(pr, data, nullptr); obs_property_t* p; - p = obs_properties_add_int_slider(pr, PROP_BITRATE, "Bitrate", 0, 300000, 1); + p = obs_properties_add_button(pr, PROP_CONFIGURE, "Configure", cb_configure); + obs_property_set_visible(p, info->hasConfigure); + + p = obs_properties_add_int_slider(pr, PROP_BITRATE, "Bitrate", 0, 1000000, 1); obs_property_set_visible(p, ((info->icInfo2.dwFlags & VIDCF_CRUNCH) != 0)); + p = obs_properties_add_float_slider(pr, PROP_QUALITY, "Quality", 1, 100, 0.01); obs_property_set_visible(p, ((info->icInfo2.dwFlags & VIDCF_QUALITY) != 0)); @@ -193,7 +247,9 @@ obs_properties_t* VFW::Encoder::get_properties(void *data) { obs_property_list_add_int(p, "Seconds", 0); obs_property_list_add_int(p, "Frames", 1); obs_property_set_modified_callback(p, cb_modified); + p = obs_properties_add_float(pr, PROP_KEYFRAME_INTERVAL, "Keyframe Interval", 0.00, 30.00, 0.01); + p = obs_properties_add_int(pr, PROP_KEYFRAME_INTERVAL2, "Keyframe Interval", 0, 300, 1); p = obs_properties_add_bool(pr, PROP_FORCE_KEYFRAMES, "Force Keyframes"); p = obs_properties_add_list(pr, PROP_MODE, "Mode", OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING); @@ -202,8 +258,12 @@ obs_properties_t* VFW::Encoder::get_properties(void *data) { obs_property_list_add_string(p, "Temporal", PROP_MODE_TEMPORAL); obs_property_list_add_string(p, "Sequential", PROP_MODE_SEQUENTIAL); - p = obs_properties_add_button(pr, PROP_CONFIGURE, "Configure", cb_configure); - obs_property_set_visible(p, info->hasConfigure); + p = obs_properties_add_list(pr, PROP_ICMODE, "Compress Mode", OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING); + obs_property_list_add_string(p, "Normal", PROP_ICMODE_COMPRESS); + obs_property_list_add_string(p, "Fast", PROP_ICMODE_FASTCOMPRESS); + + p = obs_properties_add_int_slider(pr, PROP_LATENCY, "Frame Latency", 0, 10, 1); + p = obs_properties_add_button(pr, PROP_ABOUT, "About", cb_about); obs_property_set_visible(p, info->hasAbout); @@ -217,7 +277,12 @@ bool VFW::Encoder::cb_configure(obs_properties_t *pr, obs_property_t *p, void *d VFW::Info* info = static_cast(obs_properties_get_param(pr)); - HIC hIC = ICOpen(info->icInfo.fccType, info->icInfo.fccHandler, ICMODE_FASTCOMPRESS); + HIC hIC = ICOpen(info->icInfo.fccType, info->icInfo.fccHandler, ICMODE_COMPRESS); + if (hIC == 0) + hIC = ICOpen(info->icInfo.fccType, info->icInfo.fccHandler, ICMODE_FASTCOMPRESS); + if (hIC == 0) + return false; + if (info->stateInfo.size() > 0) { LRESULT err = ICSetState(hIC, info->stateInfo.data(), info->stateInfo.size()); if (err != ICERR_OK) { @@ -248,26 +313,21 @@ bool VFW::Encoder::cb_about(obs_properties_t *pr, obs_property_t *p, void *data) UNREFERENCED_PARAMETER(data); VFW::Info* info = static_cast(obs_properties_get_param(pr)); - HIC hIC = ICOpen(info->icInfo.fccType, info->icInfo.fccHandler, ICMODE_FASTCOMPRESS); + HIC hIC = ICOpen(info->icInfo.fccType, info->icInfo.fccHandler, ICMODE_COMPRESS); + if (hIC == 0) + hIC = ICOpen(info->icInfo.fccType, info->icInfo.fccHandler, ICMODE_FASTCOMPRESS); + if (hIC == 0) + return false; ICAbout(hIC, GetDesktopWindow()); ICClose(hIC); return false; } -bool VFW::Encoder::cb_modified(obs_properties_t *pr, obs_property_t *p, obs_data_t *data) { - if (strcmp(obs_property_name(p), PROP_INTERVAL_TYPE) == 0) { - int64_t v = obs_data_get_int(data, PROP_INTERVAL_TYPE); - - switch (v) { - case 0: - obs_property_int_set_limits(obs_properties_get(pr, PROP_KEYFRAME_INTERVAL), 0.00, 30.00, 0.01); - break; - case 1: - obs_property_int_set_limits(obs_properties_get(pr, PROP_KEYFRAME_INTERVAL), 0, 300, 1); - break; - } - } +bool VFW::Encoder::cb_modified(obs_properties_t *pr, obs_property_t *, obs_data_t *data) { + int64_t v = obs_data_get_int(data, PROP_INTERVAL_TYPE); + obs_property_set_visible(obs_properties_get(pr, PROP_KEYFRAME_INTERVAL), v == 0); + obs_property_set_visible(obs_properties_get(pr, PROP_KEYFRAME_INTERVAL2), v == 1); return true; } @@ -296,16 +356,18 @@ VFW::Encoder::Encoder(obs_data_t *settings, obs_encoder_t *encoder) { case 0: m_keyframeInterval = max(uint32_t( factor * obs_data_get_double(settings, PROP_KEYFRAME_INTERVAL) - ), 0); + ), 0); break; case 1: - m_keyframeInterval = - (uint32_t)obs_data_get_double(settings, PROP_KEYFRAME_INTERVAL); + m_keyframeInterval = + (uint32_t)obs_data_get_int(settings, PROP_KEYFRAME_INTERVAL2); break; } m_forceKeyframes = obs_data_get_bool(settings, PROP_FORCE_KEYFRAMES); m_bitrate = uint32_t(obs_data_get_int(settings, PROP_BITRATE)); m_quality = uint32_t(obs_data_get_double(settings, PROP_QUALITY) * 100); + m_latency = uint32_t(obs_data_get_int(settings, PROP_LATENCY)); + m_maxQueueSize = (m_latency + 1) * 2; PLOG_INFO("<%s> Initializing... (" "Resolution: %" PRIu32 "x%" PRIu32 ", " @@ -313,23 +375,42 @@ VFW::Encoder::Encoder(obs_data_t *settings, obs_encoder_t *encoder) { "Bitrate: %" PRIu32 ", " "Quality: %0.2f%%, " "Keyframe Interval: %" PRIu32 " (%s), " - "Mode: %s" + "Mode: %s, " + "Compress Mode: %s" ")", myInfo->Name.c_str(), m_width, m_height, m_fpsNum, m_fpsDen, (double_t)m_fpsNum / (double_t)m_fpsDen, m_bitrate, m_quality, m_keyframeInterval, m_forceKeyframes ? "Enforced" : "Standard", - obs_data_get_string(settings, PROP_MODE)); + obs_data_get_string(settings, PROP_MODE), + obs_data_get_string(settings, PROP_ICMODE)); - hIC = ICOpen(myInfo->icInfo.fccType, myInfo->icInfo.fccHandler, ICMODE_FASTCOMPRESS); - if (!hIC) { - PLOG_ERROR("<%s> Failed to initialize.", - myInfo->Name.c_str()); - throw std::exception(); + UINT mainIC = ICMODE_FASTCOMPRESS; + const char* mainICs = "Fast"; + UINT backupIC = ICMODE_COMPRESS; + const char* backupICs = "Normal"; + + if (strcmp(obs_data_get_string(settings, PROP_ICMODE), PROP_ICMODE_COMPRESS) == 0) { + std::swap(mainIC, backupIC); + std::swap(mainICs, backupICs); + } + + hIC = ICOpen(myInfo->icInfo.fccType, myInfo->icInfo.fccHandler, mainIC); + if (hIC == 0) { + PLOG_WARNING( + "<%s> Failed to initialize with %s compression mode, " + "falling back to %s compression mode...", + myInfo->Name.c_str(), mainICs, backupICs); + hIC = ICOpen(myInfo->icInfo.fccType, myInfo->icInfo.fccHandler, backupIC); + if (hIC == 0) { + PLOG_ERROR("<%s> Failed to initialize.", + myInfo->Name.c_str()); + throw std::exception(); + } } else { - PLOG_DEBUG("<%s> Initialized, setting up.", - myInfo->Name.c_str()); + PLOG_DEBUG("<%s> Initialized with %s compression mode, setting up...", + myInfo->Name.c_str(), mainICs); } // Store temporary flags @@ -350,7 +431,7 @@ VFW::Encoder::Encoder(obs_data_t *settings, obs_encoder_t *encoder) { // Load State from memory. if (myInfo->stateInfo.size() > 0) { - LRESULT err = ICSetState(hIC, myInfo->stateInfo.data(), myInfo->stateInfo.size()); + err = ICSetState(hIC, myInfo->stateInfo.data(), myInfo->stateInfo.size()); if (err != ICERR_OK) { PLOG_ERROR("Failed to set state before encoding: %s.", FormattedICCError(err).c_str()); @@ -359,7 +440,7 @@ VFW::Encoder::Encoder(obs_data_t *settings, obs_encoder_t *encoder) { ICSetState(hIC, NULL, 0); } -#pragma region Get Bitmap Information + #pragma region Get Bitmap Information m_bufferInputBitmapInfo.resize(sizeof(BITMAPINFOHEADER)); std::memset(m_bufferInputBitmapInfo.data(), 0, m_bufferInputBitmapInfo.size()); m_inputBitmapInfo = reinterpret_cast(m_bufferInputBitmapInfo.data()); @@ -393,7 +474,7 @@ VFW::Encoder::Encoder(obs_data_t *settings, obs_encoder_t *encoder) { FormattedICCError(err).c_str()); throw std::exception(); } -#pragma endregion Get Bitmap Information + #pragma endregion Get Bitmap Information // Prepare Input Buffers size_t alignedWidth = (m_width / 16 + 1) * 16; @@ -403,7 +484,7 @@ VFW::Encoder::Encoder(obs_data_t *settings, obs_encoder_t *encoder) { // Begin Compression if (m_useNormalCompress) { - LRESULT err = ICCompressBegin(hIC, m_inputBitmapInfo, m_outputBitmapInfo); + err = ICCompressBegin(hIC, m_inputBitmapInfo, m_outputBitmapInfo); if (err != ICERR_OK) { PLOG_ERROR("Unable to begin encoding: %s.", FormattedICCError(err).c_str()); throw std::runtime_error(FormattedICCError(err)); @@ -429,6 +510,12 @@ VFW::Encoder::Encoder(obs_data_t *settings, obs_encoder_t *encoder) { } } + // Thread stuff. These can't fail in most situations. + m_threadShutdown = false; + m_preProcessData.worker = std::thread(threadMain, this, 0); + m_encodeData.worker = std::thread(threadMain, this, 1); + m_postProcessData.worker = std::thread(threadMain, this, 2); + PLOG_INFO("<%s> Started.", myInfo->Name.c_str()); } @@ -438,6 +525,14 @@ void VFW::Encoder::destroy(void* data) { } VFW::Encoder::~Encoder() { + m_threadShutdown = true; + m_preProcessData.cv.notify_all(); + m_preProcessData.worker.join(); + m_encodeData.cv.notify_all(); + m_encodeData.worker.join(); + m_postProcessData.cv.notify_all(); + m_postProcessData.worker.join(); + if (m_useNormalCompress) { ICCompressEnd(hIC); } else { @@ -453,121 +548,54 @@ bool VFW::Encoder::encode(void *data, struct encoder_frame *frame, struct encode } bool VFW::Encoder::encode(struct encoder_frame *frame, struct encoder_packet *packet, bool *received_packet) { - // Vertically invert Image for some reason. + auto tbegin = std::chrono::high_resolution_clock::now(); - size_t maxX = (m_width * 4) / 16; - if (maxX <= 0) - maxX = 1; + namespace sc = std::chrono; + using schrc = std::chrono::high_resolution_clock; - // xF xE xD xC xB xA x9 x8 x7 x6 x5 x4 x3 x2 x1 x0 - // A B G R A B G R A B G R A B G R - - // Swizzle Mask (128-Bit swapping, could do higher but AMD decided to say fuck it) - /* - 0xF 0xE 0xD 0xC - 0xB 0xA 0x9 0x8 - 0x7 0x6 0x5 0x4 - 0x3 0x2 0x1 0x0 - */ - __m128i swizzle = _mm_set_epi8( - 0xF, 0xC, 0xD, 0xE, - 0xB, 0x8, 0x9, 0xA, - 0x7, 0x4, 0x5, 0x6, - 0x3, 0x0, 0x1, 0x2); - - const size_t lineSize = m_width * 4; - for (size_t y = 0; y < m_height; ++y) { - __m128i* pIn = reinterpret_cast<__m128i*>(frame->data[0] + (y * lineSize)); - __m128i* pOut = reinterpret_cast<__m128i*>(m_bufferInput.data() + ((m_height - 1 - y) * lineSize)); - - for (size_t x = 0; x < maxX; ++x) { - __m128i in = _mm_loadu_si128(pIn + x); - __m128i out = _mm_shuffle_epi8(in, swizzle); - _mm_storeu_si128(pOut + x, out); - } + size_t queueSize = 0; + { + std::unique_lock ulock(m_finalPacketsLock); + queueSize = m_finalPackets.size(); } - //for (size_t y = 0; y < m_height; ++y) { - - // uint8_t* lineOut = reinterpret_cast(m_bufferInput.data()) + (lineOutSize * (m_height - y - 1)); - - // for (size_t x = 0; x < m_width; ++x) { - // lineOut[x * 4] = lineIn[x * 4 + 2]; - // lineOut[x * 4 + 1] = lineIn[x * 4 + 1]; - // lineOut[x * 4 + 2] = lineIn[x * 4]; - // lineOut[x * 4 + 3] = lineIn[x * 4 + 3]; - // } - - //} - - bool isKeyframe = false; - bool makeKeyframe = (m_keyframeInterval > 0) && ((frame->pts % m_keyframeInterval) == 0); - - *received_packet = false; - if (m_useNormalCompress) { - DWORD dwFlags, cwCompFlags; - - PLOG_DEBUG("<%s:Normal> PTS: %" PRIu32 ", Keyframe: %s", - myInfo->Name.c_str(), frame->pts, makeKeyframe ? "Yes" : "No"); - LRESULT err = ICCompress(hIC, - makeKeyframe ? ICCOMPRESS_KEYFRAME : 0, - &(m_outputBitmapInfo->bmiHeader), m_bufferOutput.data(), - &(m_inputBitmapInfo->bmiHeader), m_bufferInput.data(), - &dwFlags, &cwCompFlags, - (LONG)frame->pts, - m_useBitrateFlag ? m_bitrate : 0, - m_useQualityFlag ? m_quality : 0, - !makeKeyframe && m_useTemporalFlag ? &(m_prevInputBitmapInfo->bmiHeader) : NULL, - !makeKeyframe && m_useTemporalFlag ? m_bufferPrevInput.data() : NULL); - if (err != ICERR_OK) { - PLOG_ERROR("Unable to encode: %s.", FormattedICCError(err).c_str()); - return false; + bool submittedFrame = false; + long long maxTime = size_t((double_t(m_fpsNum) / double_t(m_fpsDen)) * 1000000000); + while (((*received_packet == false && queueSize >= m_latency) || (submittedFrame == false)) + && (sc::nanoseconds((schrc::now() - tbegin)).count() < maxTime)) { + // Submit frame to PreProcessor + if (!submittedFrame) { + std::unique_lock ulock(m_preProcessData.lock); + if (m_preProcessData.data.size() < m_maxQueueSize) { + m_preProcessData.data.push(std::make_tuple( + std::make_shared>(frame->data[0], frame->data[0] + (frame->linesize[0] * this->m_height)), + frame->pts, + false)); + submittedFrame = true; + m_preProcessData.cv.notify_all(); + } } - // Swap Buffers - m_bufferPrevInput.swap(m_bufferInput); - - // Store some information we need right now. - packet->size = m_outputBitmapInfo->bmiHeader.biSizeImage; - isKeyframe = (cwCompFlags & AVIIF_KEYFRAME) != 0; - - PLOG_DEBUG("<%s:Normal> PTS: %" PRIu32 ", Keyframe: %s, Size: %" PRIu32, - myInfo->Name.c_str(), frame->pts, isKeyframe ? "Yes" : "No", packet->size); - } else { - BOOL keyframe; LONG plSize = (LONG)m_bufferInput.size(); - - PLOG_DEBUG("<%s:Sequential> PTS: %" PRIu32 ", Keyframe: %s", - myInfo->Name.c_str(), frame->pts, makeKeyframe ? "Yes" : "No"); - LPVOID fptr = ICSeqCompressFrame( - &cv, - makeKeyframe ? 1 : 0, - reinterpret_cast(m_bufferInput.data()), - &keyframe, - &plSize); - if (fptr == NULL) { - PLOG_ERROR("Unable to encode."); - return false; + if (!*received_packet) { + std::unique_lock ulock(m_finalPacketsLock); + if (m_finalPackets.size() > m_latency) { + auto front = m_finalPackets.front(); + m_donotuse_datastor = std::get<0>(front); + packet->type = OBS_ENCODER_VIDEO; + packet->data = reinterpret_cast(m_donotuse_datastor->data()); + packet->size = m_donotuse_datastor->size(); + packet->pts = packet->dts = std::get<1>(front); + packet->keyframe = std::get<2>(front); + *received_packet = true; + m_finalPackets.pop(); + PLOG_DEBUG("<%s> PTS: %" PRIu32 ", DTS: %" PRIu32 ", Keyframe: %s, Size: %" PRIu32, + myInfo->Name.c_str(), packet->pts, packet->dts, packet->keyframe ? "Yes" : "No", packet->size); + } } - if (plSize > m_bufferOutput.size()) - m_bufferOutput.resize(plSize); - std::memcpy(m_bufferOutput.data(), fptr, plSize); - packet->size = plSize; - isKeyframe = keyframe != 0; - PLOG_DEBUG("<%s:Sequential> PTS: %" PRIu32 ", Keyframe: %s, Size: %" PRIu32, - myInfo->Name.c_str(), frame->pts, isKeyframe ? "Yes" : "No", packet->size); - return true; + std::this_thread::sleep_for(sc::milliseconds(1)); } - *received_packet = true; - packet->type = OBS_ENCODER_VIDEO; - packet->data = reinterpret_cast(m_bufferOutput.data()); - packet->keyframe = m_forceKeyframes ? makeKeyframe || isKeyframe : isKeyframe; - packet->pts = frame->pts; - packet->dts = frame->pts; - PLOG_DEBUG("<%s> PTS: %" PRIu32 ", DTS: %" PRIu32 ", Keyframe: %s, Size: %" PRIu32, - myInfo->Name.c_str(), packet->pts, packet->dts, packet->keyframe ? "Yes" : "No", packet->size); - return true; } @@ -607,7 +635,384 @@ void VFW::Encoder::get_video_info(void *data, struct video_scale_info *info) { } void VFW::Encoder::get_video_info(struct video_scale_info *info) { - info->format = VIDEO_FORMAT_RGBA; + info->format = VIDEO_FORMAT_BGRA; info->range = VIDEO_RANGE_FULL; - info->colorspace = VIDEO_CS_DEFAULT; + info->colorspace = VIDEO_CS_709; +} + +void VFW::Encoder::threadMain(void *data, int32_t flag) { + reinterpret_cast(data)->threadLocal(flag); +} + +void VFW::Encoder::threadLocal(int32_t flag) { + thread_data* td = &m_preProcessData; + if (flag == 0) { + td = &m_preProcessData; + } else if (flag == 1) { + td = &m_encodeData; + } else if (flag == 2) { + td = &m_postProcessData; + } + + std::unique_lock ulock(td->lock); + while (!m_threadShutdown) { + td->cv.wait(ulock, [this, td] { + return m_threadShutdown || td->data.size() > 0; + }); + if (m_threadShutdown) + break; + + if (flag == 0) { + preProcessLocal(ulock); + } else if (flag == 1) { + encodeLocal(ulock); + } else if (flag == 2) { + postProcessLocal(ulock); + } + } +} + +void VFW::Encoder::preProcessLocal(std::unique_lock& ul) { + auto total_start = std::chrono::high_resolution_clock::now(); + + auto kv = m_preProcessData.data.front(); + ul.unlock(); + + auto invert_start = std::chrono::high_resolution_clock::now(); + std::shared_ptr> inbuf = std::get<0>(kv); + std::shared_ptr> outbuf = inbuf;// std::make_shared>(inbuf->size()); + + size_t halfHeight = m_height / 2; + size_t lineSize = inbuf->size() / m_height; + std::vector tempBuf(lineSize); + for (size_t line = 0; line < halfHeight; line++) { + size_t front = line * lineSize; + size_t back = (m_height - line - 1) * lineSize; + + std::memcpy(tempBuf.data(), inbuf->data() + front, lineSize); + std::memcpy(outbuf->data() + front, inbuf->data() + back, lineSize); + std::memcpy(outbuf->data() + back, tempBuf.data(), lineSize); + } + auto invert_end = std::chrono::high_resolution_clock::now(); + + auto wait_start = std::chrono::high_resolution_clock::now(); + // Do not fill queue if it is > latency. + size_t queueSize = m_maxQueueSize; + while (queueSize >= m_maxQueueSize) { + { + std::unique_lock elock(m_encodeData.lock); + queueSize = m_encodeData.data.size(); + } + + if (queueSize >= m_maxQueueSize) + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + auto wait_end = std::chrono::high_resolution_clock::now(); + + auto queue_start = std::chrono::high_resolution_clock::now(); + { + std::unique_lock plock(m_preProcessData.lock); + std::unique_lock elock(m_encodeData.lock); + m_encodeData.data.push(std::make_tuple(outbuf, std::get<1>(kv), std::get<2>(kv))); + m_encodeData.cv.notify_all(); + m_preProcessData.data.pop(); + } + auto queue_end = std::chrono::high_resolution_clock::now(); + + ul.lock(); + auto total_end = std::chrono::high_resolution_clock::now(); + + auto time_total = std::chrono::duration_cast(total_end - total_start); + auto time_invert = std::chrono::duration_cast(invert_end - invert_start); + auto time_wait = std::chrono::duration_cast(wait_end - wait_start); + auto time_queue = std::chrono::duration_cast(queue_end - queue_start); + PLOG_INFO("[Thread PrePro] Frame %" PRId64 ": " + "Total: %" PRId64 "ns, " + "Invert: %" PRId64 "ns, " + "Wait: %" PRId64 "ns, " + "Queue: %" PRId64 "ns", + std::get<1>(kv), + time_total.count(), + time_invert.count(), + time_wait.count(), + time_queue.count()); +} + +void VFW::Encoder::encodeLocal(std::unique_lock& ul) { + auto total_start = std::chrono::high_resolution_clock::now(); + + auto kv = m_encodeData.data.front(); + ul.unlock(); + + auto encode_start = std::chrono::high_resolution_clock::now(); + bool isKeyframe = false; + bool makeKeyframe = (m_keyframeInterval > 0) && ((std::get<1>(kv) % m_keyframeInterval) == 0); + std::shared_ptr> inbuf = std::get<0>(kv); + std::shared_ptr> outbuf = std::make_shared>(m_bufferOutput.size()); + if (m_useNormalCompress) { + DWORD dwFlags = 0, cwCompFlags = 0; + PLOG_DEBUG("<%s:Normal> PTS: %" PRIu32 ", Keyframe: %s", myInfo->Name.c_str(), std::get<1>(kv), makeKeyframe ? "Yes" : "No"); + LRESULT err = ICCompress(hIC, + makeKeyframe ? ICCOMPRESS_KEYFRAME : 0, + &(m_outputBitmapInfo->bmiHeader), outbuf->data(), + &(m_inputBitmapInfo->bmiHeader), inbuf->data(), + &dwFlags, &cwCompFlags, + (LONG)std::get<1>(kv), + m_useBitrateFlag ? m_bitrate : 0, + m_useQualityFlag ? m_quality : 0, + !makeKeyframe && m_useTemporalFlag ? &(m_prevInputBitmapInfo->bmiHeader) : NULL, + !makeKeyframe && m_useTemporalFlag ? m_bufferPrevInput.data() : NULL); + if (err == ICERR_OK) { + outbuf->resize(m_outputBitmapInfo->bmiHeader.biSizeImage); + //std::memcpy(outbuf->data(), m_bufferOutput.data(), outbuf->size()); + + isKeyframe = (cwCompFlags & AVIIF_KEYFRAME) != 0; + + // Swap Buffers + m_bufferPrevInput.swap(m_bufferInput); + + PLOG_DEBUG("<%s:Normal> PTS: %" PRIu32 ", Keyframe: %s, Size: %" PRIu32, + myInfo->Name.c_str(), std::get<1>(kv), isKeyframe ? "Yes" : "No", outbuf->size()); + } else { + PLOG_ERROR("Unable to encode: %s.", FormattedICCError(err).c_str()); + } + } else { + BOOL keyframe; LONG plSize = (LONG)inbuf->size(); + PLOG_DEBUG("<%s:Sequential> PTS: %" PRIu32 ", Keyframe: %s", + myInfo->Name.c_str(), std::get<1>(kv), makeKeyframe ? "Yes" : "No"); + LPVOID fptr = ICSeqCompressFrame( + &cv, + makeKeyframe ? 1 : 0, + reinterpret_cast(inbuf->data()), + &keyframe, + &plSize); + if (fptr == NULL) { + PLOG_ERROR("Unable to encode."); + } else { + outbuf->resize(plSize); + std::memcpy(outbuf->data(), fptr, outbuf->size()); + isKeyframe = keyframe != 0; + + PLOG_DEBUG("<%s:Sequential> PTS: %" PRIu32 ", Keyframe: %s, Size: %" PRIu32, + myInfo->Name.c_str(), std::get<1>(kv), isKeyframe ? "Yes" : "No", outbuf->size()); + } + } + + isKeyframe = m_forceKeyframes ? makeKeyframe || isKeyframe : isKeyframe; + auto encode_end = std::chrono::high_resolution_clock::now(); + + auto wait_start = std::chrono::high_resolution_clock::now(); + // Do not fill queue if it is > latency. + size_t queueSize = m_maxQueueSize; + while (queueSize >= m_maxQueueSize) { + { + std::unique_lock elock(m_postProcessData.lock); + queueSize = m_postProcessData.data.size(); + } + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + auto wait_end = std::chrono::high_resolution_clock::now(); + + auto queue_start = std::chrono::high_resolution_clock::now(); + { + std::unique_lock elock(m_encodeData.lock); + std::unique_lock plock(m_postProcessData.lock); + m_postProcessData.data.push(std::make_tuple(outbuf, std::get<1>(kv), isKeyframe)); + m_postProcessData.cv.notify_all(); + m_encodeData.data.pop(); + } + auto queue_end = std::chrono::high_resolution_clock::now(); + + ul.lock(); + auto total_end = std::chrono::high_resolution_clock::now(); + + auto time_total = std::chrono::duration_cast(total_end - total_start); + auto time_encode = std::chrono::duration_cast(encode_end - encode_start); + auto time_wait = std::chrono::duration_cast(wait_end - wait_start); + auto time_queue = std::chrono::duration_cast(queue_end - queue_start); + PLOG_INFO("[Thread Encode] Frame %" PRId64 ": " + "Total: %" PRId64 "ns, " + "Encode: %" PRId64 "ns, " + "Wait: %" PRId64 "ns, " + "Queue: %" PRId64 "ns", + std::get<1>(kv), + time_total.count(), + time_encode.count(), + time_wait.count(), + time_queue.count()); +} + +void MatroxM2VBitstreamFixer(std::shared_ptr>& ptr, std::pair framerate) { + // Matrox developers are idiots. Their MPEG-2 codec flags the content + // as interlaced top-field top-displayed, but in reality there is a + // progressive frame there. But that isn't the only issue. + // They also have structures in the stream that are larger than the + // standard allows for, or even invalid user data (all 0s). It's just + // a big bunch of "How did this ever work?" ... + + // Find best match FPS + double_t sourceHertz = double_t(framerate.first) / double_t(framerate.second); + uint64_t sourceKey = uint64_t(sourceHertz * mpeg2hertz_mult); + std::pair> bestMatch; + uint64_t bestMatchDiff = UINT64_MAX; + for (auto kv : mpeg2hertz) { + uint64_t diff = uint64_t(abs(int64_t(sourceKey) - int64_t(kv.first))); + if (diff < bestMatchDiff) { + bestMatch = kv; + bestMatchDiff = diff; + } + } + #ifdef _DEBUG + PLOG_DEBUG("(MPEG-2 Rewrite) Best Match for Content: %f (Diff: %llu idx: %i, extn: %i, extd: %i)", + double_t(bestMatch.first / mpeg2hertz_mult), + bestMatchDiff, + std::get<0>(bestMatch.second), + std::get<1>(bestMatch.second), + std::get<2>(bestMatch.second)); + #endif + + std::vector* buffer = ptr.get(); + + // Rewrite stream + size_t streamPosition = 0, streamSize = buffer->size(); + while ((streamPosition < streamSize) && ((streamSize - streamPosition) >= 4)) { + uint8_t blockId = (uint8_t)(*buffer)[streamPosition + 3]; + streamPosition += 4; + + switch (blockId) { + case 0xB3: // Sequence Header + { + #ifdef _DEBUG + PLOG_DEBUG("(MPEG-2 Rewrite) Sequence Header at %" PRIu64, streamPosition); + #endif + // Rewrite Framerate + char b = (*buffer)[streamPosition + 3]; + char fpsflag = std::get<0>(bestMatch.second); + (*buffer)[streamPosition + 3] = (b & 0xF0) + (fpsflag & 0x0F); + streamPosition += 8; + break; + } + case 0xB5: + { +// streamPosition += 1; + char type = ((*buffer)[streamPosition] & 0xF0) >> 4; + switch (type) { + case 0b0001: + { // Sequence Extension (Progressive, FPS, ChromaFormat possible) + #ifdef _DEBUG + PLOG_DEBUG("(MPEG-2 Rewrite) Sequence Extension at %" PRIu64, streamPosition); + #endif + (*buffer)[streamPosition + 1] |= 1 << 3; // Flag Progressive + (*buffer)[streamPosition + 5] = // Rewrite FPS Ext + ((*buffer)[streamPosition + 5] & 0x80) + | ((std::get<1>(bestMatch.second) & 0x3) << 5) + | ((std::get<2>(bestMatch.second) & 0x1F)); + streamPosition += 6; + break; + } + case 0b0010: + { + #ifdef _DEBUG + PLOG_DEBUG("(MPEG-2 Rewrite) Sequence Display Extension at %" PRIu64, streamPosition); + #endif + if ((*buffer)[streamPosition] & 0b1) { + streamPosition += 8; + } else { + streamPosition += 5; + } + } + break; + case 0b1000: + { // Chroma Stuff? + #ifdef _DEBUG + PLOG_DEBUG("(MPEG-2 Rewrite) Picture Coding Extension at %" PRIu64, streamPosition); + #endif + // Picture Structure + (*buffer)[streamPosition + 2] |= 0x3; // Full Frame + (*buffer)[streamPosition + 3] &= ~(1 << 7); // top field first + (*buffer)[streamPosition + 3] &= ~(1 << 1); // repeat first field + (*buffer)[streamPosition + 4] |= 1 << 7; // progressive + if ((*buffer)[streamPosition + 4] & 0b1000000) { + streamPosition += 7; + } else { + streamPosition += 5; + } + break; + } + #ifdef _DEBUG + default: + PLOG_DEBUG("(MPEG-2 Rewrite) Unknown Extension %" PRIx8 " at %" PRIu64, type, streamPosition); + break; + #endif + } + + break; + } + } + + // Seek to a valid position. + while ((streamPosition < buffer->size()) && ((buffer->size() - streamPosition) >= 4) + && ( + ((*buffer)[streamPosition] != 0) + || ((*buffer)[streamPosition + 1] != 0) + || ((*buffer)[streamPosition + 2] != 1) + )) { + ++streamPosition; + } + } +} + +void VFW::Encoder::postProcessLocal(std::unique_lock& ul) { + auto total_start = std::chrono::high_resolution_clock::now(); + + auto kv = m_postProcessData.data.front(); + ul.unlock(); + + auto bitstream_start = std::chrono::high_resolution_clock::now(); + if ((myInfo->Id == "mvcVfwMpeg2-mmes") + || (myInfo->Id == "mvcVfwMpeg2Alpha-m704") + || (myInfo->Id == "mvcVfwMpeg2HD-m701") + || (myInfo->Id == "mvcVfwMpeg2Alpha-m705")) { + MatroxM2VBitstreamFixer(std::get<0>(kv), std::make_pair(m_fpsNum, m_fpsDen)); + } + auto bitstream_end = std::chrono::high_resolution_clock::now(); + + auto wait_start = std::chrono::high_resolution_clock::now(); + // Do not fill queue if it is > latency. + size_t queueSize = m_maxQueueSize; + while (queueSize >= m_maxQueueSize) { + { + std::unique_lock flock(m_finalPacketsLock); + queueSize = m_finalPackets.size(); + } + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + auto wait_end = std::chrono::high_resolution_clock::now(); + + auto queue_start = std::chrono::high_resolution_clock::now(); + { + std::unique_lock plock(m_postProcessData.lock); + std::unique_lock flock(m_finalPacketsLock); + m_finalPackets.push(kv); + m_postProcessData.data.pop(); + } + auto queue_end = std::chrono::high_resolution_clock::now(); + + ul.lock(); + auto total_end = std::chrono::high_resolution_clock::now(); + + auto time_total = std::chrono::duration_cast(total_end - total_start); + auto time_bitstream = std::chrono::duration_cast(bitstream_end - bitstream_start); + auto time_wait = std::chrono::duration_cast(wait_end - wait_start); + auto time_queue = std::chrono::duration_cast(queue_end - queue_start); + PLOG_INFO("[Thread PostPr] Frame %" PRId64 ": " + "Total: %" PRId64 "ns, " + "Bitstream: %" PRId64 "ns, " + "Wait: %" PRId64 "ns, " + "Queue: %" PRId64 "ns", + std::get<1>(kv), + time_total.count(), + time_bitstream.count(), + time_wait.count(), + time_queue.count()); }