ScreenCapture: Single Header DirectX Library with HDR Support
DirectX hardware screen capture and encoding with audio mixing capabilities. H264/H265/VP80/VP90/FLAC/MP3. HDR supported.
Introduction
There is lots of code out there about screen capture. Here is a simple, single-header, hardware-accelerated library. If you are using Windows 8 or later, you can easily include it in your projects.
Requirements
- Windows 8 or later
Video Capture
We need to enumerate our adapters and the monitors attached to them, with the aid of DXGI:
static void GetAdapters(std::vector<CComPtr<IDXGIAdapter1>>& a)
{
    CComPtr<IDXGIFactory1> df;
    CreateDXGIFactory1(__uuidof(IDXGIFactory1), (void**)&df);
    a.clear();
    if (!df)
        return;
    int L = 0;
    for (;;)
    {
        // EnumAdapters1 leaves the pointer null past the last adapter
        CComPtr<IDXGIAdapter1> lDxgiAdapter;
        df->EnumAdapters1(L, &lDxgiAdapter);
        if (!lDxgiAdapter)
            break;
        L++;
        a.push_back(lDxgiAdapter);
    }
}
Then we instantiate a Direct3D 11 device on one of them, or on the default:
HRESULT CreateDirect3DDevice(IDXGIAdapter1* g)
{
    HRESULT hr = S_OK;

    // Driver types supported
    D3D_DRIVER_TYPE DriverTypes[] =
    {
        D3D_DRIVER_TYPE_HARDWARE,
        D3D_DRIVER_TYPE_WARP,
        D3D_DRIVER_TYPE_REFERENCE,
    };
    UINT NumDriverTypes = ARRAYSIZE(DriverTypes);

    // Feature levels supported
    D3D_FEATURE_LEVEL FeatureLevels[] =
    {
        D3D_FEATURE_LEVEL_11_0,
        D3D_FEATURE_LEVEL_10_1,
        D3D_FEATURE_LEVEL_10_0,
        D3D_FEATURE_LEVEL_9_3,
        D3D_FEATURE_LEVEL_9_2,
        D3D_FEATURE_LEVEL_9_1
    };
    UINT NumFeatureLevels = ARRAYSIZE(FeatureLevels);
    D3D_FEATURE_LEVEL FeatureLevel;

    // Create device. Note: when an explicit adapter is passed,
    // D3D11CreateDevice requires D3D_DRIVER_TYPE_UNKNOWN.
    for (UINT DriverTypeIndex = 0; DriverTypeIndex < NumDriverTypes; ++DriverTypeIndex)
    {
        hr = D3D11CreateDevice(g,
            g ? D3D_DRIVER_TYPE_UNKNOWN : DriverTypes[DriverTypeIndex],
            nullptr, D3D11_CREATE_DEVICE_VIDEO_SUPPORT, FeatureLevels, NumFeatureLevels,
            D3D11_SDK_VERSION, &device, &FeatureLevel, &context);
        if (SUCCEEDED(hr))
        {
            // Device creation succeeded, no need to loop anymore
            break;
        }
    }
    if (FAILED(hr))
        return hr;
    return S_OK;
}
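For instance, here is a minimal sketch that picks an adapter from GetAdapters() (or falls back to the default) and creates the device with it; the selection logic is illustrative only:

// Enumerate adapters and create the device on a chosen one (or the default).
std::vector<CComPtr<IDXGIAdapter1>> adapters;
GetAdapters(adapters);

IDXGIAdapter1* pick = nullptr;   // nullptr -> let D3D pick the default
if (adapters.size() > 1)
    pick = adapters[1];          // e.g., prefer a second GPU if present

if (FAILED(CreateDirect3DDevice(pick)))
    return;                      // no usable device on this system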
Next, we create the desktop duplication for the chosen output:
bool Prepare(UINT Output = 0)
{
    // Get DXGI device
    CComPtr<IDXGIDevice> lDxgiDevice;
    lDxgiDevice = device;
    if (!lDxgiDevice)
        return 0;

    // Get DXGI adapter
    CComPtr<IDXGIAdapter> lDxgiAdapter;
    auto hr = lDxgiDevice->GetParent(
        __uuidof(IDXGIAdapter),
        reinterpret_cast<void**>(&lDxgiAdapter));
    if (FAILED(hr))
        return 0;
    lDxgiDevice = 0;

    // Get output
    CComPtr<IDXGIOutput> lDxgiOutput;
    hr = lDxgiAdapter->EnumOutputs(Output, &lDxgiOutput);
    if (FAILED(hr))
        return 0;
    lDxgiAdapter = 0;
    DXGI_OUTPUT_DESC lOutputDesc;
    hr = lDxgiOutput->GetDesc(&lOutputDesc);

    // QI for IDXGIOutput1
    CComPtr<IDXGIOutput1> lDxgiOutput1;
    lDxgiOutput1 = lDxgiOutput;
    if (!lDxgiOutput1)
        return 0;
    lDxgiOutput = 0;

    // Create desktop duplication
    hr = lDxgiOutput1->DuplicateOutput(
        device,
        &lDeskDupl);
    if (FAILED(hr))
        return 0;
    lDxgiOutput1 = 0;

    // Create GDI drawing texture
    lDeskDupl->GetDesc(&lOutputDuplDesc);
    D3D11_TEXTURE2D_DESC desc = {};
    desc.Width = lOutputDuplDesc.ModeDesc.Width;
    desc.Height = lOutputDuplDesc.ModeDesc.Height;
    desc.Format = lOutputDuplDesc.ModeDesc.Format;
    desc.ArraySize = 1;
    desc.BindFlags = D3D11_BIND_FLAG::D3D11_BIND_RENDER_TARGET;
    desc.MiscFlags = D3D11_RESOURCE_MISC_GDI_COMPATIBLE;
    desc.SampleDesc.Count = 1;
    desc.SampleDesc.Quality = 0;
    desc.MipLevels = 1;
    desc.CPUAccessFlags = 0;
    desc.Usage = D3D11_USAGE_DEFAULT;
    hr = device->CreateTexture2D(&desc, NULL, &lGDIImage);
    if (FAILED(hr))
        return 0;
    if (lGDIImage == nullptr)
        return 0;

    // Create CPU access (staging) texture
    desc.Width = lOutputDuplDesc.ModeDesc.Width;
    desc.Height = lOutputDuplDesc.ModeDesc.Height;
    desc.Format = lOutputDuplDesc.ModeDesc.Format;
    desc.ArraySize = 1;
    desc.BindFlags = 0;
    desc.MiscFlags = 0;
    desc.SampleDesc.Count = 1;
    desc.SampleDesc.Quality = 0;
    desc.MipLevels = 1;
    desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
    desc.Usage = D3D11_USAGE_STAGING;
    hr = device->CreateTexture2D(&desc, NULL, &lDestImage);
    if (FAILED(hr))
        return 0;
    if (lDestImage == nullptr)
        return 0;
    return 1;
}
To get the screenshot, we loop:
hr = cap.lDeskDupl->AcquireNextFrame(
    0,
    &lFrameInfo,
    &lDesktopResource);
if (hr == DXGI_ERROR_WAIT_TIMEOUT)
    hr = S_OK;
if (FAILED(hr))
    break;
if (lDesktopResource && !cap.Get(lDesktopResource, dp.Cursor,
    dp.rx.right && dp.rx.bottom ? &dp.rx : 0))
    break;
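For context, here is a hedged sketch of a full loop around that snippet. Note that each successful AcquireNextFrame must be matched by a ReleaseFrame before the next call, otherwise DXGI returns DXGI_ERROR_INVALID_CALL:

// Illustrative frame loop: acquire, copy, release, repeat.
for (;;)
{
    DXGI_OUTDUPL_FRAME_INFO lFrameInfo = {};
    CComPtr<IDXGIResource> lDesktopResource;
    auto hr = cap.lDeskDupl->AcquireNextFrame(0, &lFrameInfo, &lDesktopResource);
    if (hr == DXGI_ERROR_WAIT_TIMEOUT)
        continue;                       // nothing changed on screen yet
    if (FAILED(hr))
        break;                          // e.g., DXGI_ERROR_ACCESS_LOST

    bool ok = cap.Get(lDesktopResource, dp.Cursor,
                      dp.rx.right && dp.rx.bottom ? &dp.rx : 0);
    lDesktopResource = nullptr;
    cap.lDeskDupl->ReleaseFrame();      // required before the next Acquire
    if (!ok)
        break;
    // ... hand cap.buf to the sink writer here ...
}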
The Get() method returns the bitmap, optionally with the cursor drawn into it, and cropped if a rectangle is given:
bool Get(IDXGIResource* lDesktopResource, bool Curs, RECT* rcx = 0)
{
    // QI for ID3D11Texture2D
    CComPtr<ID3D11Texture2D> lAcquiredDesktopImage;
    if (!lDesktopResource)
        return 0;
    auto hr = lDesktopResource->QueryInterface(IID_PPV_ARGS(&lAcquiredDesktopImage));
    if (!lAcquiredDesktopImage)
        return 0;
    lDesktopResource = 0;

    // Copy image into GDI drawing texture
    context->CopyResource(lGDIImage, lAcquiredDesktopImage);

    // Draw cursor image into GDI drawing texture
    CComPtr<IDXGISurface1> lIDXGISurface1;
    lIDXGISurface1 = lGDIImage;
    if (!lIDXGISurface1)
        return 0;
    CURSORINFO lCursorInfo = { 0 };
    lCursorInfo.cbSize = sizeof(lCursorInfo);
    auto lBoolres = GetCursorInfo(&lCursorInfo);
    if (lBoolres == TRUE)
    {
        if (lCursorInfo.flags == CURSOR_SHOWING && Curs)
        {
            auto lCursorPosition = lCursorInfo.ptScreenPos;
            HDC lHDC;
            lIDXGISurface1->GetDC(FALSE, &lHDC);
            DrawIconEx(
                lHDC,
                lCursorPosition.x,
                lCursorPosition.y,
                lCursorInfo.hCursor,
                0,
                0,
                0,
                0,
                DI_NORMAL | DI_DEFAULTSIZE);
            lIDXGISurface1->ReleaseDC(nullptr);
        }
    }

    // Copy image into CPU access texture
    context->CopyResource(lDestImage, lGDIImage);

    // Copy from CPU access texture to bitmap buffer
    D3D11_MAPPED_SUBRESOURCE resource;
    UINT subresource = D3D11CalcSubresource(0, 0, 0);
    hr = context->Map(lDestImage, subresource, D3D11_MAP_READ_WRITE, 0, &resource);
    if (FAILED(hr))
        return 0;

    auto sz = lOutputDuplDesc.ModeDesc.Width
        * lOutputDuplDesc.ModeDesc.Height * 4;
    auto sz2 = sz;
    buf.resize(sz);
    if (rcx)
    {
        sz2 = (rcx->right - rcx->left) * (rcx->bottom - rcx->top) * 4;
        buf.resize(sz2);
        sz = sz2;
    }
    UINT lBmpRowPitch = lOutputDuplDesc.ModeDesc.Width * 4;
    if (rcx)
        lBmpRowPitch = (rcx->right - rcx->left) * 4;
    UINT lRowPitch = std::min<UINT>(lBmpRowPitch, resource.RowPitch);

    // Copy row by row, bottom-up, so the buffer matches bitmap layout
    BYTE* sptr = reinterpret_cast<BYTE*>(resource.pData);
    BYTE* dptr = buf.data() + sz - lBmpRowPitch;
    if (rcx)
        sptr += rcx->left * 4;
    for (size_t h = 0; h < lOutputDuplDesc.ModeDesc.Height; ++h)
    {
        if (rcx && h < (size_t)rcx->top)
        {
            sptr += resource.RowPitch;
            continue;
        }
        if (rcx && h >= (size_t)rcx->bottom)
            break;
        memcpy_s(dptr, lBmpRowPitch, sptr, lRowPitch);
        sptr += resource.RowPitch;
        dptr -= lBmpRowPitch;
    }
    context->Unmap(lDestImage, subresource);
    return 1;
}
After that, you can feed the "buf" data into Media Foundation's sink writer.
Audio Capture
You use an IAudioClient to obtain an IAudioCaptureClient and record audio in a separate thread.
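Setting this up for loopback capture of the default playback device might look like the sketch below (standard Core Audio calls, error handling omitted for brevity); the library's actual device selection goes through AudioFrom, described later:

// Open the default render endpoint in loopback mode and get a capture client.
CComPtr<IMMDeviceEnumerator> enumr;
CoCreateInstance(__uuidof(MMDeviceEnumerator), 0, CLSCTX_ALL,
                 __uuidof(IMMDeviceEnumerator), (void**)&enumr);

CComPtr<IMMDevice> dev;
enumr->GetDefaultAudioEndpoint(eRender, eConsole, &dev);

CComPtr<IAudioClient> ac;
dev->Activate(__uuidof(IAudioClient), CLSCTX_ALL, 0, (void**)&ac);

WAVEFORMATEX* pwfx = nullptr;
ac->GetMixFormat(&pwfx);

// AUDCLNT_STREAMFLAGS_LOOPBACK turns a playback endpoint into a capture source.
ac->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK,
               10000000 /* 1 s buffer */, 0, pwfx, 0);

CComPtr<IAudioCaptureClient> cap;
ac->GetService(__uuidof(IAudioCaptureClient), (void**)&cap);
CoTaskMemFree(pwfx);
ac->Start();

The capture thread then drains the buffers: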
void ThreadLoopCapture()
{
    UINT64 up, uq;
    while (Capturing)
    {
        if (hEv)
            WaitForSingleObject(hEv, INFINITE);
        if (!Capturing)
            break;
        auto hr = cap->GetBuffer(&pData, &framesAvailable, &flags, &up, &uq);
        if (FAILED(hr))
            break;
        if (framesAvailable == 0)
            continue;
        auto ThisAudioBytes = framesAvailable * wfx.Format.nChannels *
            wfx.Format.wBitsPerSample / 8;
        AudioDataX->PushX((const char*)pData, ThisAudioBytes);
        cap->ReleaseBuffer(framesAvailable);
    }
    CapturingFin1 = true;
}
If the recording device is a playback device captured through loopback, you have to ensure that something is playing, otherwise the Core Audio API captures nothing. So we play silence:
void PlaySilence(REFERENCE_TIME rt)
{
    // rt is in 100-ns units; convert to milliseconds
    rt /= 10000;

    // Frames needed to cover rt ms at the current sample rate
    auto ns = (wfx.Format.nSamplesPerSec * rt);
    ns /= 1000;

    while (Capturing)
    {
        if (!ren)
            break;
        Sleep((DWORD)(rt / 2));
        if (!Capturing)
            break;

        // See how much buffer space is available
        UINT32 numFramesPadding = 0;
        auto hr = ac2->GetCurrentPadding(&numFramesPadding);
        if (FAILED(hr))
            break;
        auto numFramesAvailable = ns - numFramesPadding;
        if (!numFramesAvailable)
            continue;

        BYTE* db = 0;
        hr = ren->GetBuffer((UINT32)numFramesAvailable, &db);
        if (FAILED(hr))
            break;
        auto bs = numFramesAvailable * wfx.Format.nChannels * wfx.Format.wBitsPerSample / 8;
        memset(db, 0, (size_t)bs);
        ren->ReleaseBuffer((UINT32)numFramesAvailable, 0); // or AUDCLNT_BUFFERFLAGS_SILENT
    }
    CapturingFin2 = true;
}
When there are multiple audio streams, you have to mix them into a single buffer. This is done using my own REBUFFER and MIXBUFFER structures:
struct REBUFFER
{
    std::recursive_mutex m;
    std::vector<char> d;
    AHANDLE Has = CreateEvent(0, TRUE, 0, 0);
    MIXBUFFER<float> mb;

    void FinMix(size_t sz, float* A = 0)
    {
        mb.Fin(sz / sizeof(float), A);
    }

    size_t PushX(const char* dd, size_t sz, float* A = 0, float V = 1.0f)
    {
        REBUFFERLOCK l(m);
        auto s = d.size();
        d.resize(s + sz);
        if (dd)
            memcpy(d.data() + s, dd, sz);
        else
            memset(d.data() + s, 0, sz);
        char* a1 = d.data();
        a1 += s;
        mb.Set((float*)a1);
        mb.count = 1;
        SetEvent(Has);
        float* b = (float*)(d.data() + s);
        if (V > 1.01f || V < 0.99f)
        {
            auto st = sz / sizeof(float);
            for (size_t i = 0; i < st; i++)
                b[i] *= V;
        }
        if (A)
        {
            *A = Peak<float>(b, sz / sizeof(float));
        }
        return s + sz;
    }

    size_t Av()
    {
        REBUFFERLOCK l(m);
        return d.size();
    }

    size_t PopX(char* trg, size_t sz, DWORD wi = 0, bool NR = false)
    {
        if (wi)
            WaitForSingleObject(Has, wi);
        REBUFFERLOCK l(m);
        if (sz >= d.size())
            sz = d.size();
        if (sz == 0)
            return 0;
        if (trg)
            memcpy(trg, d.data(), sz);
        if (NR == false)
            d.erase(d.begin(), d.begin() + sz);
        if (d.size() == 0)
            ResetEvent(Has);
        return sz;
    }

    void Clear()
    {
        REBUFFERLOCK l(m);
        d.clear();
    }
};
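As a usage sketch (hypothetical buffer sizes; MIXBUFFER's internals are not shown in this article):

// Producers push samples, a consumer pops the accumulated bytes.
REBUFFER AudioData;

// Capture thread: append interleaved float samples at 80% volume.
// (pData/ThisAudioBytes as in ThreadLoopCapture above.)
AudioData.PushX((const char*)pData, ThisAudioBytes, nullptr, 0.8f);

// Writer thread: wait up to 100 ms for data, then drain what is available.
std::vector<char> chunk(16384);
auto got = AudioData.PopX(chunk.data(), chunk.size(), 100);
if (got)
{
    // ... feed 'got' bytes to the audio stream of the sink writer ...
}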
If you have audio, video is synchronized to it.
HDR Support
When your display is HDR, the following happens:
- lDeskDupl->GetDesc(&lOutputDuplDesc); returns a description with a format of DXGI_FORMAT_R16G16B16A16_FLOAT, which is not GDI compatible.
- The cursor can't be drawn, so the Cursor parameter is ignored.
- Media Foundation can't create an HDR video. Therefore, you have to install Turbo Play and use my own Media Foundation library, which can use the NVIDIA encoder to create true HDR-10 videos. If you install Turbo Play, there is a file named nvh64.dll in the installation directory. Run regsvr32 on it as administrator and my filter will be registered for you to use with the Screen Capture.
Using the Library
#include "stdafx.h"
#include "capture.hpp"
#include <iostream>
int wmain()
{
CoInitializeEx(0, COINIT_APARTMENTTHREADED);
MFStartup(MF_VERSION);
std::cout << "Capturing screen for 10 seconds...";
DESKTOPCAPTUREPARAMS dp;
dp.f = L"capture.mp4";
dp.EndMS = 10000;
DesktopCapture(dp);
std::cout << "Done.\r\n";
return 0;
}
Where DESKTOPCAPTUREPARAMS is defined as follows:
struct DESKTOPCAPTUREPARAMS
{
    bool HasVideo = 1;
    bool HasAudio = 1;
    std::vector<std::tuple<std::wstring, std::vector<int>>> AudioFrom;
    GUID VIDEO_ENCODING_FORMAT = MFVideoFormat_H264;
    GUID AUDIO_ENCODING_FORMAT = MFAudioFormat_MP3;
    std::wstring f;
    void* cb = 0;
    std::function<HRESULT(const BYTE* d, size_t sz, void* cb)> Streamer;
    std::function<HRESULT(const BYTE* d, size_t sz, void* cb)> Framer;
    std::function<void(IMFAttributes* a)> PrepareAttributes;
    int fps = 25;
    int NumThreads = 0;
    int Qu = -1;
    int vbrm = 0;
    int vbrq = 0;
    int BR = 4000;
    int NCH = 2;
    int SR = 44100;
    int ABR = 192;
    bool Cursor = true;
    RECT rx = { 0,0,0,0 };
    HWND hWnd = 0;
    IDXGIAdapter1* ad = 0;
    UINT nOutput = 0;
    unsigned long long StartMS = 0; // 0, none
    unsigned long long EndMS = 0; // 0, none
    bool MustEnd = false;
    bool Pause = false;
};
Where:
- HasVideo = 1 -> You are capturing video. If this is set, the output file must be an MP4 or an ASF, regardless of whether you have audio or not.
- HasAudio = 1 -> You are capturing audio. If this is set and you do not have video, the output file must be an MP3 or FLAC.
- AudioFrom -> A vector of the audio devices you want to capture. Each element is a tuple of the device's unique ID (as returned by the enumeration, see VISTAMIXERS::EnumVistaMixers()) and a vector of the channels you want to record from. The library can also record from a playback device (like your speakers) in loopback. You can specify multiple recording sources and the library will mix them all into the final audio stream.
- VIDEO_ENCODING_FORMAT -> One of MFVideoFormat_H264, MFVideoFormat_HEVC, MFVideoFormat_VP90, MFVideoFormat_VP80. Use HEVC for HDR.
- AUDIO_ENCODING_FORMAT -> One of MFAudioFormat_MP3, MFAudioFormat_FLAC or MFAudioFormat_AAC. MP3 and AAC support only 44100/48000 Hz, 2-channel output.
- f -> Target file name (MP3/FLAC for audio only, MP4/ASF otherwise).
- fps -> Frames per second.
- NumThreads -> Threads for the video encoder, 0 for the default. Can be 0-16.
- Qu -> If >= 0 and <= 100, the quality-vs-speed video factor.
- vbrm and vbrq -> If vbrm is 2, then vbrq is a quality value between 0 and 100 (BR is ignored).
- BR -> Video bitrate in Kbps, default 4000. If vbrm is 2, BR is ignored.
- NCH -> Audio output channels.
- SR -> Audio output sample rate.
- ABR -> Audio bitrate in Kbps for MP3.
- Cursor -> true to capture the cursor. Ignored if HDR.
- rx -> If not {0}, capture only this specific rect.
- hWnd -> If not 0, capture only this HWND. If hWnd is 0 and rx = {0}, the entire screen is captured.
- ad -> If not 0, specifies which adapter to capture when you have more than one.
- nOutput -> The index of the monitor to capture; 0 is the first monitor.
- EndMS -> If not 0, the library stops when EndMS milliseconds have been captured. Otherwise, you have to stop the library by setting MustEnd to true.
- MustEnd -> Set to true for the library to stop capturing.
- Pause -> If true, capture is paused.
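Putting several of these together, here is a hypothetical setup that captures a 1280x720 region at 30 fps to HEVC with FLAC audio:

DESKTOPCAPTUREPARAMS dp;
dp.f = L"region.mp4";
dp.VIDEO_ENCODING_FORMAT = MFVideoFormat_HEVC; // HEVC is also the HDR choice
dp.AUDIO_ENCODING_FORMAT = MFAudioFormat_FLAC;
dp.fps = 30;
dp.rx = { 0, 0, 1280, 720 };   // capture only this rectangle
dp.EndMS = 30000;              // stop automatically after 30 seconds
DesktopCapture(dp);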
If you want to capture to a buffer, leave the "f" parameter empty and use the Streamer callback. It is called for as long as you return S_OK. If you use an ASF container, you need not do anything else. If you want an MP4 stream, you must prepare the streaming sample description (see this post). You can use this to stream your desktop over HTTP.
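For example, here is a sketch that collects the encoded stream into a memory buffer; it assumes the cb member is handed back as the callback's cb argument:

// Stream the encoded output to a memory buffer instead of a file.
std::vector<BYTE> encoded;
DESKTOPCAPTUREPARAMS dp;
dp.f.clear();             // no file: use the Streamer callback instead
dp.cb = &encoded;         // assumed to arrive as 'cb' below
dp.Streamer = [](const BYTE* d, size_t sz, void* cb) -> HRESULT
{
    auto& v = *(std::vector<BYTE>*)cb;
    v.insert(v.end(), d, d + sz);
    return S_OK;          // anything else stops the capture
};
dp.EndMS = 10000;
DesktopCapture(dp);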
Capturing Frames
Instead of capturing compressed video, you may use the Framer callback. It delivers a raw, upside-down RGBA array at your requested resolution (or a 64-bit half-float array if HDR) for as long as you return S_FALSE. Once you return S_OK, the function returns.
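For example, here is a sketch that grabs a single raw frame and then stops (the same cb assumption as above):

// Grab one raw RGBA frame, then tell the library to stop.
DESKTOPCAPTUREPARAMS dp;
std::vector<BYTE> frame;
dp.cb = &frame;
dp.Framer = [](const BYTE* d, size_t sz, void* cb) -> HRESULT
{
    auto& v = *(std::vector<BYTE>*)cb;
    v.assign(d, d + sz);  // upside-down RGBA pixels
    return S_OK;          // S_OK -> stop; S_FALSE -> keep delivering frames
};
DesktopCapture(dp);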
History
- 20th March, 2024: Added HDR support
- 2nd April, 2021: Capturing to stream, capturing frames
- 18th January, 2020: First release