Click here to Skip to main content
14,454,989 members

ScreenCapture: Single Header DirectX Library

Rate this:
5.00 (8 votes)
Please Sign up or sign in to vote.
5.00 (8 votes)
18 Jan 2020CPOL
DirectX hardware screen capture and encoding with audio mixing capabilities. H264/H265/VP80/VP90/FLAC/MP3

Introduction

There is a lot of material out there about screen capture. Here is a simple, hardware-accelerated library in a single header file. If you are using Windows 8 or later, you can easily include it in your projects. It's part of my Turbo Play.

Requirements

  • Windows 8 or later

Video Capture

We need to enumerate our adapters and the number of the monitors, with the aid of DXGI:

// Fills `a` with every adapter reported by a freshly created DXGI factory.
// `a` is emptied first; it stays empty when the factory cannot be created.
static void GetAdapters(std::vector<CComPtr<IDXGIAdapter1>>& a)
{
    a.clear();

    CComPtr<IDXGIFactory1> factory;
    CreateDXGIFactory1(__uuidof(IDXGIFactory1), (void**)&factory);
    if (!factory)
        return;

    // Walk the adapter indices until the factory stops handing out adapters.
    for (int index = 0; ; ++index)
    {
        CComPtr<IDXGIAdapter1> adapter;
        factory->EnumAdapters1(index, &adapter);
        if (!adapter)
            return;
        a.push_back(adapter);
    }
}

Then, we would instantiate a DirectX 11 device with one of them, or the default:

// Creates the Direct3D 11 device and immediate context (stored in `device`
// and `context`) with video support enabled.
//
// g: adapter to create the device on, or null to let Direct3D choose.
//
// Returns S_OK on success, otherwise the HRESULT of the last failed
// D3D11CreateDevice attempt.
HRESULT CreateDirect3DDevice(IDXGIAdapter1* g)
{
    HRESULT hr = S_OK;

    // D3D11CreateDevice returns E_INVALIDARG when an explicit adapter is
    // combined with any driver type other than D3D_DRIVER_TYPE_UNKNOWN, so the
    // hardware/WARP/reference fallback chain is only valid for the default
    // (null) adapter.
    static const D3D_DRIVER_TYPE ExplicitAdapterDriverTypes[] =
    {
        D3D_DRIVER_TYPE_UNKNOWN,
    };
    static const D3D_DRIVER_TYPE DefaultAdapterDriverTypes[] =
    {
        D3D_DRIVER_TYPE_HARDWARE,
        D3D_DRIVER_TYPE_WARP,
        D3D_DRIVER_TYPE_REFERENCE,
    };
    const D3D_DRIVER_TYPE* DriverTypes =
        g ? ExplicitAdapterDriverTypes : DefaultAdapterDriverTypes;
    const UINT NumDriverTypes =
        g ? ARRAYSIZE(ExplicitAdapterDriverTypes) : ARRAYSIZE(DefaultAdapterDriverTypes);

    // Feature levels supported, highest first
    static const D3D_FEATURE_LEVEL FeatureLevels[] =
    {
        D3D_FEATURE_LEVEL_11_0,
        D3D_FEATURE_LEVEL_10_1,
        D3D_FEATURE_LEVEL_10_0,
        D3D_FEATURE_LEVEL_9_3,
        D3D_FEATURE_LEVEL_9_2,
        D3D_FEATURE_LEVEL_9_1
    };
    const UINT NumFeatureLevels = ARRAYSIZE(FeatureLevels);

    D3D_FEATURE_LEVEL FeatureLevel;

    // Try each candidate driver type until one yields a device
    for (UINT DriverTypeIndex = 0; DriverTypeIndex < NumDriverTypes; ++DriverTypeIndex)
    {
        hr = D3D11CreateDevice(g, DriverTypes[DriverTypeIndex],
             nullptr, D3D11_CREATE_DEVICE_VIDEO_SUPPORT, FeatureLevels, NumFeatureLevels,
             D3D11_SDK_VERSION, &device, &FeatureLevel, &context);
        if (SUCCEEDED(hr))
            break;
    }

    return FAILED(hr) ? hr : S_OK;
}

We want to create the Desktop Duplication of the output then:

// Sets up desktop duplication for one output of the adapter that owns
// `device`, then allocates the two textures the capture path copies through:
// a GDI-compatible render target (lGDIImage) and a CPU-accessible staging
// texture (lDestImage).
//
// Output: zero-based index passed to IDXGIAdapter::EnumOutputs.
//
// Returns true when the duplication object and both textures exist,
// false as soon as any step fails.
bool Prepare(UINT Output = 0)
{
    // DXGI view of the D3D11 device
    CComPtr<IDXGIDevice> dxgiDevice;
    dxgiDevice = device;
    if (!dxgiDevice)
        return false;

    // Adapter the device belongs to
    CComPtr<IDXGIAdapter> adapter;
    HRESULT hr = dxgiDevice->GetParent(
        __uuidof(IDXGIAdapter),
        reinterpret_cast<void**>(&adapter));
    if (FAILED(hr))
        return false;
    dxgiDevice.Release();

    // Requested output of that adapter
    CComPtr<IDXGIOutput> output;
    hr = adapter->EnumOutputs(Output, &output);
    if (FAILED(hr))
        return false;
    adapter.Release();

    // The description is fetched but not consulted afterwards.
    DXGI_OUTPUT_DESC outputDesc;
    hr = output->GetDesc(&outputDesc);

    // QI for IDXGIOutput1, which provides DuplicateOutput
    CComPtr<IDXGIOutput1> output1;
    output1 = output;
    if (!output1)
        return false;
    output.Release();

    // Desktop duplication object
    hr = output1->DuplicateOutput(device, &lDeskDupl);
    if (FAILED(hr))
        return false;
    output1.Release();

    // Both textures take their size and pixel format from the duplicated mode.
    lDeskDupl->GetDesc(&lOutputDuplDesc);
    const auto& mode = lOutputDuplDesc.ModeDesc;

    // GDI-compatible render target: the frame is copied here so the cursor
    // can be drawn on it through an HDC.
    D3D11_TEXTURE2D_DESC gdiDesc = {};
    gdiDesc.Width = mode.Width;
    gdiDesc.Height = mode.Height;
    gdiDesc.Format = mode.Format;
    gdiDesc.MipLevels = 1;
    gdiDesc.ArraySize = 1;
    gdiDesc.SampleDesc.Count = 1;
    gdiDesc.SampleDesc.Quality = 0;
    gdiDesc.Usage = D3D11_USAGE_DEFAULT;
    gdiDesc.BindFlags = D3D11_BIND_RENDER_TARGET;
    gdiDesc.CPUAccessFlags = 0;
    gdiDesc.MiscFlags = D3D11_RESOURCE_MISC_GDI_COMPATIBLE;
    hr = device->CreateTexture2D(&gdiDesc, NULL, &lGDIImage);
    if (FAILED(hr) || lGDIImage == nullptr)
        return false;

    // Staging texture the CPU can map to read the pixels back.
    D3D11_TEXTURE2D_DESC stagingDesc = {};
    stagingDesc.Width = mode.Width;
    stagingDesc.Height = mode.Height;
    stagingDesc.Format = mode.Format;
    stagingDesc.MipLevels = 1;
    stagingDesc.ArraySize = 1;
    stagingDesc.SampleDesc.Count = 1;
    stagingDesc.SampleDesc.Quality = 0;
    stagingDesc.Usage = D3D11_USAGE_STAGING;
    stagingDesc.BindFlags = 0;
    stagingDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
    stagingDesc.MiscFlags = 0;
    hr = device->CreateTexture2D(&stagingDesc, NULL, &lDestImage);
    if (FAILED(hr) || lDestImage == nullptr)
        return false;

    return true;
}

To get the screenshot, we loop:

// Ask the duplication object for the next desktop frame; the first argument
// is the timeout, and 0 means the call does not wait for one.
hr = cap.lDeskDupl->AcquireNextFrame(
    0,
    &lFrameInfo,
    &lDesktopResource);
// "No new frame within the timeout" is not treated as an error.
if (hr == DXGI_ERROR_WAIT_TIMEOUT)
    hr = S_OK;
// Any other failure ends the capture loop.
if (FAILED(hr))
    break;
// When a frame was delivered, convert it into the bitmap buffer. The crop
// rectangle is passed only when both dp.rx.right and dp.rx.bottom are
// non-zero; otherwise Get() receives a null rectangle. A failed Get() also
// ends the loop.
if (lDesktopResource && !cap.Get(lDesktopResource, dp.Cursor, 
                                 dp.rx.right && dp.rx.bottom ? &dp.rx : 0))
    break;

The method Get() fills the bitmap buffer, optionally with the cursor drawn in and cropped to a rectangle:

// Converts an acquired desktop frame into `buf` as a bottom-up bitmap with
// 4 bytes per pixel, optionally with the mouse cursor drawn in and cropped.
//
// lDesktopResource: frame resource from AcquireNextFrame; must expose
//                   ID3D11Texture2D.
// Curs:             true to draw the currently visible cursor onto the frame.
// rcx:              optional crop rectangle in frame pixels; null copies the
//                   whole frame. `buf` always gets the size of the requested
//                   rectangle; only the part that lies inside the frame is
//                   written, so an oversized rectangle can no longer read or
//                   write out of bounds.
//
// Returns true on success; false when the resource is null or not a texture,
// the GDI surface is unavailable, the rectangle is empty/inverted, or the
// staging texture cannot be mapped.
bool Get(IDXGIResource* lDesktopResource,bool Curs,RECT* rcx = 0)
{
    if (!lDesktopResource)
        return false;

    // QI for ID3D11Texture2D
    CComPtr<ID3D11Texture2D> lAcquiredDesktopImage;
    lDesktopResource->QueryInterface(IID_PPV_ARGS(&lAcquiredDesktopImage));
    if (!lAcquiredDesktopImage)
        return false;

    // Copy image into GDI drawing texture
    context->CopyResource(lGDIImage, lAcquiredDesktopImage);

    // Surface used to draw the cursor into the GDI drawing texture
    CComPtr<IDXGISurface1> lIDXGISurface1;
    lIDXGISurface1 = lGDIImage;
    if (!lIDXGISurface1)
        return false;

    CURSORINFO lCursorInfo = { 0 };
    lCursorInfo.cbSize = sizeof(lCursorInfo);
    if (Curs && GetCursorInfo(&lCursorInfo) && lCursorInfo.flags == CURSOR_SHOWING)
    {
        // ptScreenPos is where the cursor's hotspot is, while DrawIconEx
        // expects the upper-left corner of the image, so shift by the hotspot.
        // NOTE(review): ptScreenPos is in screen coordinates; an output that
        // is not at the desktop origin presumably needs its offset subtracted
        // as well - confirm against the caller.
        auto lCursorPosition = lCursorInfo.ptScreenPos;
        ICONINFO lIconInfo = {};
        if (GetIconInfo(lCursorInfo.hCursor, &lIconInfo))
        {
            lCursorPosition.x -= (LONG)lIconInfo.xHotspot;
            lCursorPosition.y -= (LONG)lIconInfo.yHotspot;
            // GetIconInfo hands back bitmaps the caller must delete.
            if (lIconInfo.hbmMask)
                DeleteObject(lIconInfo.hbmMask);
            if (lIconInfo.hbmColor)
                DeleteObject(lIconInfo.hbmColor);
        }

        // Draw only when a DC was really obtained; the frame is still
        // delivered (without cursor) otherwise.
        HDC lHDC = nullptr;
        if (!FAILED(lIDXGISurface1->GetDC(FALSE, &lHDC)) && lHDC)
        {
            DrawIconEx(
                lHDC,
                lCursorPosition.x,
                lCursorPosition.y,
                lCursorInfo.hCursor,
                0,
                0,
                0,
                0,
                DI_NORMAL | DI_DEFAULTSIZE);
            lIDXGISurface1->ReleaseDC(nullptr);
        }
    }

    // Copy image into CPU access texture
    context->CopyResource(lDestImage, lGDIImage);

    // Map the CPU access texture so its pixels can be read
    D3D11_MAPPED_SUBRESOURCE resource;
    UINT subresource = D3D11CalcSubresource(0, 0, 0);
    HRESULT hr = context->Map(lDestImage, subresource, D3D11_MAP_READ_WRITE, 0, &resource);
    if (FAILED(hr))
        return false;

    const LONG lFrameW = (LONG)lOutputDuplDesc.ModeDesc.Width;
    const LONG lFrameH = (LONG)lOutputDuplDesc.ModeDesc.Height;

    // Requested output rectangle: the crop rectangle, or the whole frame.
    LONG lLeft = 0, lTop = 0, lRight = lFrameW, lBottom = lFrameH;
    if (rcx)
    {
        lLeft = rcx->left;
        lTop = rcx->top;
        lRight = rcx->right;
        lBottom = rcx->bottom;
    }
    if (lRight <= lLeft || lBottom <= lTop)
    {
        context->Unmap(lDestImage, subresource);
        return false;
    }

    // Size the output buffer once, for the requested rectangle.
    const size_t lOutH = (size_t)(lBottom - lTop);
    const size_t lBmpRowPitch = (size_t)(lRight - lLeft) * 4;
    buf.resize(lBmpRowPitch * lOutH);

    // Part of the requested rectangle that actually lies inside the frame.
    const LONG lSrcLeft = std::max<LONG>(lLeft, 0);
    const LONG lSrcTop = std::max<LONG>(lTop, 0);
    const LONG lSrcRight = std::min<LONG>(lRight, lFrameW);
    const LONG lSrcBottom = std::min<LONG>(lBottom, lFrameH);

    const size_t lSrcOffset = (size_t)lSrcLeft * 4;
    if (lSrcLeft < lSrcRight && lSrcTop < lSrcBottom && lSrcOffset < resource.RowPitch)
    {
        // Never read past the end of a mapped row.
        const size_t lCopyBytes = std::min<size_t>(
            (size_t)(lSrcRight - lSrcLeft) * 4,
            (size_t)resource.RowPitch - lSrcOffset);
        const size_t lDstOffset = (size_t)(lSrcLeft - lLeft) * 4;
        const BYTE* lSrcBase = reinterpret_cast<const BYTE*>(resource.pData);

        for (LONG y = lSrcTop; y < lSrcBottom; ++y)
        {
            const BYTE* sptr = lSrcBase + (size_t)y * resource.RowPitch + lSrcOffset;
            // The bitmap is stored bottom-up: the first requested row lands
            // in the last row of the buffer.
            const size_t lDstRow = lOutH - 1 - (size_t)(y - lTop);
            BYTE* dptr = buf.data() + lDstRow * lBmpRowPitch + lDstOffset;
            memcpy_s(dptr, lBmpRowPitch - lDstOffset, sptr, lCopyBytes);
        }
    }

    context->Unmap(lDestImage, subresource);
    return true;
}

After that, you can feed the "buf" data into media foundation's sink writer.

Audio Capture

You will use the IAudioClient to get an IAudioCaptureClient and record audio in a separate thread.

void ThreadLoopCapture()
{
    UINT64 up, uq;
    while (Capturing)
    {
        if (hEv)
            WaitForSingleObject(hEv, INFINITE);

        if (!Capturing)
            break;
        auto hr = cap->GetBuffer(&pData, &framesAvailable, &flags, &up, &uq);
        if (FAILED(hr))
            break;
        if (framesAvailable == 0)
            continue;

        auto ThisAudioBytes = framesAvailable * wfx.Format.nChannels * 
                                                wfx.Format.wBitsPerSample/8 ;

        AudioDataX->PushX((const char*)pData, ThisAudioBytes);
        cap->ReleaseBuffer(framesAvailable);
    }
    CapturingFin1 = true;
}

If the recording device is a playback device through loopback, you have to ensure that something is played, otherwise the Core Audio API records nothing. So we have to play silence:

// Keeps the render client `ren` fed with silence while `Capturing` is set, so
// that loopback capture of a playback device has something to record.
//
// rt: buffer duration as a REFERENCE_TIME (100-nanosecond units).
//
// Sets `CapturingFin2` when the loop ends, whether by request or on error.
void PlaySilence(REFERENCE_TIME rt)
{
    // REFERENCE_TIME is in 100-ns units; convert to milliseconds
    rt /= 10000;
    // Frames that fit into rt ms at the stream's sample rate:
    //   SR frames per 1000 ms  ->  SR * rt / 1000 frames per rt ms
    auto ns = (wfx.Format.nSamplesPerSec * rt);
    ns /= 1000;
    while (Capturing)
    {
        if (!ren)
            break;

        // Wake up twice per buffer duration so the buffer does not run dry.
        Sleep((DWORD)(rt / 2));

        if (!Capturing)
            break;

        // See how much buffer space is available.
        UINT32 numFramesPadding = 0;
        auto hr = ac2->GetCurrentPadding(&numFramesPadding);
        if (FAILED(hr))
            break;

        // The queued frame count can reach or exceed our own estimate `ns`.
        // In that case there is nothing to write; subtracting anyway would go
        // negative, turn into a huge UINT32 below and make GetBuffer fail,
        // which would stop the silence stream for good.
        if (numFramesPadding >= ns)
            continue;
        const auto numFramesAvailable = ns - numFramesPadding;

        BYTE* db = 0;
        hr = ren->GetBuffer((UINT32)numFramesAvailable, &db);
        if (FAILED(hr) || !db)
            break;
        auto bs = numFramesAvailable * wfx.Format.nChannels * wfx.Format.wBitsPerSample / 8;
        memset(db, 0,(size_t) bs);
        ren->ReleaseBuffer((UINT32)numFramesAvailable, 0); //AUDCLNT_BUFFERFLAGS_SILENT
    }
    CapturingFin2 = true;
}

When there are many audio streams, you have to mix them in a single buffer. This is done using my own REBUFFER and MIXBUFFERs:

// Mutex-protected byte FIFO used to hand audio from producer threads to a
// consumer. The manual-reset event `Has` is kept signalled exactly while the
// buffer holds data, so PopX() can wait on it.
struct REBUFFER
{
    std::recursive_mutex m;                     // guards d
    std::vector<char> d;                        // queued bytes, oldest first
    AHANDLE Has = CreateEvent(0, TRUE, 0, 0);   // manual-reset, initially unsignalled
    MIXBUFFER<float> mb;

    // Forwards to the mix buffer; sz is a byte count, converted to a float count.
    void FinMix(size_t sz, float* A = 0)
    {
        mb.Fin(sz / sizeof(float), A);
    }

    // Appends sz bytes to the buffer.
    //   dd: source bytes, or null to append sz zero bytes.
    //   A:  when non-null, receives Peak() of the appended data read as floats.
    //   V:  gain applied to the appended floats when it differs from 1 by
    //       more than 0.01.
    // Returns the number of bytes queued after the append.
    size_t PushX(const char* dd, size_t sz, float* A = 0, float V = 1.0f)
    {
        REBUFFERLOCK l(m);
        const auto s = d.size();
        d.resize(s + sz);
        if (dd)
            memcpy(d.data() + s, dd, sz);
        else
            memset(d.data() + s, 0, sz);

        // The appended region, viewed as floats
        float* b = (float*)(d.data() + s);
        mb.Set(b);
        mb.count = 1;

        // Signal only when there is something to pop; an empty push into an
        // empty buffer must not wake waiters.
        if (!d.empty())
            SetEvent(Has);

        const auto st = sz / sizeof(float);
        if (V > 1.01f || V < 0.99f)
        {
            for (size_t i = 0; i < st; i++)
                b[i] *= V;
        }
        if (A)
        {
            *A = Peak<float>(b, st);
        }

        return s + sz;
    }

    // Number of bytes currently queued.
    size_t Av()
    {
        REBUFFERLOCK l(m);
        return d.size();
    }

    // Removes up to sz bytes from the front of the buffer.
    //   trg: destination, or null to discard the bytes.
    //   wi:  when non-zero, wait up to wi milliseconds for data first.
    //   NR:  true to peek, i.e. copy without removing.
    // Returns the number of bytes delivered (0 when the buffer is empty).
    size_t PopX(char* trg, size_t sz, DWORD wi = 0, bool NR = false)
    {
        if (wi)
            WaitForSingleObject(Has, wi);
        REBUFFERLOCK l(m);
        if (sz >= d.size())
            sz = d.size();
        if (sz != 0)
        {
            if (trg)
                memcpy(trg, d.data(), sz);
            if (NR == false)
                d.erase(d.begin(), d.begin() + sz);
        }
        // Also reached when the buffer was already empty, so a stale signal
        // cannot keep waking the waiter.
        if (d.size() == 0)
            ResetEvent(Has);
        return sz;
    }

    // Drops all queued bytes.
    void Clear()
    {
        REBUFFERLOCK l(m);
        d.clear();
        // Nothing is left to pop, so waiters must block again.
        ResetEvent(Has);
    }
};

If you have audio, video is synchronized to it.

Using the Library

#include "stdafx.h"
#include "capture.hpp"
#include <iostream>

int wmain()
{
    CoInitializeEx(0, COINIT_APARTMENTTHREADED);
    MFStartup(MF_VERSION);
    std::cout << "Capturing screen for 10 seconds...";
    DESKTOPCAPTUREPARAMS dp;
    dp.f = L"capture.mp4";
    dp.EndMS = 10000;
    DesktopCapture(dp);
    std::cout << "Done.\r\n";
    return 0;
}

Where the DESKTOPCAPTUREPARAMS is defined like that:

// Options for one capture run. Every field has a default, so callers only
// set what they need (at minimum the target file name `f`).
struct DESKTOPCAPTUREPARAMS
{
    // --- What to capture ---
    bool HasVideo = true;   // capture video
    bool HasAudio = true;   // capture audio
    // Audio sources: each entry pairs a device id with the channels to record.
    std::vector<std::tuple<std::wstring, std::vector<int>>> AudioFrom;

    // --- Encoding ---
    GUID VIDEO_ENCODING_FORMAT = MFVideoFormat_H264;
    GUID AUDIO_ENCODING_FORMAT = MFAudioFormat_MP3;
    std::wstring f;         // target file name
    int fps = 25;           // video frames per second
    int NumThreads = 0;     // video encoder threads, 0 = default
    int Qu = -1;            // quality-vs-speed video factor; negative by default
    int vbrm = 0;           // video rate-control mode
    int vbrq = 0;           // video quality value used together with vbrm
    int BR = 4000;          // video bitrate
    int NCH = 2;            // audio output channels
    int SR = 44100;         // audio output sample rate
    int ABR = 192;          // audio bitrate

    // --- Capture area ---
    bool Cursor = true;             // include the mouse cursor
    RECT rx = {};                   // crop rectangle; all zero = no crop
    HWND hWnd = nullptr;            // window to capture; null = none
    IDXGIAdapter1* ad = nullptr;    // adapter to capture from; null = default
    UINT nOutput = 0;               // index of the monitor to capture

    // --- Run control ---
    unsigned long long StartMS = 0; // 0, none
    unsigned long long EndMS = 0; // 0, none
    bool MustEnd = false;   // set to true to stop capturing
    bool Pause = false;     // true while capture is paused
};

Where:

  • HasVideo = 1 -> You are capturing video. If this is set, the output file must be an MP4 or an ASF regardless of whether you have audio or not.
  • HasAudio = 1 -> You are capturing audio. If this is set and you do not have a video, the output file must be an MP3 or FLAC.
  • AudioFrom = a vector of which audio devices you want to capture. Each element is a tuple of the device unique ID (as returned by the enumeration, see VISTAMIXERS::EnumVistaMixers()) and a vector of the channels you want to record from.

The library can also record from a playback device (like your speakers) in loopback. You can specify multiple sources of recording and the library will mix them all into the final audio stream.

  • VIDEO_ENCODING_FORMAT -> One of MFVideoFormat_H264, MFVideoFormat_HEVC, MFVideoFormat_VP90, MFVideoFormat_VP80.
  • AUDIO_ENCODING_FORMAT -> One of MFAudioFormat_MP3 or MFAudioFormat_FLAC or MFAudioFormat_AAC . MP3 and AAC support only 44100/48000 2 channel output.
  • f -> target file name (MP3/FLAC for audio only, MP4/ASF else)
  • fps -> Frames per second
  • NumThreads -> Threads for the video encoder, 0 default. Can be 0-16.
  • Qu -> If >= 0 and <= 100, Quality Vs Speed video factor
  • vbrm and vbrq -> If vbrm is 2, then vbrq is a quality value between 0 and 100 (BR is ignored)
  • BR -> Video bitrate in Kbps, default 4000. If vbrm is 2, BR is ignored
  • NCH -> Audio output channels
  • SR -> Audio output sample rate
  • ABR -> Audio bitrate in Kbps for MP3
  • Cursor -> true to capture the cursor
  • rx -> If not {0}, capture this specific rect only
  • hWnd -> If not 0, capture this HWND only. If hWnd is 0 and rx = {0}, the entire screen is captured
  • ad -> If not 0, specifies which adapter you want to capture if you have more than 1 adapter
  • nOutput -> The index of the monitor to capture. 0 is the first monitor. For multiple monitors, this specifies the monitor.
  • EndMS -> If not 0, the library stops when EndMS milliseconds have been captured. Otherwise, you have to stop the library by setting "MustEnd" to true.
  • MustEnd -> Set to true for the library to stop capturing
  • Pause -> If true, capture is paused

Have fun!

History

  • 18th January, 2020 : First release

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

Share

About the Author

Michael Chourdakis
Software Developer
Greece Greece
I'm working in C++, PHP , Java, Windows, iOS, Android and Web (HTML/Javascript/CSS).

I 've a PhD in Digital Signal Processing and Artificial Intelligence and I specialize in Pro Audio and AI applications.

My home page: https://www.turboirc.com

Comments and Discussions

 
QuestionAlways emits error -10 Pin
Malopez24-Jan-20 15:58
MemberMalopez24-Jan-20 15:58 
AnswerRe: Always emits error -10 Pin
Michael Chourdakis25-Jan-20 12:23
mvaMichael Chourdakis25-Jan-20 12:23 
GeneralRe: Always emits error -10 Pin
Malopez25-Jan-20 18:30
MemberMalopez25-Jan-20 18:30 
GeneralRe: Always emits error -10 Pin
Malopez25-Jan-20 19:13
MemberMalopez25-Jan-20 19:13 
GeneralRe: Always emits error -10 Pin
Michael Chourdakis25-Jan-20 20:49
mvaMichael Chourdakis25-Jan-20 20:49 
QuestionFive Stars, One Bug Pin
Simon G4ELI21-Jan-20 22:30
MemberSimon G4ELI21-Jan-20 22:30 
AnswerRe: Five Stars, One Bug Pin
Michael Chourdakis22-Jan-20 9:47
mvaMichael Chourdakis22-Jan-20 9:47 
GeneralRe: Five Stars, One Bug Pin
Simon G4ELI22-Jan-20 12:31
MemberSimon G4ELI22-Jan-20 12:31 
GeneralRe: Five Stars, One Bug Pin
Michael Chourdakis23-Jan-20 3:15
mvaMichael Chourdakis23-Jan-20 3:15 
QuestionAnswer site Pin
Member 1472037818-Jan-20 19:34
MemberMember 1472037818-Jan-20 19:34 
QuestionWill it still work if there is no hardware acceleration? Pin
Shao Voon Wong18-Jan-20 15:04
mvaShao Voon Wong18-Jan-20 15:04 
AnswerRe: Will it still work if there is no hardware acceleration? Pin
Michael Chourdakis18-Jan-20 21:25
mvaMichael Chourdakis18-Jan-20 21:25 
GeneralMy vote of 5 Pin
Shao Voon Wong18-Jan-20 14:55
mvaShao Voon Wong18-Jan-20 14:55 

General General    News News    Suggestion Suggestion    Question Question    Bug Bug    Answer Answer    Joke Joke    Praise Praise    Rant Rant    Admin Admin   

Use Ctrl+Left/Right to switch messages, Ctrl+Up/Down to switch threads, Ctrl+Shift+Left/Right to switch pages.

Article
Posted 18 Jan 2020

Stats

3.6K views
11 bookmarked