[host] Added SIMD BGRAtoRGB support and implemented it in DXGI

This commit is contained in:
Geoffrey McRae 2017-11-16 22:43:29 +11:00
parent 6eb40a1897
commit 534e9425f1
3 changed files with 62 additions and 12 deletions

View file

@ -20,6 +20,7 @@ Place, Suite 330, Boston, MA 02111-1307 USA
using namespace Capture;
#include "common\debug.h"
#include "Util.h"
DXGI::DXGI() :
m_options(NULL),
@ -218,7 +219,7 @@ FrameType DXGI::GetFrameType()
if (!m_initialized)
return FRAME_TYPE_INVALID;
return FRAME_TYPE_ARGB;
return FRAME_TYPE_RGB;
}
FrameComp DXGI::GetFrameCompression()
@ -234,7 +235,7 @@ size_t DXGI::GetMaxFrameSize()
if (!m_initialized)
return 0;
return m_width * m_height * 4;
return (m_width * m_height * 3) + 4;
}
bool DXGI::GrabFrame(FrameInfo & frame)
@ -346,14 +347,12 @@ bool DXGI::GrabFrame(FrameInfo & frame)
m_width = desc.Width;
m_height = desc.Height;
const int pitch = m_width * 3;
frame.width = desc.Width;
frame.height = desc.Height;
frame.stride = rect.Pitch / 4;
frame.outSize = min(frame.bufferSize, m_height * rect.Pitch);
memcpy_s(frame.buffer, frame.bufferSize, rect.pBits, frame.outSize);
status = surface->Unmap();
frame.stride = desc.Width;
frame.outSize = min(frame.bufferSize, m_height * pitch);
// if we have a mouse update
if (frameInfo.LastMouseUpdateTime.QuadPart)
@ -376,7 +375,7 @@ bool DXGI::GrabFrame(FrameInfo & frame)
for (int x = abs(min(0, m_pointerPos.x)); x < maxWidth; ++x)
{
BYTE *src = (BYTE *)m_pointer + (m_shapeInfo.Pitch * y) + (x * 4);
BYTE *dst = (BYTE *)frame.buffer + (rect.Pitch * (y + m_pointerPos.y)) + ((x + m_pointerPos.x) * 4);
BYTE *dst = (BYTE *)rect.pBits + (rect.Pitch * (y + m_pointerPos.y)) + ((x + m_pointerPos.x) * 4);
const unsigned int alpha = src[3] + 1;
const unsigned int inv = 256 - alpha;
@ -393,7 +392,7 @@ bool DXGI::GrabFrame(FrameInfo & frame)
for (int x = abs(min(0, m_pointerPos.x)); x < maxWidth; ++x)
{
UINT32 *src = (UINT32 *)m_pointer + ((m_shapeInfo.Pitch/4) * y) + x;
UINT32 *dst = (UINT32 *)frame.buffer + (frame.stride * (y + m_pointerPos.y)) + (x + m_pointerPos.x);
UINT32 *dst = (UINT32 *)rect.pBits + (frame.stride * (y + m_pointerPos.y)) + (x + m_pointerPos.x);
if (*src & 0xff000000)
*dst = 0xff000000 | (*dst ^ *src);
else *dst = 0xff000000 | *src;
@ -408,7 +407,7 @@ bool DXGI::GrabFrame(FrameInfo & frame)
{
UINT8 *srcAnd = (UINT8 *)m_pointer + (m_shapeInfo.Pitch * y) + (x/8);
UINT8 *srcXor = srcAnd + m_shapeInfo.Pitch * (m_shapeInfo.Height / 2);
UINT32 *dst = (UINT32 *)frame.buffer + (frame.stride * (y + m_pointerPos.y)) + (x + m_pointerPos.x);
UINT32 *dst = (UINT32 *)rect.pBits + (frame.stride * (y + m_pointerPos.y)) + (x + m_pointerPos.x);
const BYTE mask = 0x80 >> (x % 8);
const UINT32 andMask = (*srcAnd & mask) ? 0xFFFFFFFF : 0xFF000000;
const UINT32 xorMask = (*srcXor & mask) ? 0x00FFFFFF : 0x00000000;
@ -419,6 +418,9 @@ bool DXGI::GrabFrame(FrameInfo & frame)
}
}
Util::BGRAtoRGB(rect.pBits, m_height * m_width, (uint8_t *)frame.buffer);
status = surface->Unmap();
if (FAILED(status))
{
DEBUG_ERROR("Failed to unmap surface: %08x", status);

View file

@ -114,8 +114,12 @@ bool Service::Process()
if (!m_initialized)
return false;
KVMGFXHeader * header = reinterpret_cast<KVMGFXHeader *>(m_memory);
const uint64_t dataOffset = sizeof(KVMGFXHeader) + m_frameIndex * m_capture->GetMaxFrameSize();
KVMGFXHeader * header = reinterpret_cast<KVMGFXHeader *>(m_memory);
// calculate the current offset and ensure it is 16-byte aligned for SIMD performance
uint64_t dataOffset = sizeof(KVMGFXHeader) + m_frameIndex * m_capture->GetMaxFrameSize();
dataOffset = (dataOffset + 0xF) & ~0xF;
uint8_t * data = m_memory + dataOffset;
const size_t available = m_ivshmem->GetSize() - sizeof(KVMGFXHeader);

View file

@ -18,6 +18,9 @@ Place, Suite 330, Boston, MA 02111-1307 USA
#pragma once
#include <string>
#include <assert.h>
#include <inttypes.h>
#include <tmmintrin.h>
#include "common\debug.h"
@ -56,4 +59,45 @@ public:
#endif
return defaultPath;
}
// Converts packed 32-bit pixels (byte order B, G, R, A) into packed 24-bit
// pixels (byte order R, G, B), dropping the alpha byte.
//
//   orig      - source pixels, 4 bytes each; must be 16-byte aligned
//   imagesize - number of PIXELS (not bytes) to convert
//   dest      - destination, receives imagesize * 3 bytes; must be 16-byte
//               aligned (it is written with non-temporal stores)
//
// Pixels are converted 16 at a time with SSSE3 shuffles; any remainder that
// does not fill a whole 16 pixel group is converted with a scalar loop, so
// imagesize does not need to be a multiple of 16.
#if defined(__GNUC__) || defined(__clang__)
// lets GCC/Clang compile the SSSE3 intrinsics without a global -mssse3 flag
__attribute__((target("ssse3")))
#endif
static void BGRAtoRGB(uint8_t * orig, size_t imagesize, uint8_t * dest)
{
  // aligned loads and streaming stores fault on unaligned addresses
  assert((uintptr_t)orig % 16 == 0);
  assert((uintptr_t)dest % 16 == 0);

  // _mm_set_epi8 lists bytes from the highest (15) down to the lowest (0);
  // an index of -128 zeroes the output byte.
  //
  // mask_right: swaps B<->R, drops A and packs the 12 result bytes into the
  // UPPER 12 bytes of the register (lower 4 bytes zeroed)
  const __m128i mask_right = _mm_set_epi8
  (
      12,   13,   14,    8,
       9,   10,    4,    5,
       6,    0,    1,    2,
    -128, -128, -128, -128
  );

  // mask_left: same conversion, but the 12 result bytes are packed into the
  // LOWER 12 bytes of the register (upper 4 bytes zeroed)
  const __m128i mask_left = _mm_set_epi8
  (
    -128, -128, -128, -128,
      12,   13,   14,    8,
       9,   10,    4,    5,
       6,    0,    1,    2
  );

  // only whole groups of 16 pixels (64 bytes in, 48 bytes out) go through the
  // SIMD loop, this guarantees the loop terminates exactly on simdEnd
  const size_t    simdPixels = imagesize & ~static_cast<size_t>(0xF);
  uint8_t * const simdEnd    = orig + simdPixels * 4;

  for (; orig != simdEnd; orig += 64, dest += 48)
  {
    // each register holds 4 pixels, after the shuffle each carries 12 bytes
    __m128i v0 = _mm_shuffle_epi8(_mm_load_si128(reinterpret_cast<__m128i *>(&orig[0 ])), mask_right);
    __m128i v1 = _mm_shuffle_epi8(_mm_load_si128(reinterpret_cast<__m128i *>(&orig[16])), mask_left );
    __m128i v2 = _mm_shuffle_epi8(_mm_load_si128(reinterpret_cast<__m128i *>(&orig[32])), mask_left );
    __m128i v3 = _mm_shuffle_epi8(_mm_load_si128(reinterpret_cast<__m128i *>(&orig[48])), mask_left );

    // stitch the four 12 byte results into three full 16 byte registers
    v0 = _mm_alignr_epi8(v1, v0, 4);                     // v0[4..15] + v1[0..3]
    v1 = _mm_alignr_epi8(v2, _mm_slli_si128(v1, 4), 8);  // v1[4..11] + v2[0..7]
    v2 = _mm_alignr_epi8(v3, _mm_slli_si128(v2, 4), 12); // v2[8..11] + v3[0..11]

    // non-temporal stores, the output is written without polluting the cache
    _mm_stream_si128(reinterpret_cast<__m128i *>(&dest[0 ]), v0);
    _mm_stream_si128(reinterpret_cast<__m128i *>(&dest[16]), v1);
    _mm_stream_si128(reinterpret_cast<__m128i *>(&dest[32]), v2);
  }

  // streaming stores are weakly ordered, fence so they are globally visible
  // before any store the caller issues afterwards
  _mm_sfence();

  // scalar conversion of the remaining (imagesize % 16) pixels
  uint8_t * const end = orig + (imagesize - simdPixels) * 4;
  for (; orig != end; orig += 4, dest += 3)
  {
    dest[0] = orig[2]; // R
    dest[1] = orig[1]; // G
    dest[2] = orig[0]; // B
  }
}
};