From 43b096a5e7d6f4e392d89f3814e25beeca1927c6 Mon Sep 17 00:00:00 2001 From: Geoffrey McRae Date: Thu, 21 Dec 2017 13:48:57 +1100 Subject: [PATCH] [host] added multi-threaded memcopy for high resolutions --- host/Capture/DXGI.cpp | 7 ++- host/Capture/DXGI.h | 2 + host/MultiMemcpy.cpp | 70 ++++++++++++++++++++++++ host/MultiMemcpy.h | 48 +++++++++++++++++ host/Service.cpp | 9 +--- host/Util.h | 72 +++---------------------- host/looking-glass-host.vcxproj | 2 + host/looking-glass-host.vcxproj.filters | 6 +++ 8 files changed, 140 insertions(+), 76 deletions(-) create mode 100644 host/MultiMemcpy.cpp create mode 100644 host/MultiMemcpy.h diff --git a/host/Capture/DXGI.cpp b/host/Capture/DXGI.cpp index 5a34afaa..a529920b 100644 --- a/host/Capture/DXGI.cpp +++ b/host/Capture/DXGI.cpp @@ -21,7 +21,6 @@ Place, Suite 330, Boston, MA 02111-1307 USA using namespace Capture; #include "common/debug.h" -#include "common/memcpySSE.h" DXGI::DXGI() : m_options(NULL), @@ -33,11 +32,11 @@ DXGI::DXGI() : m_texture(), m_pointer(NULL) { + } DXGI::~DXGI() { - } bool DXGI::Initialize(CaptureOptions * options) @@ -310,7 +309,7 @@ GrabStatus DXGI::GrabFrame(FrameInfo & frame) frame.stride = m_mapping.RowPitch / 4; unsigned int size = m_height * m_mapping.RowPitch; - memcpySSE(frame.buffer, m_mapping.pData, size < frame.bufferSize ? size : frame.bufferSize); + m_memcpy.Copy(frame.buffer, m_mapping.pData, size < frame.bufferSize ? size : frame.bufferSize); return GRAB_STATUS_OK; } @@ -448,7 +447,7 @@ GrabStatus DXGI::GrabFrame(FrameInfo & frame) frame.stride = m_mapping.RowPitch / 4; unsigned int size = m_height * m_mapping.RowPitch; - memcpySSE(frame.buffer, m_mapping.pData, size < frame.bufferSize ? size : frame.bufferSize); + m_memcpy.Copy(frame.buffer, m_mapping.pData, size < frame.bufferSize ? 
size : frame.bufferSize); return GRAB_STATUS_OK; } \ No newline at end of file diff --git a/host/Capture/DXGI.h b/host/Capture/DXGI.h index 3dc6300a..ec6078ed 100644 --- a/host/Capture/DXGI.h +++ b/host/Capture/DXGI.h @@ -20,6 +20,7 @@ Place, Suite 330, Boston, MA 02111-1307 USA #pragma once #include "ICapture.h" +#include "MultiMemcpy.h" #define W32_LEAN_AND_MEAN #include @@ -75,6 +76,7 @@ namespace Capture unsigned int m_width; unsigned int m_height; + MultiMemcpy m_memcpy; IDXGIFactory1Ptr m_dxgiFactory; ID3D11DevicePtr m_device; D3D_FEATURE_LEVEL m_featureLevel; diff --git a/host/MultiMemcpy.cpp b/host/MultiMemcpy.cpp new file mode 100644 index 00000000..b92184ea --- /dev/null +++ b/host/MultiMemcpy.cpp @@ -0,0 +1,70 @@ +/* +Looking Glass - KVM FrameRelay (KVMFR) Client +Copyright (C) 2017 Geoffrey McRae +https://looking-glass.hostfission.com + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 2 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include "MultiMemcpy.h"
+#include "Util.h"
+#include "common/memcpySSE.h"
+
+// Creates, for every worker slot, the semaphore pair used to hand the worker
+// a job (start) and to learn that the job is finished (stop), then launches
+// the worker thread that serves the slot.
+MultiMemcpy::MultiMemcpy()
+{
+  for (int i = 0; i < MULTIMEMCPY_THREADS; ++i)
+  {
+    // both semaphores start unsignalled and can hold at most one signal
+    m_workers[i].start = CreateSemaphore(NULL, 0, 1, NULL);
+    m_workers[i].stop  = CreateSemaphore(NULL, 0, 1, NULL);
+
+    // mirror the stop handles into a flat array so that Copy can wait on all
+    // of them with a single WaitForMultipleObjects call
+    m_semaphores[i] = m_workers[i].stop;
+
+    m_workers[i].thread = CreateThread(0, 0, WorkerFunction, &m_workers[i], 0, NULL);
+  }
+}
+
+// Stops every worker thread and releases all handles created by the
+// constructor.
+MultiMemcpy::~MultiMemcpy()
+{
+  for (int i = 0; i < MULTIMEMCPY_THREADS; ++i)
+  {
+    Worker & w = m_workers[i];
+
+    if (w.thread)
+    {
+      // the worker loop has no exit condition, so the thread has to be
+      // terminated; termination is asynchronous, so wait for the thread to be
+      // gone before closing the semaphores it may still be blocked on
+      if (TerminateThread(w.thread, 0))
+        WaitForSingleObject(w.thread, INFINITE);
+
+      // the thread handle is owned by this object too and must be closed
+      CloseHandle(w.thread);
+    }
+
+    if (w.start) CloseHandle(w.start);
+    if (w.stop ) CloseHandle(w.stop );
+  }
+}
+
+// Copies size bytes from src to dst. The range is cut into
+// MULTIMEMCPY_THREADS contiguous slices, one per worker, which are copied in
+// parallel; the call returns once every worker has signalled completion.
+void MultiMemcpy::Copy(void * dst, void * src, size_t size)
+{
+  // nothing to copy, do not wake the workers
+  if (size == 0)
+    return;
+
+  const size_t block = size / MULTIMEMCPY_THREADS;
+  for (int i = 0; i < MULTIMEMCPY_THREADS; ++i)
+  {
+    const size_t offset = i * block;
+    m_workers[i].dst = static_cast<uint8_t *>(dst) + offset;
+    m_workers[i].src = static_cast<uint8_t *>(src) + offset;
+
+    // the last worker also takes the size % MULTIMEMCPY_THREADS bytes that do
+    // not divide evenly, otherwise the tail of the buffer is never copied
+    m_workers[i].size = (i == MULTIMEMCPY_THREADS - 1) ? size - offset : block;
+
+    // the job description is complete, let the worker run
+    ReleaseSemaphore(m_workers[i].start, 1, NULL);
+  }
+
+  // block until all workers have released their stop semaphore
+  WaitForMultipleObjects(MULTIMEMCPY_THREADS, m_semaphores, TRUE, INFINITE);
+}
+
+// Thread entry point; param is the Worker slot served by this thread. Waits
+// for a job, copies it with memcpySSE and signals completion. The loop never
+// returns, the thread is ended by the destructor.
+DWORD WINAPI MultiMemcpy::WorkerFunction(LPVOID param)
+{
+  Worker * w = static_cast<Worker *>(param);
+
+  for(;;)
+  {
+    WaitForSingleObject(w->start, INFINITE);
+    memcpySSE(w->dst, w->src, w->size);
+    ReleaseSemaphore(w->stop, 1, NULL);
+  }
+}
\ No newline at end of file
diff --git a/host/MultiMemcpy.h b/host/MultiMemcpy.h
new file mode 100644
index 00000000..69774b45
--- /dev/null
+++ b/host/MultiMemcpy.h
@@ -0,0 +1,48 @@
+/*
+Looking Glass - KVM FrameRelay (KVMFR) Client
+Copyright (C) 2017 Geoffrey McRae
+https://looking-glass.hostfission.com
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 2 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#pragma once
+
+// number of worker threads (and therefore slices) used for every copy
+#define MULTIMEMCPY_THREADS 4
+
+#include <windows.h>
+#include <stdint.h>
+
+// Memory copy that is spread over MULTIMEMCPY_THREADS worker threads.
+//
+// The constructor creates the workers, the destructor terminates them and
+// closes their semaphores. An instance owns raw OS handles, so it must not be
+// copied: a copy would share the handles and both destructors would close
+// them.
+class MultiMemcpy
+{
+public:
+  MultiMemcpy();
+  ~MultiMemcpy();
+
+  // Copies from src to dst using all workers in parallel and returns once
+  // every worker has signalled that it is done.
+  void Copy(void * dst, void * src, size_t size);
+private:
+  // state of a single worker thread together with its current job
+  struct Worker
+  {
+    HANDLE start;  // released by Copy when a job is ready for the worker
+    HANDLE stop;   // released by the worker when the job has been copied
+    HANDLE thread; // the worker thread itself
+    void * dst;    // job: destination of this worker's slice
+    void * src;    // job: source of this worker's slice
+    size_t size;   // job: number of bytes in this worker's slice
+  };
+
+  // the stop semaphore of every worker, kept in one array so that Copy can
+  // wait for all of them with a single WaitForMultipleObjects call
+  HANDLE m_semaphores[MULTIMEMCPY_THREADS];
+  struct Worker m_workers[MULTIMEMCPY_THREADS];
+
+  // thread entry point, param is the Worker served by the thread
+  static DWORD WINAPI WorkerFunction(LPVOID param);
+};
+
diff --git a/host/Service.cpp b/host/Service.cpp index 8d693b12..c512d704 100644 --- a/host/Service.cpp +++ b/host/Service.cpp @@ -23,16 +23,9 @@ Place, Suite 330, Boston, MA 02111-1307 USA #include "common/debug.h" #include "common/KVMFR.h" +#include "Util.h" #include "CaptureFactory.h" -#if __MINGW32__ -#define INTERLOCKED_AND8 __sync_and_and_fetch -#define INTERLOCKED_OR8 __sync_or_and_fetch -#else -#define INTERLOCKED_OR8 InterlockedOr8 -#define INTERLOCKED_AND8 InterlockedAnd8 -#endif - Service * Service::m_instance = NULL; Service::Service() : diff --git a/host/Util.h b/host/Util.h index b006a543..7d2632c2 100644 --- a/host/Util.h +++ b/host/Util.h @@ -29,6 +29,14 @@ Place, Suite 330, Boston, MA 02111-1307 USA #define min(a, b) ((a) < (b) ? 
(a) : (b)) #endif +#if __MINGW32__ +#define INTERLOCKED_AND8 __sync_and_and_fetch +#define INTERLOCKED_OR8 __sync_or_and_fetch +#else +#define INTERLOCKED_OR8 InterlockedOr8 +#define INTERLOCKED_AND8 InterlockedAnd8 +#endif + class Util { public: @@ -120,68 +128,4 @@ public: _mm_stream_si128((__m128i *)&dest[32], v2); } } - - static void DrawCursor( - const enum CursorType type, - const uint8_t * cursorData, - const POINT cursorRect, - const unsigned int cursorPitch, - const POINT cursorPos, - FrameInfo & frame - ) - { - const int maxHeight = min(cursorRect.y, (int)frame.height - cursorPos.y); - const int maxWidth = min(cursorRect.x, (int)frame.width - cursorPos.x); - - switch (type) - { - case CURSOR_TYPE_COLOR: - { - const unsigned int destPitch = frame.stride * 4; - for (int y = abs(min(0, cursorPos.y)); y < maxHeight; ++y) - for (int x = abs(min(0, cursorPos.x)); x < maxWidth; ++x) - { - uint8_t *src = (uint8_t *)cursorData + (cursorPitch * y) + (x * 4); - uint8_t *dst = (uint8_t *)frame.buffer + (destPitch * (y + cursorPos.y)) + ((x + cursorPos.x) * 4); - - const unsigned int alpha = src[3] + 1; - const unsigned int inv = 256 - alpha; - dst[0] = (uint8_t)((alpha * src[0] + inv * dst[0]) >> 8); - dst[1] = (uint8_t)((alpha * src[1] + inv * dst[1]) >> 8); - dst[2] = (uint8_t)((alpha * src[2] + inv * dst[2]) >> 8); - } - break; - } - - case CURSOR_TYPE_MASKED_COLOR: - { - for (int y = abs(min(0, cursorPos.y)); y < maxHeight; ++y) - for (int x = abs(min(0, cursorPos.x)); x < maxWidth; ++x) - { - uint32_t *src = (uint32_t *)cursorData + ((cursorPitch / 4) * y) + x; - uint32_t *dst = (uint32_t *)frame.buffer + (frame.stride * (y + cursorPos.y)) + (x + cursorPos.x); - if (*src & 0xff000000) - *dst = 0xff000000 | (*dst ^ *src); - else *dst = 0xff000000 | *src; - } - break; - } - - case CURSOR_TYPE_MONOCHROME: - { - for (int y = abs(min(0, cursorPos.y)); y < maxHeight / 2; ++y) - for (int x = abs(min(0, cursorPos.x)); x < maxWidth; ++x) - { - uint8_t *srcAnd = (uint8_t 
*)cursorData + (cursorPitch * y) + (x / 8); - uint8_t *srcXor = srcAnd + cursorPitch * (cursorRect.y / 2); - uint32_t *dst = (uint32_t *)frame.buffer + (frame.stride * (y + cursorPos.y)) + (x + cursorPos.x); - const uint8_t mask = 0x80 >> (x % 8); - const uint32_t andMask = (*srcAnd & mask) ? 0xFFFFFFFF : 0xFF000000; - const uint32_t xorMask = (*srcXor & mask) ? 0x00FFFFFF : 0x00000000; - *dst = (*dst & andMask) ^ xorMask; - } - break; - } - } - } }; \ No newline at end of file diff --git a/host/looking-glass-host.vcxproj b/host/looking-glass-host.vcxproj index 9dda88fa..d2599a41 100644 --- a/host/looking-glass-host.vcxproj +++ b/host/looking-glass-host.vcxproj @@ -335,6 +335,7 @@ + @@ -344,6 +345,7 @@ + diff --git a/host/looking-glass-host.vcxproj.filters b/host/looking-glass-host.vcxproj.filters index 5c9be6fe..cfbd4fde 100644 --- a/host/looking-glass-host.vcxproj.filters +++ b/host/looking-glass-host.vcxproj.filters @@ -42,6 +42,9 @@ Source Files + + Source Files + @@ -68,5 +71,8 @@ Header Files + + Header Files + \ No newline at end of file