[host] improved latency of multimemcpy with hybrid locking and preempt

This commit is contained in:
Geoffrey McRae 2017-12-29 07:00:27 +11:00
parent f6f4c8070a
commit 2f2813037b
2 changed files with 31 additions and 10 deletions

View file

@ -25,9 +25,9 @@ MultiMemcpy::MultiMemcpy()
{ {
for (int i = 0; i < MULTIMEMCPY_THREADS; ++i) for (int i = 0; i < MULTIMEMCPY_THREADS; ++i)
{ {
m_workers[i].start = CreateSemaphore(NULL, 0, 1, NULL); m_workers[i].id = (1 << i);
m_workers[i].stop = CreateSemaphore(NULL, 0, 1, NULL); m_workers[i].running = &m_running;
m_semaphores[i] = m_workers[i].stop; m_workers[i].start = CreateSemaphore(NULL, 0, 1, NULL);
m_workers[i].thread = CreateThread(0, 0, WorkerFunction, &m_workers[i], 0, NULL); m_workers[i].thread = CreateThread(0, 0, WorkerFunction, &m_workers[i], 0, NULL);
} }
@ -39,22 +39,26 @@ MultiMemcpy::~MultiMemcpy()
{ {
TerminateThread(m_workers[i].thread, 0); TerminateThread(m_workers[i].thread, 0);
CloseHandle(m_workers[i].start); CloseHandle(m_workers[i].start);
CloseHandle(m_workers[i].stop );
} }
} }
void MultiMemcpy::Copy(void * dst, void * src, size_t size) void MultiMemcpy::Copy(void * dst, void * src, size_t size)
{ {
if (!m_awake)
Wake();
const size_t block = size / MULTIMEMCPY_THREADS; const size_t block = size / MULTIMEMCPY_THREADS;
for (int i = 0; i < MULTIMEMCPY_THREADS; ++i) for (int i = 0; i < MULTIMEMCPY_THREADS; ++i)
{ {
m_workers[i].dst = (uint8_t *)dst + i * block; m_workers[i].dst = (uint8_t *)dst + i * block;
m_workers[i].src = (uint8_t *)src + i * block; m_workers[i].src = (uint8_t *)src + i * block;
m_workers[i].size = (i + 1) * block - i * block; m_workers[i].size = (i + 1) * block - i * block;
ReleaseSemaphore(m_workers[i].start, 1, NULL);
} }
WaitForMultipleObjects(MULTIMEMCPY_THREADS, m_semaphores, TRUE, INFINITE); INTERLOCKED_OR8(&m_running, (1 << MULTIMEMCPY_THREADS) - 1);
while(m_running) {}
m_awake = false;
} }
DWORD WINAPI MultiMemcpy::WorkerFunction(LPVOID param) DWORD WINAPI MultiMemcpy::WorkerFunction(LPVOID param)
@ -64,7 +68,9 @@ DWORD WINAPI MultiMemcpy::WorkerFunction(LPVOID param)
for(;;) for(;;)
{ {
WaitForSingleObject(w->start, INFINITE); WaitForSingleObject(w->start, INFINITE);
while(!(*w->running & w->id)) {}
memcpySSE(w->dst, w->src, w->size); memcpySSE(w->dst, w->src, w->size);
ReleaseSemaphore(w->stop, 1, NULL); INTERLOCKED_AND8(w->running, ~w->id);
} }
} }

View file

@ -29,19 +29,34 @@ public:
MultiMemcpy(); MultiMemcpy();
~MultiMemcpy(); ~MultiMemcpy();
// preempt the copy and wake up the threads early
inline void Wake()
{
if (m_awake)
return;
for (int i = 0; i < MULTIMEMCPY_THREADS; ++i)
ReleaseSemaphore(m_workers[i].start, 1, NULL);
m_awake = true;
}
void Copy(void * dst, void * src, size_t size); void Copy(void * dst, void * src, size_t size);
private: private:
struct Worker struct Worker
{ {
unsigned int id;
volatile char *running;
HANDLE start; HANDLE start;
HANDLE stop;
HANDLE thread; HANDLE thread;
void * dst; void * dst;
void * src; void * src;
size_t size; size_t size;
}; };
HANDLE m_semaphores[MULTIMEMCPY_THREADS]; bool m_awake;
volatile char m_running;
struct Worker m_workers[MULTIMEMCPY_THREADS]; struct Worker m_workers[MULTIMEMCPY_THREADS];
static DWORD WINAPI WorkerFunction(LPVOID param); static DWORD WINAPI WorkerFunction(LPVOID param);
}; };