From 7af053497ed52d997ae30becde3083960e1e24e5 Mon Sep 17 00:00:00 2001 From: Geoffrey McRae Date: Mon, 3 Aug 2020 12:24:11 +1000 Subject: [PATCH] [common] unroll the framebuffer write loop and increase the chunk size --- VERSION | 2 +- common/src/framebuffer.c | 40 ++++++++++++++++++++++++++++++++++------ 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/VERSION b/VERSION index 875a4101..2430f828 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -B2-rc2-16-g62d1bd1ea2+1 \ No newline at end of file +B2-rc2-17-gaa32c5ffad+1 \ No newline at end of file diff --git a/common/src/framebuffer.c b/common/src/framebuffer.c index ca5d1920..99bb3535 100644 --- a/common/src/framebuffer.c +++ b/common/src/framebuffer.c @@ -25,7 +25,7 @@ Place, Suite 330, Boston, MA 02111-1307 USA #include #include -#define FB_CHUNK_SIZE 1024 +#define FB_CHUNK_SIZE 1048576 struct stFrameBuffer { @@ -112,22 +112,50 @@ void framebuffer_prepare(FrameBuffer * frame) bool framebuffer_write(FrameBuffer * frame, const void * src, size_t size) { __m128i * s = (__m128i *)src; + __m128i * d = (__m128i *)frame->data; size_t wp = 0; /* copy in chunks */ - while(size > 15) + while(size > 63) { - _mm_stream_si128((__m128i *)(frame->data + wp), _mm_stream_load_si128(s++)); - size -= 16; - wp += 16; + _mm_stream_si128(d++, _mm_stream_load_si128(s++)); + _mm_stream_si128(d++, _mm_stream_load_si128(s++)); + _mm_stream_si128(d++, _mm_stream_load_si128(s++)); + _mm_stream_si128(d++, _mm_stream_load_si128(s++)); + size -= 64; + wp += 64; if (wp % FB_CHUNK_SIZE == 0) atomic_store_explicit(&frame->wp, wp, memory_order_release); } + if (size > 47) + { + _mm_stream_si128(d++, _mm_stream_load_si128(s++)); + _mm_stream_si128(d++, _mm_stream_load_si128(s++)); + _mm_stream_si128(d++, _mm_stream_load_si128(s++)); + size -= 48; + wp += 48; + } + + if (size > 31) + { + _mm_stream_si128(d++, _mm_stream_load_si128(s++)); + _mm_stream_si128(d++, _mm_stream_load_si128(s++)); + size -= 32; + wp += 32; + } + + if (size > 15) + { + _mm_stream_si128(d++, _mm_stream_load_si128(s++)); + size -= 16; + wp += 16; + } + if(size) { - memcpy(frame->data + frame->wp, s, size); + memcpy(frame->data + wp, s, size); wp += size; }