mirror of
https://github.com/gnif/LookingGlass.git
synced 2025-01-09 05:43:57 +00:00
[common] improve framebuffer copy to avoid cache pollution (SIMD)
This commit is contained in:
parent
c5ff8bd4ce
commit
da655b86c3
2 changed files with 29 additions and 12 deletions
2
VERSION
2
VERSION
|
@ -1 +1 @@
|
||||||
B2-rc2-11-gbd42445ea7+1
|
B2-rc2-13-gc5ff8bd4ce+1
|
|
@ -22,6 +22,7 @@ Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdatomic.h>
|
#include <stdatomic.h>
|
||||||
|
#include <emmintrin.h>
|
||||||
|
|
||||||
#define FB_CHUNK_SIZE 1024
|
#define FB_CHUNK_SIZE 1024
|
||||||
|
|
||||||
|
@ -35,7 +36,7 @@ const size_t FrameBufferStructSize = sizeof(FrameBuffer);
|
||||||
|
|
||||||
void framebuffer_wait(const FrameBuffer * frame, size_t size)
|
void framebuffer_wait(const FrameBuffer * frame, size_t size)
|
||||||
{
|
{
|
||||||
while(atomic_load_explicit(&frame->wp, memory_order_relaxed) != size) {}
|
while(atomic_load_explicit(&frame->wp, memory_order_acquire) != size) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -46,6 +47,8 @@ bool framebuffer_read(const FrameBuffer * frame, void * dst, size_t dstpitch,
|
||||||
uint_least32_t rp = 0;
|
uint_least32_t rp = 0;
|
||||||
size_t y = 0;
|
size_t y = 0;
|
||||||
const size_t linewidth = width * bpp;
|
const size_t linewidth = width * bpp;
|
||||||
|
const size_t blocks = linewidth / 16;
|
||||||
|
const size_t left = linewidth % 16;
|
||||||
|
|
||||||
while(y < height)
|
while(y < height)
|
||||||
{
|
{
|
||||||
|
@ -53,13 +56,18 @@ bool framebuffer_read(const FrameBuffer * frame, void * dst, size_t dstpitch,
|
||||||
|
|
||||||
/* spinlock */
|
/* spinlock */
|
||||||
do
|
do
|
||||||
wp = atomic_load_explicit(&frame->wp, memory_order_relaxed);
|
wp = atomic_load_explicit(&frame->wp, memory_order_acquire);
|
||||||
while(wp - rp < pitch);
|
while(wp - rp < pitch);
|
||||||
|
|
||||||
memcpy(d, frame->data + rp, linewidth);
|
__m128i * s = (__m128i *)(frame->data + rp);
|
||||||
|
for(int i = 0; i < blocks; ++i, ++s, d += 16)
|
||||||
|
_mm_stream_si128((__m128i *)d, _mm_load_si128(s));
|
||||||
|
|
||||||
|
if (left)
|
||||||
|
memcpy(d, frame->data + rp + blocks * 16, left);
|
||||||
|
|
||||||
rp += pitch;
|
rp += pitch;
|
||||||
d += dstpitch;
|
d += dstpitch - blocks * 16;
|
||||||
++y;
|
++y;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -79,7 +87,7 @@ bool framebuffer_read_fn(const FrameBuffer * frame, size_t height, size_t width,
|
||||||
|
|
||||||
/* spinlock */
|
/* spinlock */
|
||||||
do
|
do
|
||||||
wp = atomic_load_explicit(&frame->wp, memory_order_relaxed);
|
wp = atomic_load_explicit(&frame->wp, memory_order_acquire);
|
||||||
while(wp - rp < pitch);
|
while(wp - rp < pitch);
|
||||||
|
|
||||||
if (!fn(opaque, frame->data + rp, linewidth))
|
if (!fn(opaque, frame->data + rp, linewidth))
|
||||||
|
@ -97,18 +105,27 @@ bool framebuffer_read_fn(const FrameBuffer * frame, size_t height, size_t width,
|
||||||
*/
|
*/
|
||||||
void framebuffer_prepare(FrameBuffer * frame)
|
void framebuffer_prepare(FrameBuffer * frame)
|
||||||
{
|
{
|
||||||
atomic_store(&frame->wp, 0);
|
atomic_store_explicit(&frame->wp, 0, memory_order_release);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool framebuffer_write(FrameBuffer * frame, const void * src, size_t size)
|
bool framebuffer_write(FrameBuffer * frame, const void * src, size_t size)
|
||||||
{
|
{
|
||||||
|
__m128i * s = (__m128i *)src;
|
||||||
|
|
||||||
/* copy in chunks */
|
/* copy in chunks */
|
||||||
while(size)
|
while(size > 15)
|
||||||
{
|
{
|
||||||
size_t copy = size < FB_CHUNK_SIZE ? FB_CHUNK_SIZE : size;
|
_mm_stream_si128((__m128i *)(frame->data + frame->wp), _mm_load_si128(s));
|
||||||
memcpy(frame->data + frame->wp, src, copy);
|
atomic_fetch_add_explicit(&frame->wp, 16, memory_order_release);
|
||||||
atomic_fetch_add(&frame->wp, copy);
|
++s;
|
||||||
size -= copy;
|
size -= 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(size)
|
||||||
|
{
|
||||||
|
memcpy(frame->data + frame->wp, s, size);
|
||||||
|
atomic_fetch_add_explicit(&frame->wp, size, memory_order_release);
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue