mirror of
https://github.com/gnif/LookingGlass.git
synced 2025-01-23 12:08:10 +00:00
[common] framebuffer: fixed incorrect streaming usage
This commit is contained in:
parent
85b8c12abf
commit
43503222c7
2 changed files with 49 additions and 53 deletions
2
VERSION
2
VERSION
|
@ -1 +1 @@
|
||||||
B2-rc2-19-g7af053497e+1
|
B2-rc2-20-g85b8c12abf+1
|
|
@ -41,15 +41,15 @@ void framebuffer_wait(const FrameBuffer * frame, size_t size)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool framebuffer_read(const FrameBuffer * frame, void * dst, size_t dstpitch,
|
bool framebuffer_read(const FrameBuffer * frame, void * restrict dst,
|
||||||
size_t height, size_t width, size_t bpp, size_t pitch)
|
size_t dstpitch, size_t height, size_t width, size_t bpp, size_t pitch)
|
||||||
{
|
{
|
||||||
uint8_t *d = (uint8_t*)dst;
|
uint8_t * restrict d = (uint8_t*)dst;
|
||||||
uint_least32_t rp = 0;
|
uint_least32_t rp = 0;
|
||||||
size_t y = 0;
|
size_t y = 0;
|
||||||
const size_t linewidth = width * bpp;
|
const size_t linewidth = width * bpp;
|
||||||
const size_t blocks = linewidth / 16;
|
const size_t blocks = linewidth / 64;
|
||||||
const size_t left = linewidth % 16;
|
const size_t left = linewidth % 64;
|
||||||
|
|
||||||
while(y < height)
|
while(y < height)
|
||||||
{
|
{
|
||||||
|
@ -60,15 +60,34 @@ bool framebuffer_read(const FrameBuffer * frame, void * dst, size_t dstpitch,
|
||||||
wp = atomic_load_explicit(&frame->wp, memory_order_acquire);
|
wp = atomic_load_explicit(&frame->wp, memory_order_acquire);
|
||||||
while(wp - rp < pitch);
|
while(wp - rp < pitch);
|
||||||
|
|
||||||
__m128i * s = (__m128i *)(frame->data + rp);
|
_mm_mfence();
|
||||||
for(int i = 0; i < blocks; ++i, ++s, d += 16)
|
__m128i * restrict s = (__m128i *)(frame->data + rp);
|
||||||
_mm_stream_si128((__m128i *)d, _mm_stream_load_si128(s));
|
for(int i = 0; i < blocks; ++i)
|
||||||
|
{
|
||||||
|
__m128i *_d = (__m128i *)d;
|
||||||
|
__m128i *_s = (__m128i *)s;
|
||||||
|
__m128i v1 = _mm_stream_load_si128(_s + 0);
|
||||||
|
__m128i v2 = _mm_stream_load_si128(_s + 1);
|
||||||
|
__m128i v3 = _mm_stream_load_si128(_s + 2);
|
||||||
|
__m128i v4 = _mm_stream_load_si128(_s + 3);
|
||||||
|
|
||||||
|
_mm_storeu_si128(_d + 0, v1);
|
||||||
|
_mm_storeu_si128(_d + 1, v2);
|
||||||
|
_mm_storeu_si128(_d + 2, v3);
|
||||||
|
_mm_storeu_si128(_d + 3, v4);
|
||||||
|
|
||||||
|
d += 64;
|
||||||
|
s += 4;
|
||||||
|
}
|
||||||
|
|
||||||
if (left)
|
if (left)
|
||||||
memcpy(d, frame->data + rp + blocks * 16, left);
|
{
|
||||||
|
memcpy(d, s, left);
|
||||||
|
d += left;
|
||||||
|
}
|
||||||
|
|
||||||
rp += pitch;
|
rp += pitch;
|
||||||
d += dstpitch - blocks * 16;
|
d += dstpitch - linewidth;
|
||||||
++y;
|
++y;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -109,24 +128,31 @@ void framebuffer_prepare(FrameBuffer * frame)
|
||||||
atomic_store_explicit(&frame->wp, 0, memory_order_release);
|
atomic_store_explicit(&frame->wp, 0, memory_order_release);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool framebuffer_write(FrameBuffer * frame, const void * src, size_t size)
|
bool framebuffer_write(FrameBuffer * frame, const void * restrict src, size_t size)
|
||||||
{
|
{
|
||||||
__m128i * s = (__m128i *)src;
|
__m128i * restrict s = (__m128i *)src;
|
||||||
__m128i * d = (__m128i *)frame->data;
|
__m128i * restrict d = (__m128i *)frame->data;
|
||||||
size_t wp = 0;
|
size_t wp = 0;
|
||||||
|
|
||||||
|
_mm_mfence();
|
||||||
|
|
||||||
/* copy in chunks */
|
/* copy in chunks */
|
||||||
while(size > 63)
|
while(size > 63)
|
||||||
{
|
{
|
||||||
const __m128i v1 = _mm_stream_load_si128(s++);
|
__m128i *_d = (__m128i *)d;
|
||||||
const __m128i v2 = _mm_stream_load_si128(s++);
|
__m128i *_s = (__m128i *)s;
|
||||||
const __m128i v3 = _mm_stream_load_si128(s++);
|
__m128i v1 = _mm_stream_load_si128(_s + 0);
|
||||||
const __m128i v4 = _mm_stream_load_si128(s++);
|
__m128i v2 = _mm_stream_load_si128(_s + 1);
|
||||||
_mm_stream_si128(d++, v1);
|
__m128i v3 = _mm_stream_load_si128(_s + 2);
|
||||||
_mm_stream_si128(d++, v2);
|
__m128i v4 = _mm_stream_load_si128(_s + 3);
|
||||||
_mm_stream_si128(d++, v3);
|
|
||||||
_mm_stream_si128(d++, v4);
|
|
||||||
|
|
||||||
|
_mm_store_si128(_d + 0, v1);
|
||||||
|
_mm_store_si128(_d + 1, v2);
|
||||||
|
_mm_store_si128(_d + 2, v3);
|
||||||
|
_mm_store_si128(_d + 3, v4);
|
||||||
|
|
||||||
|
s += 4;
|
||||||
|
d += 4;
|
||||||
size -= 64;
|
size -= 64;
|
||||||
wp += 64;
|
wp += 64;
|
||||||
|
|
||||||
|
@ -134,36 +160,6 @@ bool framebuffer_write(FrameBuffer * frame, const void * src, size_t size)
|
||||||
atomic_store_explicit(&frame->wp, wp, memory_order_release);
|
atomic_store_explicit(&frame->wp, wp, memory_order_release);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (size > 47)
|
|
||||||
{
|
|
||||||
const __m128i v1 = _mm_stream_load_si128(s++);
|
|
||||||
const __m128i v2 = _mm_stream_load_si128(s++);
|
|
||||||
const __m128i v3 = _mm_stream_load_si128(s++);
|
|
||||||
_mm_stream_si128(d++, v1);
|
|
||||||
_mm_stream_si128(d++, v2);
|
|
||||||
_mm_stream_si128(d++, v3);
|
|
||||||
size -= 48;
|
|
||||||
wp += 48;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (size > 31)
|
|
||||||
{
|
|
||||||
const __m128i v1 = _mm_stream_load_si128(s++);
|
|
||||||
const __m128i v2 = _mm_stream_load_si128(s++);
|
|
||||||
_mm_stream_si128(d++, v1);
|
|
||||||
_mm_stream_si128(d++, v2);
|
|
||||||
size -= 32;
|
|
||||||
wp += 32;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (size > 15)
|
|
||||||
{
|
|
||||||
const __m128i v1 = _mm_stream_load_si128(s++);
|
|
||||||
_mm_stream_si128(d++, v1);
|
|
||||||
size -= 16;
|
|
||||||
wp += 16;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(size)
|
if(size)
|
||||||
{
|
{
|
||||||
memcpy(frame->data + wp, s, size);
|
memcpy(frame->data + wp, s, size);
|
||||||
|
|
Loading…
Reference in a new issue