From 929e88b9d37aa1f35105bbc2c891f981e51174ad Mon Sep 17 00:00:00 2001 From: Geoffrey McRae Date: Sun, 12 Nov 2023 18:26:08 +1100 Subject: [PATCH] [all] provide conditional path optimization hints to the compiler --- client/renderers/EGL/desktop.c | 14 ++-- client/renderers/EGL/desktop_rects.c | 2 +- client/renderers/EGL/egl.c | 70 +++++++++++-------- client/renderers/EGL/texture_dmabuf.c | 10 +-- client/src/main.c | 33 +++++---- common/include/common/util.h | 3 + .../Windows/capture/NVFBC/src/nvfbc.c | 18 ++--- host/src/app.c | 61 +++++++++------- 8 files changed, 117 insertions(+), 94 deletions(-) diff --git a/client/renderers/EGL/desktop.c b/client/renderers/EGL/desktop.c index bfa20aa3..edc494f3 100644 --- a/client/renderers/EGL/desktop.c +++ b/client/renderers/EGL/desktop.c @@ -382,9 +382,9 @@ bool egl_desktopSetup(EGL_Desktop * desktop, const LG_RendererFormat format) bool egl_desktopUpdate(EGL_Desktop * desktop, const FrameBuffer * frame, int dmaFd, const FrameDamageRect * damageRects, int damageRectsCount) { - if (desktop->useDMA && dmaFd >= 0) + if (likely(desktop->useDMA && dmaFd >= 0)) { - if (egl_textureUpdateFromDMA(desktop->texture, frame, dmaFd)) + if (likely(egl_textureUpdateFromDMA(desktop->texture, frame, dmaFd))) { atomic_store(&desktop->processFrame, true); return true; @@ -420,8 +420,8 @@ bool egl_desktopUpdate(EGL_Desktop * desktop, const FrameBuffer * frame, int dma return false; } - if (egl_textureUpdateFromFrame(desktop->texture, frame, - damageRects, damageRectsCount)) + if (likely(egl_textureUpdateFromFrame(desktop->texture, frame, + damageRects, damageRectsCount))) { atomic_store(&desktop->processFrame, true); return true; @@ -443,7 +443,7 @@ bool egl_desktopRender(EGL_Desktop * desktop, unsigned int outputWidth, EGL_Texture * tex; int width, height; - if (desktop->useSpice) + if (unlikely(desktop->useSpice)) { tex = desktop->spiceTexture; width = desktop->spiceWidth; @@ -456,11 +456,11 @@ bool egl_desktopRender(EGL_Desktop * desktop, 
unsigned int outputWidth, height = desktop->height; } - if (outputWidth == 0 && outputHeight == 0) + if (unlikely(outputWidth == 0 && outputHeight == 0)) DEBUG_FATAL("outputWidth || outputHeight == 0"); enum EGL_TexStatus status; - if ((status = egl_textureProcess(tex)) != EGL_TEX_STATUS_OK) + if (unlikely((status = egl_textureProcess(tex)) != EGL_TEX_STATUS_OK)) { if (status != EGL_TEX_STATUS_NOTREADY) DEBUG_ERROR("Failed to process the desktop texture"); diff --git a/client/renderers/EGL/desktop_rects.c b/client/renderers/EGL/desktop_rects.c index b5efdd4a..ff3e0e25 100644 --- a/client/renderers/EGL/desktop_rects.c +++ b/client/renderers/EGL/desktop_rects.c @@ -296,7 +296,7 @@ bool egl_screenToDesktop(struct FrameDamageRect * output, const double matrix[6] void egl_desktopRectsRender(EGL_DesktopRects * rects) { - if (!rects->count) + if (unlikely(!rects->count)) return; glBindVertexArray(rects->vao); diff --git a/client/renderers/EGL/egl.c b/client/renderers/EGL/egl.c index bcb8ca82..8a401967 100644 --- a/client/renderers/EGL/egl.c +++ b/client/renderers/EGL/egl.c @@ -20,6 +20,7 @@ #include "interface/renderer.h" +#include "common/util.h" #include "common/debug.h" #include "common/KVMFR.h" #include "common/option.h" @@ -580,7 +581,7 @@ static bool egl_onFrameFormat(LG_Renderer * renderer, const LG_RendererFormat fo this->formatValid = true; /* this event runs in a second thread so we need to init it here */ - if (!this->frameContext) + if (unlikely(!this->frameContext)) { static EGLint attrs[] = { EGL_CONTEXT_CLIENT_VERSION, 2, @@ -600,7 +601,7 @@ static bool egl_onFrameFormat(LG_Renderer * renderer, const LG_RendererFormat fo } } - if (this->scalePointer) + if (likely(this->scalePointer)) { float scale = max(1.0f, (float)format.screenWidth / this->width); egl_cursorSetScale(this->cursor, scale); @@ -623,7 +624,8 @@ static bool egl_onFrame(LG_Renderer * renderer, const FrameBuffer * frame, int d struct Inst * this = UPCAST(struct Inst, renderer); uint64_t start = 
nanotime(); - if (!egl_desktopUpdate(this->desktop, frame, dmaFd, damageRects, damageRectsCount)) + if (unlikely(!egl_desktopUpdate( + this->desktop, frame, dmaFd, damageRects, damageRectsCount))) { DEBUG_INFO("Failed to to update the desktop"); return false; @@ -632,12 +634,17 @@ static bool egl_onFrame(LG_Renderer * renderer, const FrameBuffer * frame, int d INTERLOCKED_SECTION(this->desktopDamageLock, { struct DesktopDamage * damage = this->desktopDamage + this->desktopDamageIdx; - if (damage->count == -1 || damageRectsCount == 0 || - damage->count + damageRectsCount >= KVMFR_MAX_DAMAGE_RECTS) + if (unlikely( + damage->count == -1 || + damageRectsCount == 0 || + damage->count + damageRectsCount >= KVMFR_MAX_DAMAGE_RECTS)) + { damage->count = -1; + } else { - memcpy(damage->rects + damage->count, damageRects, damageRectsCount * sizeof(FrameDamageRect)); + memcpy(damage->rects + damage->count, damageRects, + damageRectsCount * sizeof(FrameDamageRect)); damage->count += damageRectsCount; } }); @@ -1056,14 +1063,14 @@ static bool egl_render(LG_Renderer * renderer, LG_RendererRotate rotate, accumulated->count = 0; INTERLOCKED_SECTION(this->desktopDamageLock, { - if (!renderAll) + if (likely(!renderAll)) { for (int i = 0; i < bufferAge; ++i) { struct DesktopDamage * damage = this->desktopDamage + IDX_AGO(this->desktopDamageIdx, i, DESKTOP_DAMAGE_COUNT); - if (damage->count < 0) + if (unlikely(damage->count < 0)) { renderAll = true; break; @@ -1087,7 +1094,7 @@ static bool egl_render(LG_Renderer * renderer, LG_RendererRotate rotate, this->desktopDamage[this->desktopDamageIdx].count = 0; }); - if (!renderAll) + if (likely(!renderAll)) { double matrix[6]; egl_screenToDesktopMatrix(matrix, @@ -1101,7 +1108,7 @@ static bool egl_render(LG_Renderer * renderer, LG_RendererRotate rotate, int count = this->overlayHistoryCount[idx]; struct Rect * damage = this->overlayHistory[idx]; - if (count < 0) + if (unlikely(count < 0)) { renderAll = true; break; @@ -1114,11 +1121,12 @@ 
static bool egl_render(LG_Renderer * renderer, LG_RendererRotate rotate, ); } - accumulated->count = rectsMergeOverlapping(accumulated->rects, accumulated->count); + accumulated->count = rectsMergeOverlapping(accumulated->rects, + accumulated->count); } ++this->overlayHistoryIdx; - if (this->destRect.w > 0 && this->destRect.h > 0) + if (likely(this->destRect.w > 0 && this->destRect.h > 0)) { if (egl_desktopRender(this->desktop, this->destRect.w, this->destRect.h, @@ -1136,41 +1144,39 @@ static bool egl_render(LG_Renderer * renderer, LG_RendererRotate rotate, renderLetterBox(this); - hasOverlay |= egl_damageRender(this->damage, rotate, newFrame ? desktopDamage : NULL); - hasOverlay |= invalidateWindow; + hasOverlay |= + egl_damageRender(this->damage, rotate, newFrame ? desktopDamage : NULL) | + invalidateWindow; struct Rect damage[KVMFR_MAX_DAMAGE_RECTS + MAX_OVERLAY_RECTS + 2]; int damageIdx = app_renderOverlay(damage, MAX_OVERLAY_RECTS); - - switch (damageIdx) + if (unlikely(damageIdx != 0)) { - case 0: // no overlay - break; - case -1: // full damage + if (damageIdx == -1) hasOverlay = true; - // fallthrough - default: - ImGui_ImplOpenGL3_NewFrame(); - ImGui_ImplOpenGL3_RenderDrawData(igGetDrawData()); - for (int i = 0; i < damageIdx; ++i) - damage[i].y = this->height - damage[i].y - damage[i].h; + ImGui_ImplOpenGL3_NewFrame(); + ImGui_ImplOpenGL3_RenderDrawData(igGetDrawData()); + + for (int i = 0; i < damageIdx; ++i) + damage[i].y = this->height - damage[i].y - damage[i].h; } - if (damageIdx >= 0 && cursorState.visible) + if (likely(damageIdx >= 0 && cursorState.visible)) damage[damageIdx++] = cursorState.rect; int overlayHistoryIdx = this->overlayHistoryIdx % DESKTOP_DAMAGE_COUNT; - if (hasOverlay) + if (unlikely(hasOverlay)) this->overlayHistoryCount[overlayHistoryIdx] = -1; else { - if (damageIdx > 0) - memcpy(this->overlayHistory[overlayHistoryIdx], damage, damageIdx * sizeof(struct Rect)); + if (unlikely(damageIdx > 0)) + 
memcpy(this->overlayHistory[overlayHistoryIdx], + damage, damageIdx * sizeof(struct Rect)); this->overlayHistoryCount[overlayHistoryIdx] = damageIdx; } - if (!hasOverlay && !this->hadOverlay) + if (unlikely(!hasOverlay && !this->hadOverlay)) { if (this->cursorLast.visible) damage[damageIdx++] = this->cursorLast.rect; @@ -1197,7 +1203,9 @@ static bool egl_render(LG_Renderer * renderer, LG_RendererRotate rotate, this->cursorLast = cursorState; preSwap(udata); - app_eglSwapBuffers(this->display, this->surface, damage, this->noSwapDamage ? 0 : damageIdx); + app_eglSwapBuffers(this->display, this->surface, damage, + this->noSwapDamage ? 0 : damageIdx); + return true; } diff --git a/client/renderers/EGL/texture_dmabuf.c b/client/renderers/EGL/texture_dmabuf.c index 3839c8da..e1e927e2 100644 --- a/client/renderers/EGL/texture_dmabuf.c +++ b/client/renderers/EGL/texture_dmabuf.c @@ -157,7 +157,7 @@ static bool egl_texDMABUFUpdate(EGL_Texture * texture, break; } - if (image == EGL_NO_IMAGE) + if (unlikely(image == EGL_NO_IMAGE)) { const uint64_t modifier = DRM_FORMAT_MOD_LINEAR; EGLAttrib attribs[] = @@ -184,16 +184,16 @@ static bool egl_texDMABUFUpdate(EGL_Texture * texture, (EGLClientBuffer)NULL, attribs); - if (image == EGL_NO_IMAGE) + if (unlikely(image == EGL_NO_IMAGE)) { DEBUG_EGL_ERROR("Failed to create EGLImage for DMA transfer"); return false; } - if (!vector_push(&this->images, &(struct FdImage) { + if (unlikely(!vector_push(&this->images, &(struct FdImage) { .fd = update->dmaFD, .image = image, - })) + }))) { DEBUG_ERROR("Failed to store EGLImage"); g_egl_dynProcs.eglDestroyImage(this->display, image); @@ -206,7 +206,7 @@ static bool egl_texDMABUFUpdate(EGL_Texture * texture, glBindTexture(GL_TEXTURE_EXTERNAL_OES, parent->tex[parent->bufIndex]); g_egl_dynProcs.glEGLImageTargetTexture2DOES(GL_TEXTURE_EXTERNAL_OES, image); - if (parent->sync) + if (likely(parent->sync)) glDeleteSync(parent->sync); parent->sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); diff 
--git a/client/src/main.c b/client/src/main.c index f9a97ae6..d2d0984a 100644 --- a/client/src/main.c +++ b/client/src/main.c @@ -209,17 +209,16 @@ static int renderThread(void * unused) struct timespec time; clock_gettime(CLOCK_MONOTONIC, &time); - while(g_state.state != APP_STATE_SHUTDOWN) + while(likely(g_state.state != APP_STATE_SHUTDOWN)) { - bool forceRender = false; - if (g_state.jitRender) - forceRender = g_state.ds->waitFrame(); - - app_handleRenderEvent(microtime()); if (g_state.jitRender) { + const bool forceRender = g_state.ds->waitFrame(); + app_handleRenderEvent(microtime()); + const uint64_t pending = atomic_load_explicit(&g_state.pendingCount, memory_order_acquire); + if (!lgResetEvent(g_state.frameEvent) && !forceRender && !pending @@ -235,9 +234,13 @@ static int renderThread(void * unused) } else if (g_params.fpsMin != 0) { + app_handleRenderEvent(microtime()); + float ups = atomic_load_explicit(&g_state.ups, memory_order_relaxed); - if (!lgWaitEventAbs(g_state.frameEvent, &time) || ups > g_params.fpsMin) + if (unlikely( + !lgWaitEventAbs(g_state.frameEvent, &time) || + ups > g_params.fpsMin)) { /* only update the time if we woke up early */ clock_gettime(CLOCK_MONOTONIC, &time); @@ -247,7 +250,12 @@ static int renderThread(void * unused) } + else + { + /* neither JIT rendering nor a minimum FPS is in use; render events must still be handled once per pass as they were before the call was moved into the branches above */ + app_handleRenderEvent(microtime()); + } int resize = atomic_load(&g_state.lgrResize); - if (resize) + if (unlikely(resize)) { g_state.io->DisplaySize = (ImVec2) { .x = g_state.windowW, @@ -286,8 +289,8 @@ static int renderThread(void * unused) renderQueue_process(); - if (!RENDERER(render, g_params.winRotate, newFrame, invalidate, - preSwapCallback, (void *)&renderStart)) + if (unlikely(!RENDERER(render, g_params.winRotate, newFrame, invalidate, + preSwapCallback, (void *)&renderStart))) { LG_UNLOCK(g_state.lgrLock); break; @@ -300,7 +303,7 @@ static int renderThread(void * unused) g_state.lastRenderTime = t; atomic_fetch_add_explicit(&g_state.renderCount, 1, memory_order_relaxed); - if (g_state.lastRenderTimeValid) + if 
(likely(g_state.lastRenderTimeValid)) { const float fdelta = (float)delta / 1e6f; ringbuffer_push(g_state.renderTimings, &fdelta); @@ -308,7 +311,9 @@ static int renderThread(void * unused) g_state.lastRenderTimeValid = true; const uint64_t now = microtime(); - if (!g_state.resizeDone && g_state.resizeTimeout < now) + if (unlikely( + !g_state.resizeDone && + g_state.resizeTimeout < now)) { if (g_params.autoResize) { @@ -1739,9 +1744,9 @@ restart: return -1; } - while(g_state.state == APP_STATE_RUNNING) + while(likely(g_state.state == APP_STATE_RUNNING)) { - if (!lgmpClientSessionValid(g_state.lgmp)) + if (unlikely(!lgmpClientSessionValid(g_state.lgmp))) { g_state.lgHostConnected = false; DEBUG_INFO("Waiting for the host to restart..."); diff --git a/common/include/common/util.h b/common/include/common/util.h index 193d228c..944cdfb3 100644 --- a/common/include/common/util.h +++ b/common/include/common/util.h @@ -40,4 +40,7 @@ #define ALIGN_TO(value, align) (((value) + (align) - 1) & -(align)) +#define unlikely(expr) __builtin_expect(!!(expr), 0) +#define likely(expr) __builtin_expect(!!(expr), 1) + #endif diff --git a/host/platform/Windows/capture/NVFBC/src/nvfbc.c b/host/platform/Windows/capture/NVFBC/src/nvfbc.c index 537d1a49..12515338 100644 --- a/host/platform/Windows/capture/NVFBC/src/nvfbc.c +++ b/host/platform/Windows/capture/NVFBC/src/nvfbc.c @@ -457,7 +457,7 @@ static CaptureResult nvfbc_capture(void) unsigned int width, height; getDesktopSize(&width, &height); - if (this->width != width || this->height != height) + if (unlikely(this->width != width || this->height != height)) { this->resChanged = true; this->width = width; @@ -476,7 +476,7 @@ static CaptureResult nvfbc_capture(void) &grabInfo ); - if (result != CAPTURE_RESULT_OK) + if (unlikely(result != CAPTURE_RESULT_OK)) return result; bool changed = false; @@ -651,15 +651,15 @@ done: static CaptureResult nvfbc_waitFrame(CaptureFrame * frame, const size_t maxFrameSize) { - if (this->stop) + if 
(unlikely(this->stop)) return CAPTURE_RESULT_REINIT; - if ( + if (unlikely( this->grabInfo.dwWidth != this->grabWidth || this->grabInfo.dwHeight != this->grabHeight || this->grabInfo.dwBufferWidth != this->grabStride || this->grabInfo.bIsHDR != this->isHDR || - this->resChanged) + this->resChanged)) { this->grabWidth = this->grabInfo.dwWidth; this->grabHeight = this->grabInfo.dwHeight; @@ -820,11 +820,11 @@ static CaptureResult nvfbc_getFrame(FrameBuffer * frame, int frameIndex) static int pointerThread(void * unused) { - while (!this->stop) + while (likely(!this->stop)) { lgWaitEvent(this->cursorEvent, TIMEOUT_INFINITE); - if (this->stop) + if (unlikely(this->stop)) break; CaptureResult result; @@ -832,14 +832,14 @@ static int pointerThread(void * unused) void * data; uint32_t size; - if (!this->getPointerBufferFn(&data, &size)) + if (unlikely(!this->getPointerBufferFn(&data, &size))) { DEBUG_WARN("failed to get a pointer buffer"); continue; } result = NvFBCToSysGetCursor(this->nvfbc, &pointer, data, size); - if (result != CAPTURE_RESULT_OK) + if (unlikely(result != CAPTURE_RESULT_OK)) { DEBUG_WARN("NvFBCToSysGetCursor failed"); continue; diff --git a/host/src/app.c b/host/src/app.c index 9d15491a..0cb2df46 100644 --- a/host/src/app.c +++ b/host/src/app.c @@ -927,14 +927,16 @@ int app_main(int argc, char * argv[]) } } - while(app.state != APP_STATE_SHUTDOWN && ( + while(likely(app.state != APP_STATE_SHUTDOWN && ( lgmpHostQueueHasSubs(app.pointerQueue) || - lgmpHostQueueHasSubs(app.frameQueue))) + lgmpHostQueueHasSubs(app.frameQueue)))) { - if (app.state == APP_STATE_RESTART || app.state == APP_STATE_REINIT) + if (unlikely( + app.state == APP_STATE_RESTART || + app.state == APP_STATE_REINIT)) break; - if (lgmpHostQueueNewSubs(app.pointerQueue) > 0) + if (unlikely(lgmpHostQueueNewSubs(app.pointerQueue) > 0)) { LG_LOCK(app.pointerLock); sendPointer(true); @@ -951,32 +953,39 @@ int app_main(int argc, char * argv[]) } const uint64_t captureStart = microtime(); - 
switch(app.iface->capture()) + const CaptureResult result = app.iface->capture(); + if (likely(result == CAPTURE_RESULT_OK)) + previousFrameTime = captureStart; + else if (likely(result == CAPTURE_RESULT_TIMEOUT)) { - case CAPTURE_RESULT_OK: - previousFrameTime = captureStart; - break; + if (!app.iface->asyncCapture) + if (unlikely(app.frameValid && + lgmpHostQueueNewSubs(app.frameQueue) > 0)) + { + LGMP_STATUS status; + if ((status = lgmpHostQueuePost(app.frameQueue, 0, + app.frameMemory[app.frameIndex])) != LGMP_OK) + DEBUG_ERROR("%s", lgmpStatusString(status)); + } + /* a timeout produced no new frame; restart the loop as the replaced switch case did instead of falling through to the frame handling below */ + continue; + } + else + { + switch(result) + { + case CAPTURE_RESULT_REINIT: + app.state = APP_STATE_RESTART; + continue; - case CAPTURE_RESULT_TIMEOUT: - if (!app.iface->asyncCapture) - if (app.frameValid && lgmpHostQueueNewSubs(app.frameQueue) > 0) - { - LGMP_STATUS status; - if ((status = lgmpHostQueuePost(app.frameQueue, 0, - app.frameMemory[app.frameIndex])) != LGMP_OK) - DEBUG_ERROR("%s", lgmpStatusString(status)); - } + case CAPTURE_RESULT_ERROR: + DEBUG_ERROR("Capture interface reported a fatal error"); + exitcode = LG_HOST_EXIT_FAILED; + goto fail_capture; - continue; - - case CAPTURE_RESULT_REINIT: - app.state = APP_STATE_RESTART; - continue; - - case CAPTURE_RESULT_ERROR: - DEBUG_ERROR("Capture interface reported a fatal error"); - exitcode = LG_HOST_EXIT_FAILED; - goto fail_capture; + default: + DEBUG_ASSERT("Invalid capture result"); + } } if (!app.iface->asyncCapture)