From 3d534c9a81704a7a64e5bc4f5d2a24c12f987e07 Mon Sep 17 00:00:00 2001 From: TuxSH Date: Wed, 14 Jun 2017 19:35:03 +0200 Subject: [PATCH] Unschedule threads properly... ...instead of using a shitty yield when opening the Rosalina menu --- k11_extension/Makefile | 2 +- k11_extension/include/globals.h | 6 +- k11_extension/include/kernel.h | 3 +- k11_extension/include/svc.h | 1 + k11_extension/include/svc/KernelSetState.h | 1 - k11_extension/include/synchronization.h | 58 ++++++++++ k11_extension/source/globals.c | 5 +- k11_extension/source/main.c | 11 +- k11_extension/source/start.s | 2 + k11_extension/source/svc.c | 25 ++--- k11_extension/source/svc/KernelSetState.c | 5 + k11_extension/source/svcHandler.s | 4 +- k11_extension/source/synchronization.c | 123 ++++++++++++++++++++- k11_extension/source/utils.s | 5 + source/patches.c | 8 ++ 15 files changed, 229 insertions(+), 30 deletions(-) diff --git a/k11_extension/Makefile b/k11_extension/Makefile index 893b2e6..b0fbae8 100644 --- a/k11_extension/Makefile +++ b/k11_extension/Makefile @@ -14,7 +14,7 @@ dir_build := build ARCH := -mcpu=mpcore -mfpu=vfp ASFLAGS := $(ARCH) -CFLAGS := -Wall -Wextra -MMD -MP -marm $(ASFLAGS) -I$(dir_include) -fno-builtin -std=c11 -Wno-main -O2 -flto -ffast-math \ +CFLAGS := -Wall -Wextra -MMD -MP -marm $(ASFLAGS) -I$(dir_include) -fno-builtin -std=c11 -Wno-main -g -flto -O2 -ffast-math \ -mword-relocations -ffunction-sections -fdata-sections LDFLAGS := -nostdlib -Wl,--gc-sections,--nmagic $(ARCH) diff --git a/k11_extension/include/globals.h b/k11_extension/include/globals.h index f575bd1..289e426 100644 --- a/k11_extension/include/globals.h +++ b/k11_extension/include/globals.h @@ -78,7 +78,7 @@ extern s32 (*kernelToUsrStrncpy)(char *dst, const char *src, u32 len); extern void (*svcFallbackHandler)(u8 svcId); extern void (*kernelpanic)(void); -extern void (*PostprocessSvc)(void); +extern void (*officialPostProcessSvc)(void); extern Result (*SignalDebugEvent)(DebugEventType type, u32 info, 
...); @@ -96,6 +96,8 @@ extern vu8 *configPage; extern u32 kernelVersion; extern FcramLayout fcramLayout; +extern KCoreContext *coreCtxs; + extern void *originalHandlers[8]; extern u32 nbSection0Modules; @@ -126,5 +128,5 @@ typedef struct PACKED CfwInfo extern CfwInfo cfwInfo; -extern u32 rosalinaState; +extern vu32 rosalinaState; extern bool hasStartedRosalinaNetworkFuncsOnce; diff --git a/k11_extension/include/kernel.h b/k11_extension/include/kernel.h index 75d8710..2c0e941 100644 --- a/k11_extension/include/kernel.h +++ b/k11_extension/include/kernel.h @@ -206,7 +206,7 @@ typedef struct PACKED ALIGN(4) KThread KMutexLinkedList *mutexList; KLinkedList mutexesUsed; s32 dynamicPriority; - u32 processor; + u32 coreId; KPreemptionTimer *preemptionTimer; u32 unknown_1; bool isAlive; @@ -950,6 +950,7 @@ typedef struct KCoreContext } KCoreContext; static KCoreContext * const currentCoreContext = (KCoreContext *)0xFFFF1000; +extern KCoreContext *coreCtxs; #define DEFINE_CONSOLE_SPECIFIC_STRUCTS(console, nbCores) /* 60 */ diff --git a/k11_extension/include/svc.h b/k11_extension/include/svc.h index b6c9c73..cfe046c 100644 --- a/k11_extension/include/svc.h +++ b/k11_extension/include/svc.h @@ -33,5 +33,6 @@ extern void *officialSVCs[0x7E]; +void postprocessSvc(void); void svcDefaultHandler(u8 svcId); void *svcHook(u8 *pageEnd); diff --git a/k11_extension/include/svc/KernelSetState.h b/k11_extension/include/svc/KernelSetState.h index 6b6e227..0c04c90 100644 --- a/k11_extension/include/svc/KernelSetState.h +++ b/k11_extension/include/svc/KernelSetState.h @@ -30,6 +30,5 @@ #include "kernel.h" #include "svc.h" -extern u32 rosalinaState; bool shouldSignalSyscallDebugEvent(KProcess *process, u8 svcId); Result KernelSetStateHook(u32 type, u32 varg1, u32 varg2, u32 varg3); diff --git a/k11_extension/include/synchronization.h b/k11_extension/include/synchronization.h index 96e11f3..a6c09c4 100644 --- a/k11_extension/include/synchronization.h +++ 
b/k11_extension/include/synchronization.h
@@ -34,6 +34,14 @@ typedef KSchedulableInterruptEvent* (*SGI0Handler_t)(KBaseInterruptEvent *this,
 // http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0360f/CCHDIFIJ.html
 void executeFunctionOnCores(SGI0Handler_t func, u8 targetList, u8 targetListFilter);
 
+void KScheduler__TriggerCrossCoreInterrupt(KScheduler *this); // sends SGI 8 to every core except this->coreNumber
+void KThread__DebugReschedule(KThread *this, bool lock); // sets/clears scheduling-mask bit 0x80, then re-adjusts the thread
+bool rosalinaThreadLockPredicate(KThread *thread); // true when the thread's process matches the Rosalina lock criteria
+void rosalinaRescheduleThread(KThread *thread, bool lock); // sets/clears scheduling-mask bit 0x40, then re-adjusts the thread
+void rosalinaLockThread(KThread *thread); // locks the thread unless it owns synchronizationMutex
+void rosalinaLockAllThreads(void); // locks every thread accepted by rosalinaThreadLockPredicate
+void rosalinaUnlockAllThreads(void); // clears bit 0x40 on every non-terminating thread that carries it
+
 // Taken from ctrulib:
 
 static inline void __dsb(void)
@@ -59,3 +67,53 @@ static inline bool __strex(s32* addr, s32 val)
     __asm__ __volatile__("strex %[res], %[val], %[addr]" : [res] "=&r" (res) : [val] "r" (val), [addr] "Q" (*addr));
     return res;
 }
+
+static inline s8 __ldrex8(s8* addr) // byte-sized counterpart of __ldrex (ldrexb)
+{
+    s8 val;
+    __asm__ __volatile__("ldrexb %[val], %[addr]" : [val] "=r" (val) : [addr] "Q" (*addr));
+    return val;
+}
+
+static inline bool __strex8(s8* addr, s8 val) // byte-sized counterpart of __strex; returns the status written by strexb
+{
+    bool res;
+    __asm__ __volatile__("strexb %[res], %[val], %[addr]" : [res] "=&r" (res) : [val] "r" (val), [addr] "Q" (*addr));
+    return res;
+}
+
+static inline s16 __ldrex16(s16* addr) // halfword-sized counterpart of __ldrex (ldrexh)
+{
+    s16 val;
+    __asm__ __volatile__("ldrexh %[val], %[addr]" : [val] "=r" (val) : [addr] "Q" (*addr));
+    return val;
+}
+
+static inline bool __strex16(s16* addr, s16 val) // halfword-sized counterpart of __strex; returns the status written by strexh
+{
+    bool res;
+    __asm__ __volatile__("strexh %[res], %[val], %[addr]" : [res] "=&r" (res) : [val] "r" (val), [addr] "Q" (*addr));
+    return res;
+}
+
+static inline u32 __get_cpsr(void) // reads the whole CPSR
+{
+    u32 cpsr;
+    __asm__ __volatile__("mrs %0, cpsr" : "=r"(cpsr));
+    return cpsr;
+}
+
+static inline void __set_cpsr_cx(u32 cpsr) // writes back only the c and x fields of CPSR (msr cpsr_cx)
+{
+    __asm__ __volatile__("msr cpsr_cx, %0" :: "r"(cpsr));
+}
+
+static inline void __enable_irq(void) // unmasks IRQs on the executing core (cpsie i)
+{
+    __asm__ __volatile__("cpsie i");
+}
+
+static inline void __disable_irq(void) // masks IRQs on the executing core (cpsid i)
+{
+    __asm__ __volatile__("cpsid i");
+}
diff --git a/k11_extension/source/globals.c b/k11_extension/source/globals.c
index 2978e4c..f648ae2 100644
--- a/k11_extension/source/globals.c
+++ b/k11_extension/source/globals.c
@@ -74,7 +74,7 @@ s32 (*kernelToUsrStrncpy)(char *dst, const char *src, u32 len);
 
 void (*svcFallbackHandler)(u8 svcId);
 void (*kernelpanic)(void);
-void (*PostprocessSvc)(void);
+void (*officialPostProcessSvc)(void); // renamed: postprocessSvc() in svc.c now wraps this pointer
 
 Result (*SignalDebugEvent)(DebugEventType type, u32 info, ...);
 
@@ -91,6 +91,7 @@ bool *isDevUnit;
 vu8 *configPage;
 u32 kernelVersion;
 FcramLayout fcramLayout;
+KCoreContext *coreCtxs; // set once in main() from its ctxs argument
 
 void *originalHandlers[8] = {NULL};
 
@@ -107,5 +108,5 @@ void (*coreBarrier)(void);
 
 CfwInfo cfwInfo;
 
-u32 rosalinaState;
+vu32 rosalinaState; // now volatile-qualified, matching the extern in globals.h
 bool hasStartedRosalinaNetworkFuncsOnce;
diff --git a/k11_extension/source/main.c b/k11_extension/source/main.c
index 40a65f6..6c33ae5 100644
--- a/k11_extension/source/main.c
+++ b/k11_extension/source/main.c
@@ -96,6 +96,7 @@ void configHook(vu8 *cfgPage)
         *isDevUnit = true; // enable debug features
 }
 
+void wat(u32 a, ...); // NOTE(review): debug stub implemented in utils.s (bkpt 1); its only call in this file is commented out
 static void findUsefulSymbols(void)
 {
     u32 *off;
@@ -111,7 +112,7 @@ static void findUsefulSymbols(void)
     for(off = (u32 *)originalHandlers[2]; *off != 0xE1A00009; off++);
     svcFallbackHandler = (void (*)(u8))decodeARMBranch(off + 1);
     for(; *off != 0xE92D000F; off++);
-    PostprocessSvc = (void (*)(void))decodeARMBranch(off + 1);
+    officialPostProcessSvc = (void (*)(void))decodeARMBranch(off + 1); // target of the branch following the 0xE92D000F word
 
     KProcessHandleTable__ToKProcess = (KProcess * (*)(KProcessHandleTable *, Handle))decodeARMBranch(5 + (u32 *)officialSVCs[0x76]);
 
@@ -135,7 +136,7 @@ static void findUsefulSymbols(void)
     for(off = (u32 *)officialSVCs[0x19]; *off != 0xE1A04005; off++);
     KEvent__Clear = (Result (*)(KEvent *))decodeARMBranch(off + 1);
 
-    for(off = (u32 *)KEvent__Clear; *off != 0xE8BD8070; off++)
+    for(off = (u32 *)KEvent__Clear; *off != 0xE8BD8070; off++); // the missing ';' made the next statement the loop body
     synchronizationMutex = *(KObjectMutex **)(off + 1);
 
     for(off = (u32 *)officialSVCs[0x24]; *off != 0xE59F004C; off++);
@@ -247,7 +248,7 @@ static void findUsefulSymbols(void)
     }
 }
 
-void main(FcramLayout *layout)
+void main(FcramLayout *layout, KCoreContext *ctxs) // ctxs is supplied in r1 by start.s
 {
     struct KExtParameters *p = &kExtParameters;
     u32 TTBCR_;
@@ -255,8 +256,9 @@ void main(FcramLayout *layout)
 
     layout->systemSize -= __end__ - __start__;
     fcramLayout = *layout;
+    coreCtxs = ctxs; // later indexed by thread->coreId in rosalinaLockAllThreads
 
-    __asm__ volatile("mrc p15, 0, %0, c2, c0, 2" : "=r"(TTBCR_));
+    __asm__ __volatile__("mrc p15, 0, %0, c2, c0, 2" : "=r"(TTBCR_));
     TTBCR = TTBCR_;
     isN3DS = getNumberOfCores() == 4;
     memcpy(L1MMUTableAddrs, (const void *)p->L1MMUTableAddrs, 16);
@@ -275,4 +277,5 @@ void main(FcramLayout *layout)
 
     rosalinaState = 0;
     hasStartedRosalinaNetworkFuncsOnce = false;
+    //wat(0xAA, criticalSectionLock);
 }
diff --git a/k11_extension/source/start.s b/k11_extension/source/start.s
index 02d25e6..fcefbfe 100644
--- a/k11_extension/source/start.s
+++ b/k11_extension/source/start.s
@@ -41,6 +41,7 @@ _start:
     .word kExtParameters
     .word 1 @ enableUserExceptionHandlersForCPUExc
 
+    b KThread__DebugReschedule @ NOTE(review): presumably the 0x4000002C slot that patchKernel11 jumps to -- confirm
 start:
     @ Only core0 executes this, the other cores are running coreBarrier
 
@@ -50,6 +51,7 @@ start:
 
     push {r0-r12, lr}
     sub r0, r4, #8
+    sub r1, r8, #0x8000 @ r1 = r8 - 0x8000, received by main as its second parameter (ctxs)
     bl main
 
     pop {r0-r12, pc}
diff --git a/k11_extension/source/svc.c b/k11_extension/source/svc.c
index d4ca18a..8f66e02 100644
--- a/k11_extension/source/svc.c
+++ b/k11_extension/source/svc.c
@@ -25,6 +25,7 @@
 */
 
 #include "memory.h"
+#include "synchronization.h"
 #include "svc.h"
 #include "svc/ControlMemory.h"
 #include "svc/GetProcessInfo.h"
@@ -47,24 +48,11 @@
 
 void *officialSVCs[0x7E] = {NULL};
 
-static inline void yieldDuringRosalinaMenu(void)
-{
-    KProcess *currentProcess = currentCoreContext->objectContext.currentProcess;
-
-    u64 titleId = codeSetOfProcess(currentProcess)->titleId;
-    u32 highTitleId = (u32)(titleId >> 32), lowTitleId = (u32)titleId;
-    while((rosalinaState & 1) && idOfProcess(currentProcess) >= nbSection0Modules &&
-      (highTitleId != 0x00040130 || (highTitleId == 0x00040130 && (lowTitleId == 0x1A02 || lowTitleId == 0x1C02))))
-        SleepThread(25 * 1000 * 1000LL);
-}
-
 void signalSvcEntry(u8 *pageEnd)
 {
     u32 svcId = (u32) *(u8 *)(pageEnd - 0xB5);
     KProcess *currentProcess = currentCoreContext->objectContext.currentProcess;
 
-    yieldDuringRosalinaMenu();
-
     if(svcId == 0xFE)
         svcId = *(u32 *)(pageEnd - 0x110 + 8 * 4); // r12 ; note: max theortical SVC atm: 0x3FFFFFFF. We don't support catching svcIds >= 0x100 atm either
 
@@ -78,8 +66,6 @@ void signalSvcReturn(u8 *pageEnd)
     u32 svcId = (u32) *(u8 *)(pageEnd - 0xB5);
     KProcess *currentProcess = currentCoreContext->objectContext.currentProcess;
 
-    yieldDuringRosalinaMenu();
-
     if(svcId == 0xFE)
         svcId = *(u32 *)(pageEnd - 0x110 + 8 * 4); // r12 ; note: max theortical SVC atm: 0x1FFFFFFF. We don't support catching svcIds >= 0x100 atm either
 
@@ -88,6 +74,15 @@ void signalSvcReturn(u8 *pageEnd)
         SignalDebugEvent(DBGEVENT_OUTPUT_STRING, 0xFFFFFFFF, svcId);
 }
 
+void postprocessSvc(void) // called from svcHandler.s in place of the direct call through the old PostprocessSvc pointer
+{
+    KThread *currentThread = currentCoreContext->objectContext.currentThread;
+    if(!currentThread->shallTerminate && rosalinaThreadLockPredicate(currentThread)) // skip threads flagged shallTerminate
+        rosalinaRescheduleThread(currentThread, true); // set the Rosalina lock bit (0x40) on the calling thread
+
+    officialPostProcessSvc(); // always chain to the routine located by findUsefulSymbols
+}
+
 static bool doingVeryShittyPmResLimitWorkaround = false; // I feel dirty
 
 void *svcHook(u8 *pageEnd)
diff --git a/k11_extension/source/svc/KernelSetState.c b/k11_extension/source/svc/KernelSetState.c
index 1773eaa..38a15ab 100644
--- a/k11_extension/source/svc/KernelSetState.c
+++ b/k11_extension/source/svc/KernelSetState.c
@@ -107,6 +107,11 @@ Result KernelSetStateHook(u32 type, u32 varg1, u32 varg2, u32 varg3)
             if(rosalinaState & 2)
                 hasStartedRosalinaNetworkFuncsOnce = true;
 
+            if(rosalinaState & 1) // bit 0 set: lock every thread accepted by rosalinaThreadLockPredicate
+                rosalinaLockAllThreads();
+            else if(varg1 & 1) // bit 0 clear and varg1 has bit 0 set: release the locked threads
+                rosalinaUnlockAllThreads();
+
             break;
         }
         case 0x10001:
diff --git a/k11_extension/source/svcHandler.s b/k11_extension/source/svcHandler.s
index b8b5a69..1b07a62 100644
--- a/k11_extension/source/svcHandler.s
+++ 
b/k11_extension/source/svcHandler.s
@@ -113,9 +113,7 @@ svcHandler:
     push {r0-r7, r12, lr}
 
     push {r0-r3}
-    ldr r0, =PostprocessSvc
-    ldr r0, [r0]
-    blx r0
+    bl postprocessSvc @ C wrapper in svc.c; it calls officialPostProcessSvc itself
     pop {r0-r3}
 
     ldrb lr, [sp, #0x58+0] @ page end - 0xb8 + 0: scheduling flags
diff --git a/k11_extension/source/synchronization.c b/k11_extension/source/synchronization.c
index 82b7687..6fde998 100644
--- a/k11_extension/source/synchronization.c
+++ b/k11_extension/source/synchronization.c
@@ -27,6 +27,7 @@
 #include "synchronization.h"
 #include "utils.h"
 #include "kernel.h"
+#include "globals.h"
 
 extern SGI0Handler_t SGI0Handler;
 
@@ -36,6 +37,126 @@ void executeFunctionOnCores(SGI0Handler_t handler, u8 targetList, u8 targetListF
     SGI0Handler = handler;
 
     if(targetListFilter == 0 && (targetListFilter & (1 << coreID)) != 0)
-        __asm__ volatile("cpsie i"); // make sure interrupts aren't masked
+        __enable_irq(); // make sure interrupts aren't masked. NOTE(review): the guard above can never be true (it needs targetListFilter == 0 and a bit set in it); targetList was presumably meant
     MPCORE_GID_SGI = (targetListFilter << 24) | (targetList << 16) | 0;
 }
+
+void KScheduler__TriggerCrossCoreInterrupt(KScheduler *this) // clears the pending flag, then raises SGI 8 on every other core
+{
+    this->triggerCrossCoreInterrupt = false;
+    for(s16 i = 0; i < (s16)getNumberOfCores(); i++)
+    {
+        if(this->coreNumber != i)
+            MPCORE_GID_SGI = (1 << (16 + i)) | 8; // target-list bit for core i, interrupt ID 8
+    }
+}
+
+void KThread__DebugReschedule(KThread *this, bool lock) // sets (lock) or clears bit 0x80 of the scheduling mask
+{
+    KRecursiveLock__Lock(criticalSectionLock);
+
+    u32 oldSchedulingMask = this->schedulingMask;
+    if(lock) // the original k11 function discards the other flags
+        this->schedulingMask |= 0x80;
+    else
+        this->schedulingMask &= ~0x80;
+
+    KScheduler__AdjustThread(currentCoreContext->objectContext.currentScheduler, this, oldSchedulingMask);
+
+    KRecursiveLock__Unlock(criticalSectionLock);
+}
+
+bool rosalinaThreadLockPredicate(KThread *thread) // false for threads without an owner process
+{
+    KProcess *process = thread->ownerProcess;
+    if(process == NULL)
+        return false;
+
+    u64 titleId = codeSetOfProcess(process)->titleId;
+    u32 highTitleId = (u32)(titleId >> 32), lowTitleId = (u32)titleId;
+    return
+        ((rosalinaState & 1) && idOfProcess(process) >= nbSection0Modules &&
+        (highTitleId != 0x00040130 || (highTitleId == 0x00040130 && (lowTitleId == 0x1A02 || lowTitleId == 0x1C02)))); // among 0x00040130 titles only 0x1A02 and 0x1C02 qualify
+}
+
+void rosalinaRescheduleThread(KThread *thread, bool lock) // same sequence as KThread__DebugReschedule, using bit 0x40
+{
+    KRecursiveLock__Lock(criticalSectionLock);
+
+    u32 oldSchedulingMask = thread->schedulingMask;
+    if(lock)
+        thread->schedulingMask |= 0x40;
+    else
+        thread->schedulingMask &= ~0x40;
+
+    KScheduler__AdjustThread(currentCoreContext->objectContext.currentScheduler, thread, oldSchedulingMask);
+
+    KRecursiveLock__Unlock(criticalSectionLock);
+}
+
+void rosalinaLockThread(KThread *thread) // sets the Rosalina lock bit on thread, with one exception (below)
+{
+    KThread *syncThread = synchronizationMutex->owner;
+    // The thread that currently owns synchronizationMutex is left untouched.
+
+    if(syncThread == NULL || syncThread != thread)
+        rosalinaRescheduleThread(thread, true);
+}
+
+void rosalinaLockAllThreads(void) // two passes over threadList, both under criticalSectionLock
+{
+    bool currentThreadsFound = false;
+
+    KRecursiveLock__Lock(criticalSectionLock);
+    for(KLinkedListNode *node = threadList->list.nodes.first; node != (KLinkedListNode *)&threadList->list.nodes; node = node->next)
+    {
+        KThread *thread = (KThread *)node->key;
+        if(!rosalinaThreadLockPredicate(thread))
+            continue;
+        if(thread == coreCtxs[thread->coreId].objectContext.currentThread) // current thread of its core: handled in the second pass
+            currentThreadsFound = true;
+        else
+            rosalinaLockThread(thread);
+    }
+
+    if(currentThreadsFound)
+    {
+        for(KLinkedListNode *node = threadList->list.nodes.first; node != (KLinkedListNode *)&threadList->list.nodes; node = node->next)
+        {
+            KThread *thread = (KThread *)node->key;
+            if(!rosalinaThreadLockPredicate(thread))
+                continue;
+            if(!(thread->schedulingMask & 0x40)) // not locked by the first pass
+            {
+                rosalinaLockThread(thread);
+                KRecursiveLock__Lock(criticalSectionLock);
+                if(thread->coreId != getCurrentCoreID())
+                {
+                    u32 cpsr = __get_cpsr();
+                    __disable_irq(); // both flags are set with IRQs masked; the saved CPSR fields are restored right after
+                    coreCtxs[thread->coreId].objectContext.currentScheduler->triggerCrossCoreInterrupt = true;
+                    currentCoreContext->objectContext.currentScheduler->triggerCrossCoreInterrupt = true;
+                    __set_cpsr_cx(cpsr);
+                }
+                KRecursiveLock__Unlock(criticalSectionLock);
+            }
+        }
+        KScheduler__TriggerCrossCoreInterrupt(currentCoreContext->objectContext.currentScheduler);
+    }
+    KRecursiveLock__Unlock(criticalSectionLock);
+}
+
+void rosalinaUnlockAllThreads(void) // walks threadList; unlike rosalinaLockAllThreads it takes no lock around the walk
+{
+    for(KLinkedListNode *node = threadList->list.nodes.first; node != (KLinkedListNode *)&threadList->list.nodes; node = node->next)
+    {
+        KThread *thread = (KThread *)node->key;
+        // Only threads carrying the Rosalina lock bit (0x40) are rescheduled below.
+
+        if((thread->schedulingMask & 0xF) == 2) // thread is terminating
+            continue;
+
+        if(thread->schedulingMask & 0x40)
+            rosalinaRescheduleThread(thread, false);
+    }
+}
diff --git a/k11_extension/source/utils.s b/k11_extension/source/utils.s
index f5ea72a..16a48eb 100644
--- a/k11_extension/source/utils.s
+++ b/k11_extension/source/utils.s
@@ -118,6 +118,11 @@ safecpy:
 
 _safecpy_end:
 
+.global wat
+.type wat, %function
+wat: @ NOTE(review): debug helper (breakpoint, then return); its only call in main.c is commented out
+    bkpt 1
+    bx lr
 .section .rodata
 
 .global safecpy_sz
diff --git a/source/patches.c b/source/patches.c
index f1b1df7..2d5ad87 100644
--- a/source/patches.c
+++ b/source/patches.c
@@ -201,6 +201,7 @@ u32 installK11Extension(u8 *pos, u32 size, bool isSafeMode, u32 baseK11VA, u32 *
 u32 patchKernel11(u8 *pos, u32 size, u32 baseK11VA, u32 *arm11SvcTable, u32 *arm11ExceptionsPage)
 {
     static const u8 patternKPanic[] = {0x02, 0x0B, 0x44, 0xE2};
+    static const u8 patternKThreadDebugReschedule[] = {0x34, 0x20, 0xD4, 0xE5, 0x00, 0x00, 0x55, 0xE3, 0x80, 0x00, 0xA0, 0x13};
 
     //Assumption: ControlMemory, DebugActiveProcess and KernelSetState are in the first 0x20000 bytes
     //Patch ControlMemory
@@ -240,6 +241,13 @@ u32 patchKernel11(u8 *pos, u32 size, u32 baseK11VA, u32 *arm11SvcTable, u32 *arm
     for(off = arm11ExceptionsPage; *off != 0x96007F9; off++);
     off[1] = 0x40000028;
 
+    off = (u32 *)memsearch(pos, patternKThreadDebugReschedule, size, sizeof(patternKThreadDebugReschedule));
+    if(off == NULL) // pattern not found: report failure to the caller
+        return 1;
+
+    off[-5] = 0xE51FF004; // ARM encoding of "ldr pc, [pc, #-4]": jump to the address held in the next word
+    off[-4] = 0x4000002C; // NOTE(review): presumably the "b KThread__DebugReschedule" slot added to start.s -- confirm
+
     return 0;
 }
 