Unschedule threads properly...

...instead of using a shitty yield when opening the Rosalina menu
This commit is contained in:
TuxSH 2017-06-14 19:35:03 +02:00
parent 24de7c5272
commit 3d534c9a81
15 changed files with 229 additions and 30 deletions

View File

@ -14,7 +14,7 @@ dir_build := build
ARCH := -mcpu=mpcore -mfpu=vfp ARCH := -mcpu=mpcore -mfpu=vfp
ASFLAGS := $(ARCH) ASFLAGS := $(ARCH)
CFLAGS := -Wall -Wextra -MMD -MP -marm $(ASFLAGS) -I$(dir_include) -fno-builtin -std=c11 -Wno-main -O2 -flto -ffast-math \ CFLAGS := -Wall -Wextra -MMD -MP -marm $(ASFLAGS) -I$(dir_include) -fno-builtin -std=c11 -Wno-main -g -flto -O2 -ffast-math \
-mword-relocations -ffunction-sections -fdata-sections -mword-relocations -ffunction-sections -fdata-sections
LDFLAGS := -nostdlib -Wl,--gc-sections,--nmagic $(ARCH) LDFLAGS := -nostdlib -Wl,--gc-sections,--nmagic $(ARCH)

View File

@ -78,7 +78,7 @@ extern s32 (*kernelToUsrStrncpy)(char *dst, const char *src, u32 len);
extern void (*svcFallbackHandler)(u8 svcId); extern void (*svcFallbackHandler)(u8 svcId);
extern void (*kernelpanic)(void); extern void (*kernelpanic)(void);
extern void (*PostprocessSvc)(void); extern void (*officialPostProcessSvc)(void);
extern Result (*SignalDebugEvent)(DebugEventType type, u32 info, ...); extern Result (*SignalDebugEvent)(DebugEventType type, u32 info, ...);
@ -96,6 +96,8 @@ extern vu8 *configPage;
extern u32 kernelVersion; extern u32 kernelVersion;
extern FcramLayout fcramLayout; extern FcramLayout fcramLayout;
extern KCoreContext *coreCtxs;
extern void *originalHandlers[8]; extern void *originalHandlers[8];
extern u32 nbSection0Modules; extern u32 nbSection0Modules;
@ -126,5 +128,5 @@ typedef struct PACKED CfwInfo
extern CfwInfo cfwInfo; extern CfwInfo cfwInfo;
extern u32 rosalinaState; extern vu32 rosalinaState;
extern bool hasStartedRosalinaNetworkFuncsOnce; extern bool hasStartedRosalinaNetworkFuncsOnce;

View File

@ -206,7 +206,7 @@ typedef struct PACKED ALIGN(4) KThread
KMutexLinkedList *mutexList; KMutexLinkedList *mutexList;
KLinkedList mutexesUsed; KLinkedList mutexesUsed;
s32 dynamicPriority; s32 dynamicPriority;
u32 processor; u32 coreId;
KPreemptionTimer *preemptionTimer; KPreemptionTimer *preemptionTimer;
u32 unknown_1; u32 unknown_1;
bool isAlive; bool isAlive;
@ -950,6 +950,7 @@ typedef struct KCoreContext
} KCoreContext; } KCoreContext;
static KCoreContext * const currentCoreContext = (KCoreContext *)0xFFFF1000; static KCoreContext * const currentCoreContext = (KCoreContext *)0xFFFF1000;
extern KCoreContext *coreCtxs;
#define DEFINE_CONSOLE_SPECIFIC_STRUCTS(console, nbCores) #define DEFINE_CONSOLE_SPECIFIC_STRUCTS(console, nbCores)
/* 60 */ /* 60 */

View File

@ -33,5 +33,6 @@
extern void *officialSVCs[0x7E]; extern void *officialSVCs[0x7E];
void postprocessSvc(void);
void svcDefaultHandler(u8 svcId); void svcDefaultHandler(u8 svcId);
void *svcHook(u8 *pageEnd); void *svcHook(u8 *pageEnd);

View File

@ -30,6 +30,5 @@
#include "kernel.h" #include "kernel.h"
#include "svc.h" #include "svc.h"
extern u32 rosalinaState;
bool shouldSignalSyscallDebugEvent(KProcess *process, u8 svcId); bool shouldSignalSyscallDebugEvent(KProcess *process, u8 svcId);
Result KernelSetStateHook(u32 type, u32 varg1, u32 varg2, u32 varg3); Result KernelSetStateHook(u32 type, u32 varg1, u32 varg2, u32 varg3);

View File

@ -34,6 +34,14 @@ typedef KSchedulableInterruptEvent* (*SGI0Handler_t)(KBaseInterruptEvent *this,
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0360f/CCHDIFIJ.html // http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0360f/CCHDIFIJ.html
void executeFunctionOnCores(SGI0Handler_t func, u8 targetList, u8 targetListFilter); void executeFunctionOnCores(SGI0Handler_t func, u8 targetList, u8 targetListFilter);
// Clears this->triggerCrossCoreInterrupt, then writes one request (low bits = 8) to
// MPCORE_GID_SGI for every core whose index differs from this->coreNumber.
void KScheduler__TriggerCrossCoreInterrupt(KScheduler *this);
// Sets (lock == true) or clears (lock == false) bit 0x80 of this->schedulingMask while
// holding criticalSectionLock, then calls KScheduler__AdjustThread with the previous mask.
void KThread__DebugReschedule(KThread *this, bool lock);
// Returns true when the thread has an owner process, rosalinaState bit 0 is set, the
// process id is >= nbSection0Modules, and the title id passes the 0x00040130 filter.
bool rosalinaThreadLockPredicate(KThread *thread);
// Same as KThread__DebugReschedule, but operates on bit 0x40 of thread->schedulingMask.
void rosalinaRescheduleThread(KThread *thread, bool lock);
// Calls rosalinaRescheduleThread(thread, true) unless thread owns synchronizationMutex.
void rosalinaLockThread(KThread *thread);
// Walks threadList and locks every thread accepted by rosalinaThreadLockPredicate.
void rosalinaLockAllThreads(void);
// Walks threadList and clears bit 0x40 on every non-terminating thread that has it set.
void rosalinaUnlockAllThreads(void);
// Taken from ctrulib: // Taken from ctrulib:
static inline void __dsb(void) static inline void __dsb(void)
@ -59,3 +67,53 @@ static inline bool __strex(s32* addr, s32 val)
__asm__ __volatile__("strex %[res], %[val], %[addr]" : [res] "=&r" (res) : [val] "r" (val), [addr] "Q" (*addr)); __asm__ __volatile__("strex %[res], %[val], %[addr]" : [res] "=&r" (res) : [val] "r" (val), [addr] "Q" (*addr));
return res; return res;
} }
/**
 * Exclusive byte load (LDREXB) from *addr.
 * Byte-sized counterpart of the word-sized exclusive helpers above; see __strex8
 * for the matching store.
 *
 * @param addr Address of the byte to load.
 * @return The byte read from *addr.
 */
static inline s8 __ldrex8(s8* addr)
{
    s8 loaded;

    __asm__ __volatile__("ldrexb %[dst], %[mem]"
                         : [dst] "=r" (loaded)
                         : [mem] "Q" (*addr));

    return loaded;
}
/**
 * Exclusive byte store (STREXB) of val to *addr.
 *
 * @param addr Address of the byte to store to.
 * @param val  Value to store.
 * @return The status written by STREXB, as a bool. Per the ARM architecture the
 *         status is 0 when the store was performed and 1 when it was not, so
 *         `true` means the store did NOT happen and the caller should retry.
 */
static inline bool __strex8(s8* addr, s8 val)
{
    bool status;

    __asm__ __volatile__("strexb %[st], %[src], %[mem]"
                         : [st] "=&r" (status)
                         : [src] "r" (val), [mem] "Q" (*addr));

    return status;
}
/**
 * Exclusive halfword load (LDREXH) from *addr.
 * See __strex16 for the matching store.
 *
 * @param addr Address of the halfword to load.
 * @return The halfword read from *addr.
 */
static inline s16 __ldrex16(s16* addr)
{
    s16 loaded;

    __asm__ __volatile__("ldrexh %[dst], %[mem]"
                         : [dst] "=r" (loaded)
                         : [mem] "Q" (*addr));

    return loaded;
}
/**
 * Exclusive halfword store (STREXH) of val to *addr.
 *
 * @param addr Address of the halfword to store to.
 * @param val  Value to store.
 * @return The status written by STREXH, as a bool. Per the ARM architecture the
 *         status is 0 when the store was performed and 1 when it was not, so
 *         `true` means the store did NOT happen and the caller should retry.
 */
static inline bool __strex16(s16* addr, s16 val)
{
    bool status;

    __asm__ __volatile__("strexh %[st], %[src], %[mem]"
                         : [st] "=&r" (status)
                         : [src] "r" (val), [mem] "Q" (*addr));

    return status;
}
/**
 * Reads the Current Program Status Register with `mrs`.
 *
 * @return The raw CPSR value, e.g. to be handed back to __set_cpsr_cx later.
 */
static inline u32 __get_cpsr(void)
{
    u32 status;

    __asm__ __volatile__("mrs %0, cpsr" : "=r"(status));

    return status;
}
/**
 * Writes `cpsr` to the CPSR through `msr cpsr_cx` (the `_cx` field mask selects
 * the control and extension fields; the flags field is not written).
 *
 * Typically used to restore a value previously obtained from __get_cpsr, e.g. to
 * put the interrupt mask back after a __disable_irq section.
 *
 * The "memory" clobber makes this a compiler barrier: without it the compiler is
 * free to move ordinary (non-volatile) loads/stores across the instruction, i.e.
 * out of the interrupt-masked region this call is meant to close.
 *
 * @param cpsr Value whose control/extension fields are written to the CPSR.
 */
static inline void __set_cpsr_cx(u32 cpsr)
{
    __asm__ __volatile__("msr cpsr_cx, %0" :: "r"(cpsr) : "memory");
}
/**
 * Unmasks IRQs on the current core (`cpsie i`).
 *
 * The "memory" clobber makes this a compiler barrier so that memory accesses
 * written before/after the call in the source are not moved across the point
 * where interrupts become enabled.
 */
static inline void __enable_irq(void)
{
    __asm__ __volatile__("cpsie i" ::: "memory");
}
/**
 * Masks IRQs on the current core (`cpsid i`).
 *
 * The "memory" clobber makes this a compiler barrier so that memory accesses
 * meant to execute with interrupts masked cannot be hoisted above this
 * instruction by the compiler.
 */
static inline void __disable_irq(void)
{
    __asm__ __volatile__("cpsid i" ::: "memory");
}

View File

@ -74,7 +74,7 @@ s32 (*kernelToUsrStrncpy)(char *dst, const char *src, u32 len);
void (*svcFallbackHandler)(u8 svcId); void (*svcFallbackHandler)(u8 svcId);
void (*kernelpanic)(void); void (*kernelpanic)(void);
void (*PostprocessSvc)(void); void (*officialPostProcessSvc)(void);
Result (*SignalDebugEvent)(DebugEventType type, u32 info, ...); Result (*SignalDebugEvent)(DebugEventType type, u32 info, ...);
@ -91,6 +91,7 @@ bool *isDevUnit;
vu8 *configPage; vu8 *configPage;
u32 kernelVersion; u32 kernelVersion;
FcramLayout fcramLayout; FcramLayout fcramLayout;
KCoreContext *coreCtxs;
void *originalHandlers[8] = {NULL}; void *originalHandlers[8] = {NULL};
@ -107,5 +108,5 @@ void (*coreBarrier)(void);
CfwInfo cfwInfo; CfwInfo cfwInfo;
u32 rosalinaState; vu32 rosalinaState;
bool hasStartedRosalinaNetworkFuncsOnce; bool hasStartedRosalinaNetworkFuncsOnce;

View File

@ -96,6 +96,7 @@ void configHook(vu8 *cfgPage)
*isDevUnit = true; // enable debug features *isDevUnit = true; // enable debug features
} }
void wat(u32 a, ...);
static void findUsefulSymbols(void) static void findUsefulSymbols(void)
{ {
u32 *off; u32 *off;
@ -111,7 +112,7 @@ static void findUsefulSymbols(void)
for(off = (u32 *)originalHandlers[2]; *off != 0xE1A00009; off++); for(off = (u32 *)originalHandlers[2]; *off != 0xE1A00009; off++);
svcFallbackHandler = (void (*)(u8))decodeARMBranch(off + 1); svcFallbackHandler = (void (*)(u8))decodeARMBranch(off + 1);
for(; *off != 0xE92D000F; off++); for(; *off != 0xE92D000F; off++);
PostprocessSvc = (void (*)(void))decodeARMBranch(off + 1); officialPostProcessSvc = (void (*)(void))decodeARMBranch(off + 1);
KProcessHandleTable__ToKProcess = (KProcess * (*)(KProcessHandleTable *, Handle))decodeARMBranch(5 + (u32 *)officialSVCs[0x76]); KProcessHandleTable__ToKProcess = (KProcess * (*)(KProcessHandleTable *, Handle))decodeARMBranch(5 + (u32 *)officialSVCs[0x76]);
@ -135,7 +136,7 @@ static void findUsefulSymbols(void)
for(off = (u32 *)officialSVCs[0x19]; *off != 0xE1A04005; off++); for(off = (u32 *)officialSVCs[0x19]; *off != 0xE1A04005; off++);
KEvent__Clear = (Result (*)(KEvent *))decodeARMBranch(off + 1); KEvent__Clear = (Result (*)(KEvent *))decodeARMBranch(off + 1);
for(off = (u32 *)KEvent__Clear; *off != 0xE8BD8070; off++) for(off = (u32 *)KEvent__Clear; *off != 0xE8BD8070; off++);
synchronizationMutex = *(KObjectMutex **)(off + 1); synchronizationMutex = *(KObjectMutex **)(off + 1);
for(off = (u32 *)officialSVCs[0x24]; *off != 0xE59F004C; off++); for(off = (u32 *)officialSVCs[0x24]; *off != 0xE59F004C; off++);
@ -247,7 +248,7 @@ static void findUsefulSymbols(void)
} }
} }
void main(FcramLayout *layout) void main(FcramLayout *layout, KCoreContext *ctxs)
{ {
struct KExtParameters *p = &kExtParameters; struct KExtParameters *p = &kExtParameters;
u32 TTBCR_; u32 TTBCR_;
@ -255,8 +256,9 @@ void main(FcramLayout *layout)
layout->systemSize -= __end__ - __start__; layout->systemSize -= __end__ - __start__;
fcramLayout = *layout; fcramLayout = *layout;
coreCtxs = ctxs;
__asm__ volatile("mrc p15, 0, %0, c2, c0, 2" : "=r"(TTBCR_)); __asm__ __volatile__("mrc p15, 0, %0, c2, c0, 2" : "=r"(TTBCR_));
TTBCR = TTBCR_; TTBCR = TTBCR_;
isN3DS = getNumberOfCores() == 4; isN3DS = getNumberOfCores() == 4;
memcpy(L1MMUTableAddrs, (const void *)p->L1MMUTableAddrs, 16); memcpy(L1MMUTableAddrs, (const void *)p->L1MMUTableAddrs, 16);
@ -275,4 +277,5 @@ void main(FcramLayout *layout)
rosalinaState = 0; rosalinaState = 0;
hasStartedRosalinaNetworkFuncsOnce = false; hasStartedRosalinaNetworkFuncsOnce = false;
//wat(0xAA, criticalSectionLock);
} }

View File

@ -41,6 +41,7 @@ _start:
.word kExtParameters .word kExtParameters
.word 1 @ enableUserExceptionHandlersForCPUExc .word 1 @ enableUserExceptionHandlersForCPUExc
b KThread__DebugReschedule
start: start:
@ Only core0 executes this, the other cores are running coreBarrier @ Only core0 executes this, the other cores are running coreBarrier
@ -50,6 +51,7 @@ start:
push {r0-r12, lr} push {r0-r12, lr}
sub r0, r4, #8 sub r0, r4, #8
sub r1, r8, #0x8000
bl main bl main
pop {r0-r12, pc} pop {r0-r12, pc}

View File

@ -25,6 +25,7 @@
*/ */
#include "memory.h" #include "memory.h"
#include "synchronization.h"
#include "svc.h" #include "svc.h"
#include "svc/ControlMemory.h" #include "svc/ControlMemory.h"
#include "svc/GetProcessInfo.h" #include "svc/GetProcessInfo.h"
@ -47,24 +48,11 @@
void *officialSVCs[0x7E] = {NULL}; void *officialSVCs[0x7E] = {NULL};
static inline void yieldDuringRosalinaMenu(void)
{
KProcess *currentProcess = currentCoreContext->objectContext.currentProcess;
u64 titleId = codeSetOfProcess(currentProcess)->titleId;
u32 highTitleId = (u32)(titleId >> 32), lowTitleId = (u32)titleId;
while((rosalinaState & 1) && idOfProcess(currentProcess) >= nbSection0Modules &&
(highTitleId != 0x00040130 || (highTitleId == 0x00040130 && (lowTitleId == 0x1A02 || lowTitleId == 0x1C02))))
SleepThread(25 * 1000 * 1000LL);
}
void signalSvcEntry(u8 *pageEnd) void signalSvcEntry(u8 *pageEnd)
{ {
u32 svcId = (u32) *(u8 *)(pageEnd - 0xB5); u32 svcId = (u32) *(u8 *)(pageEnd - 0xB5);
KProcess *currentProcess = currentCoreContext->objectContext.currentProcess; KProcess *currentProcess = currentCoreContext->objectContext.currentProcess;
yieldDuringRosalinaMenu();
if(svcId == 0xFE) if(svcId == 0xFE)
svcId = *(u32 *)(pageEnd - 0x110 + 8 * 4); // r12 ; note: max theortical SVC atm: 0x3FFFFFFF. We don't support catching svcIds >= 0x100 atm either svcId = *(u32 *)(pageEnd - 0x110 + 8 * 4); // r12 ; note: max theortical SVC atm: 0x3FFFFFFF. We don't support catching svcIds >= 0x100 atm either
@ -78,8 +66,6 @@ void signalSvcReturn(u8 *pageEnd)
u32 svcId = (u32) *(u8 *)(pageEnd - 0xB5); u32 svcId = (u32) *(u8 *)(pageEnd - 0xB5);
KProcess *currentProcess = currentCoreContext->objectContext.currentProcess; KProcess *currentProcess = currentCoreContext->objectContext.currentProcess;
yieldDuringRosalinaMenu();
if(svcId == 0xFE) if(svcId == 0xFE)
svcId = *(u32 *)(pageEnd - 0x110 + 8 * 4); // r12 ; note: max theortical SVC atm: 0x1FFFFFFF. We don't support catching svcIds >= 0x100 atm either svcId = *(u32 *)(pageEnd - 0x110 + 8 * 4); // r12 ; note: max theortical SVC atm: 0x1FFFFFFF. We don't support catching svcIds >= 0x100 atm either
@ -88,6 +74,15 @@ void signalSvcReturn(u8 *pageEnd)
SignalDebugEvent(DBGEVENT_OUTPUT_STRING, 0xFFFFFFFF, svcId); SignalDebugEvent(DBGEVENT_OUTPUT_STRING, 0xFFFFFFFF, svcId);
} }
/**
 * SVC post-processing hook, called from svcHandler in place of the kernel's own
 * routine.
 *
 * If the current thread is not marked for termination and matches
 * rosalinaThreadLockPredicate, it is locked via rosalinaRescheduleThread before
 * control is passed on to the official post-processing function.
 */
void postprocessSvc(void)
{
    KThread *self = currentCoreContext->objectContext.currentThread;

    // Short-circuit keeps the predicate from running for terminating threads.
    bool mustLock = !self->shallTerminate && rosalinaThreadLockPredicate(self);
    if(mustLock)
        rosalinaRescheduleThread(self, true);

    officialPostProcessSvc();
}
static bool doingVeryShittyPmResLimitWorkaround = false; // I feel dirty static bool doingVeryShittyPmResLimitWorkaround = false; // I feel dirty
void *svcHook(u8 *pageEnd) void *svcHook(u8 *pageEnd)

View File

@ -107,6 +107,11 @@ Result KernelSetStateHook(u32 type, u32 varg1, u32 varg2, u32 varg3)
if(rosalinaState & 2) if(rosalinaState & 2)
hasStartedRosalinaNetworkFuncsOnce = true; hasStartedRosalinaNetworkFuncsOnce = true;
if(rosalinaState & 1)
rosalinaLockAllThreads();
else if(varg1 & 1)
rosalinaUnlockAllThreads();
break; break;
} }
case 0x10001: case 0x10001:

View File

@ -113,9 +113,7 @@ svcHandler:
push {r0-r7, r12, lr} push {r0-r7, r12, lr}
push {r0-r3} push {r0-r3}
ldr r0, =PostprocessSvc bl postprocessSvc
ldr r0, [r0]
blx r0
pop {r0-r3} pop {r0-r3}
ldrb lr, [sp, #0x58+0] @ page end - 0xb8 + 0: scheduling flags ldrb lr, [sp, #0x58+0] @ page end - 0xb8 + 0: scheduling flags

View File

@ -27,6 +27,7 @@
#include "synchronization.h" #include "synchronization.h"
#include "utils.h" #include "utils.h"
#include "kernel.h" #include "kernel.h"
#include "globals.h"
extern SGI0Handler_t SGI0Handler; extern SGI0Handler_t SGI0Handler;
@ -36,6 +37,126 @@ void executeFunctionOnCores(SGI0Handler_t handler, u8 targetList, u8 targetListF
SGI0Handler = handler; SGI0Handler = handler;
if(targetListFilter == 0 && (targetListFilter & (1 << coreID)) != 0) if(targetListFilter == 0 && (targetListFilter & (1 << coreID)) != 0)
__asm__ volatile("cpsie i"); // make sure interrupts aren't masked __enable_irq(); // make sure interrupts aren't masked
MPCORE_GID_SGI = (targetListFilter << 24) | (targetList << 16) | 0; MPCORE_GID_SGI = (targetListFilter << 24) | (targetList << 16) | 0;
} }
/**
 * Clears the scheduler's pending cross-core flag and raises a software
 * interrupt on every core other than the scheduler's own.
 *
 * Each write to MPCORE_GID_SGI sets bit (16 + core) together with the low
 * value 8. Going by the register layout used in executeFunctionOnCores
 * (filter << 24 | targetList << 16 | id), that targets exactly one core with
 * interrupt id 8.
 *
 * @param this Scheduler on whose behalf the interrupt is sent.
 */
void KScheduler__TriggerCrossCoreInterrupt(KScheduler *this)
{
    this->triggerCrossCoreInterrupt = false;

    for(s16 core = 0; core < (s16)getNumberOfCores(); core++)
    {
        if(this->coreNumber == core)
            continue; // do not signal the scheduler's own core

        MPCORE_GID_SGI = (1 << (16 + core)) | 8;
    }
}
/**
 * Replacement for the kernel's KThread::DebugReschedule.
 *
 * Sets or clears bit 0x80 of the thread's scheduling mask and asks the current
 * core's scheduler to re-evaluate the thread, all under criticalSectionLock.
 * Unlike the original k11 function, the other mask bits are preserved.
 *
 * @param this Thread to update.
 * @param lock true to set bit 0x80, false to clear it.
 */
void KThread__DebugReschedule(KThread *this, bool lock)
{
    KRecursiveLock__Lock(criticalSectionLock);

    u32 previousMask = this->schedulingMask;

    // the original k11 function discards the other flags
    this->schedulingMask = lock ? (this->schedulingMask | 0x80)
                                : (this->schedulingMask & ~0x80);

    KScheduler__AdjustThread(currentCoreContext->objectContext.currentScheduler, this, previousMask);

    KRecursiveLock__Unlock(criticalSectionLock);
}
/**
 * Decides whether a thread must be locked while the Rosalina menu is open.
 *
 * A thread qualifies when all of the following hold:
 *  - it has an owner process;
 *  - bit 0 of rosalinaState is set;
 *  - the owner's process id is >= nbSection0Modules;
 *  - the owner's title id high word is not 0x00040130, or it is and the low
 *    word is 0x1A02 or 0x1C02.
 *
 * @param thread Thread to test.
 * @return true if the thread should be locked, false otherwise.
 */
bool rosalinaThreadLockPredicate(KThread *thread)
{
    KProcess *owner = thread->ownerProcess;
    if(owner == NULL)
        return false;

    u64 tid = codeSetOfProcess(owner)->titleId;
    u32 tidHigh = (u32)(tid >> 32);
    u32 tidLow = (u32)tid;

    if(!(rosalinaState & 1))
        return false;

    if(idOfProcess(owner) < nbSection0Modules)
        return false;

    if(tidHigh != 0x00040130)
        return true;

    // High word is 0x00040130 here: only these two low title ids qualify.
    return tidLow == 0x1A02 || tidLow == 0x1C02;
}
/**
 * Sets or clears the Rosalina lock bit (0x40) in a thread's scheduling mask and
 * lets the current core's scheduler re-evaluate the thread.
 *
 * The mask update and the KScheduler__AdjustThread call happen under
 * criticalSectionLock; the pre-update mask is passed to the scheduler.
 *
 * @param thread Thread to update.
 * @param lock   true to set bit 0x40, false to clear it.
 */
void rosalinaRescheduleThread(KThread *thread, bool lock)
{
    KRecursiveLock__Lock(criticalSectionLock);

    u32 previousMask = thread->schedulingMask;

    thread->schedulingMask = lock ? (thread->schedulingMask | 0x40)
                                  : (thread->schedulingMask & ~0x40);

    KScheduler__AdjustThread(currentCoreContext->objectContext.currentScheduler, thread, previousMask);

    KRecursiveLock__Unlock(criticalSectionLock);
}
/**
 * Locks a thread for the Rosalina menu, unless it currently owns
 * synchronizationMutex.
 *
 * The owner of synchronizationMutex is deliberately left runnable; every other
 * thread gets bit 0x40 set through rosalinaRescheduleThread.
 *
 * @param thread Thread to lock; must not be NULL.
 */
void rosalinaLockThread(KThread *thread)
{
    // A NULL owner can never equal a valid thread, so one comparison covers
    // both the "no owner" and the "owned by another thread" cases.
    if(synchronizationMutex->owner != thread)
        rosalinaRescheduleThread(thread, true);
}
// Locks every thread accepted by rosalinaThreadLockPredicate (through rosalinaLockThread,
// which sets scheduling-mask bit 0x40 except for the owner of synchronizationMutex).
// criticalSectionLock is held for the whole operation.
//
// Pass 1: eligible threads that are not the current thread of their core are locked right
// away; eligible threads that are currently running on some core are only noted.
// Pass 2 (only when pass 1 found a running thread): every eligible thread still lacking bit
// 0x40 is locked; when it belongs to another core, triggerCrossCoreInterrupt is set on both
// that core's scheduler and the current one. Finally KScheduler__TriggerCrossCoreInterrupt
// is called for the current scheduler.
void rosalinaLockAllThreads(void)
{
bool currentThreadsFound = false;
KRecursiveLock__Lock(criticalSectionLock);
// Pass 1: lock everything that is not running right now.
for(KLinkedListNode *node = threadList->list.nodes.first; node != (KLinkedListNode *)&threadList->list.nodes; node = node->next)
{
KThread *thread = (KThread *)node->key;
if(!rosalinaThreadLockPredicate(thread))
continue;
// Threads that are the current thread of their core are deferred to pass 2.
if(thread == coreCtxs[thread->coreId].objectContext.currentThread)
currentThreadsFound = true;
else
rosalinaLockThread(thread);
}
if(currentThreadsFound)
{
// Pass 2: handle the threads that pass 1 left without bit 0x40.
for(KLinkedListNode *node = threadList->list.nodes.first; node != (KLinkedListNode *)&threadList->list.nodes; node = node->next)
{
KThread *thread = (KThread *)node->key;
if(!rosalinaThreadLockPredicate(thread))
continue;
if(!(thread->schedulingMask & 0x40))
{
rosalinaLockThread(thread);
// criticalSectionLock is recursive; it is already held by this function.
KRecursiveLock__Lock(criticalSectionLock);
if(thread->coreId != getCurrentCoreID())
{
// Save CPSR and mask IRQs while both schedulers' flags are written, then restore.
u32 cpsr = __get_cpsr();
__disable_irq();
coreCtxs[thread->coreId].objectContext.currentScheduler->triggerCrossCoreInterrupt = true;
currentCoreContext->objectContext.currentScheduler->triggerCrossCoreInterrupt = true;
__set_cpsr_cx(cpsr);
}
KRecursiveLock__Unlock(criticalSectionLock);
}
}
// Signal the other cores once, after all threads have been processed.
KScheduler__TriggerCrossCoreInterrupt(currentCoreContext->objectContext.currentScheduler);
}
KRecursiveLock__Unlock(criticalSectionLock);
}
/**
 * Releases every thread previously locked for the Rosalina menu.
 *
 * Walks threadList and, for each thread that is not terminating and has the
 * Rosalina lock bit (0x40) set in its scheduling mask, clears the bit through
 * rosalinaRescheduleThread.
 */
void rosalinaUnlockAllThreads(void)
{
    for(KLinkedListNode *node = threadList->list.nodes.first; node != (KLinkedListNode *)&threadList->list.nodes; node = node->next)
    {
        KThread *thread = (KThread *)node->key;

        if((thread->schedulingMask & 0xF) == 2) // thread is terminating
            continue;

        if(thread->schedulingMask & 0x40)
            rosalinaRescheduleThread(thread, false);
    }
}

View File

@ -118,6 +118,11 @@ safecpy:
_safecpy_end: _safecpy_end:
@ wat: debugging stub. Executes a breakpoint instruction (bkpt 1) and then
@ returns to the caller. Declared on the C side as `void wat(u32 a, ...)`;
@ no argument is read here.
.global wat
.type wat, %function
wat:
bkpt 1
bx lr
.section .rodata .section .rodata
.global safecpy_sz .global safecpy_sz

View File

@ -201,6 +201,7 @@ u32 installK11Extension(u8 *pos, u32 size, bool isSafeMode, u32 baseK11VA, u32 *
u32 patchKernel11(u8 *pos, u32 size, u32 baseK11VA, u32 *arm11SvcTable, u32 *arm11ExceptionsPage) u32 patchKernel11(u8 *pos, u32 size, u32 baseK11VA, u32 *arm11SvcTable, u32 *arm11ExceptionsPage)
{ {
static const u8 patternKPanic[] = {0x02, 0x0B, 0x44, 0xE2}; static const u8 patternKPanic[] = {0x02, 0x0B, 0x44, 0xE2};
static const u8 patternKThreadDebugReschedule[] = {0x34, 0x20, 0xD4, 0xE5, 0x00, 0x00, 0x55, 0xE3, 0x80, 0x00, 0xA0, 0x13};
//Assumption: ControlMemory, DebugActiveProcess and KernelSetState are in the first 0x20000 bytes //Assumption: ControlMemory, DebugActiveProcess and KernelSetState are in the first 0x20000 bytes
//Patch ControlMemory //Patch ControlMemory
@ -240,6 +241,13 @@ u32 patchKernel11(u8 *pos, u32 size, u32 baseK11VA, u32 *arm11SvcTable, u32 *arm
for(off = arm11ExceptionsPage; *off != 0x96007F9; off++); for(off = arm11ExceptionsPage; *off != 0x96007F9; off++);
off[1] = 0x40000028; off[1] = 0x40000028;
off = (u32 *)memsearch(pos, patternKThreadDebugReschedule, size, sizeof(patternKThreadDebugReschedule));
if(off == NULL)
return 1;
off[-5] = 0xE51FF004;
off[-4] = 0x4000002C;
return 0; return 0;
} }