From 4d42b2d100fc11551f3cee9dd2e7bf4ca57eca42 Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Mon, 13 Feb 2017 14:46:37 +1100 Subject: [PATCH] freertos spinlock/portmux: Reduce spinlocking overhead Ref TW7117 Microbenchmarks in unit tests: (All numbers in cycles per benchmarked operation): Release mode No lock contention lock/unlock - 301 -> 167 (-45%) Recursive no contention lock/unlock - 289 -> 148 (-49%) Lock contention two CPUs (lock/unlock) 699 -> 400 (-43%) Debug mode No lock contention lock/unlock - 355 -> 203 (-43%) Recursive no contention lock/unlock - 345 -> 188 (-46%) Lock contention two CPUs (lock/unlock) 761 -> 483 (-36%) --- .../freertos/include/freertos/FreeRTOS.h | 15 +- .../freertos/include/freertos/portable.h | 2 +- .../freertos/include/freertos/portmacro.h | 33 ++-- .../include/freertos/xtensa_context.h | 10 +- components/freertos/port.c | 165 +++++++++++------- components/freertos/tasks.c | 37 ++-- 6 files changed, 152 insertions(+), 110 deletions(-) diff --git a/components/freertos/include/freertos/FreeRTOS.h b/components/freertos/include/freertos/FreeRTOS.h index 31b66fb850..d1e5d4eed1 100644 --- a/components/freertos/include/freertos/FreeRTOS.h +++ b/components/freertos/include/freertos/FreeRTOS.h @@ -978,13 +978,14 @@ typedef struct xSTATIC_QUEUE uint8_t ucDummy9; #endif - struct { - volatile uint32_t ucDummy10; - #ifdef CONFIG_FREERTOS_PORTMUX_DEBUG - void *pvDummy8; - UBaseType_t uxDummy11; - #endif - } sDummy12; + struct { + volatile uint32_t ucDummy10; + uint32_t ucDummy11; + #ifdef CONFIG_FREERTOS_PORTMUX_DEBUG + void *pvDummy8; + UBaseType_t uxDummy12; + #endif + } sDummy1; } StaticQueue_t; typedef StaticQueue_t StaticSemaphore_t; diff --git a/components/freertos/include/freertos/portable.h b/components/freertos/include/freertos/portable.h index a628cf031a..096e481e00 100644 --- a/components/freertos/include/freertos/portable.h +++ b/components/freertos/include/freertos/portable.h @@ -199,7 +199,7 @@ BaseType_t xPortInIsrContext(); /* Multi-core: get current core ID */ static inline uint32_t IRAM_ATTR xPortGetCoreID() { int id; - asm volatile( + asm ( "rsr.prid %0\n" " extui %0,%0,13,1" :"=r"(id)); diff --git a/components/freertos/include/freertos/portmacro.h b/components/freertos/include/freertos/portmacro.h index b747662835..63e4bda667 100644 --- a/components/freertos/include/freertos/portmacro.h +++ b/components/freertos/include/freertos/portmacro.h @@ -127,7 +127,8 @@ typedef unsigned portBASE_TYPE UBaseType_t; typedef struct { - volatile uint32_t mux; + volatile uint32_t owner; + uint32_t count; #ifdef CONFIG_FREERTOS_PORTMUX_DEBUG const char *lastLockedFn; int lastLockedLine; @@ -143,24 +144,19 @@ typedef struct { * The magic number in the top 16 bits is there so we can detect uninitialized and corrupted muxes. */ -#define portMUX_MAGIC_VAL 0xB33F0000 #define portMUX_FREE_VAL 0xB33FFFFF -#define portMUX_MAGIC_MASK 0xFFFF0000 -#define portMUX_MAGIC_SHIFT 16 -#define portMUX_CNT_MASK 0x0000FF00 -#define portMUX_CNT_SHIFT 8 -#define portMUX_VAL_MASK 0x000000FF -#define portMUX_VAL_SHIFT 0 //Keep this in sync with the portMUX_TYPE struct definition please. #ifndef CONFIG_FREERTOS_PORTMUX_DEBUG -#define portMUX_INITIALIZER_UNLOCKED { \ - .mux = portMUX_MAGIC_VAL|portMUX_FREE_VAL \ +#define portMUX_INITIALIZER_UNLOCKED { \ + .owner = portMUX_FREE_VAL, \ + .count = 0, \ } #else -#define portMUX_INITIALIZER_UNLOCKED { \ - .mux = portMUX_MAGIC_VAL|portMUX_FREE_VAL, \ - .lastLockedFn = "(never locked)", \ +#define portMUX_INITIALIZER_UNLOCKED { \ + .owner = portMUX_FREE_VAL, \ + .count = 0, \ + .lastLockedFn = "(never locked)", \ .lastLockedLine = -1 \ } #endif @@ -173,8 +169,7 @@ typedef struct { #define portASSERT_IF_IN_ISR() vPortAssertIfInISR() void vPortAssertIfInISR(); - -#define portCRITICAL_NESTING_IN_TCB 1 +#define portCRITICAL_NESTING_IN_TCB 1 /* Modifications to portENTER_CRITICAL: @@ -204,6 +199,8 @@ void vPortCPUInitializeMutex(portMUX_TYPE *mux); #ifdef CONFIG_FREERTOS_PORTMUX_DEBUG void vPortCPUAcquireMutex(portMUX_TYPE *mux, const char *function, int line); portBASE_TYPE vPortCPUReleaseMutex(portMUX_TYPE *mux, const char *function, int line); +void vPortCPUAcquireMutexIntsDisabled(portMUX_TYPE *mux, const char *function, int line); +portBASE_TYPE vPortCPUReleaseMutexIntsDisabled(portMUX_TYPE *mux, const char *function, int line); void vTaskEnterCritical( portMUX_TYPE *mux, const char *function, int line ); void vTaskExitCritical( portMUX_TYPE *mux, const char *function, int line ); #define portENTER_CRITICAL(mux) vTaskEnterCritical(mux, __FUNCTION__, __LINE__) @@ -215,6 +212,9 @@ void vTaskExitCritical( portMUX_TYPE *mux ); void vTaskEnterCritical( portMUX_TYPE *mux ); void vPortCPUAcquireMutex(portMUX_TYPE *mux); portBASE_TYPE vPortCPUReleaseMutex(portMUX_TYPE *mux); +void vPortCPUAcquireMutexIntsDisabled(portMUX_TYPE *mux); +portBASE_TYPE vPortCPUReleaseMutexIntsDisabled(portMUX_TYPE *mux); + #define portENTER_CRITICAL(mux) vTaskEnterCritical(mux) #define portEXIT_CRITICAL(mux) vTaskExitCritical(mux) #define portENTER_CRITICAL_ISR(mux) vTaskEnterCritical(mux) @@ -243,9 +243,8 @@ static inline unsigned portENTER_CRITICAL_NESTED() { unsigned state = XTOS_SET_I * ESP32, though. (Would show up directly if it did because the magic wouldn't match.) */ static inline void uxPortCompareSet(volatile uint32_t *addr, uint32_t compare, uint32_t *set) { - __asm__ __volatile__( + __asm__ __volatile__ ( "WSR %2,SCOMPARE1 \n" - "ISYNC \n" "S32C1I %0, %1, 0 \n" :"=r"(*set) :"r"(addr), "r"(compare), "0"(*set) diff --git a/components/freertos/include/freertos/xtensa_context.h b/components/freertos/include/freertos/xtensa_context.h index b748a0d1a7..58520f8538 100644 --- a/components/freertos/include/freertos/xtensa_context.h +++ b/components/freertos/include/freertos/xtensa_context.h @@ -314,11 +314,10 @@ STRUCT_END(XtSolFrame) /* Macro to get the current core ID. Only uses the reg given as an argument. - Reading PRID on the ESP108 architecture gives us 0xCDCD on the PRO processor - and 0xABAB on the APP CPU. We distinguish between the two by simply checking - bit 1: it's 1 on the APP and 0 on the PRO processor. + Reading PRID on the ESP32 gives us 0xCDCD on the PRO processor (0) + and 0xABAB on the APP CPU (1). We distinguish between the two by simply checking + bit 13: it's 1 on the APP and 0 on the PRO processor. */ - #ifdef __ASSEMBLER__ .macro getcoreid reg rsr.prid \reg @@ -326,7 +325,8 @@ STRUCT_END(XtSolFrame) .endm #endif - +#define CORE_ID_PRO 0xCDCD +#define CORE_ID_APP 0xABAB /* ------------------------------------------------------------------------------- diff --git a/components/freertos/port.c b/components/freertos/port.c index df298fc4ac..48669472ac 100644 --- a/components/freertos/port.c +++ b/components/freertos/port.c @@ -97,6 +97,7 @@ #include "xtensa_rtos.h" #include "rom/ets_sys.h" +#include "soc/cpu.h" #include "FreeRTOS.h" #include "task.h" @@ -302,118 +303,154 @@ void vPortCPUInitializeMutex(portMUX_TYPE *mux) { mux->lastLockedFn="(never locked)"; mux->lastLockedLine=-1; #endif - mux->mux=portMUX_FREE_VAL; + mux->owner=portMUX_FREE_VAL; + mux->count=0; } +#ifdef CONFIG_FREERTOS_PORTMUX_DEBUG +void vPortCPUAcquireMutex(portMUX_TYPE *mux, const char *fnName, int line) { + unsigned int irqStatus = portENTER_CRITICAL_NESTED(); + vPortCPUAcquireMutexIntsDisabled(mux, fnName, line); + portEXIT_CRITICAL_NESTED(irqStatus); +} +#else +void vPortCPUAcquireMutex(portMUX_TYPE *mux) { + unsigned int irqStatus = portENTER_CRITICAL_NESTED(); + vPortCPUAcquireMutexIntsDisabled(mux); + portEXIT_CRITICAL_NESTED(irqStatus); +} +#endif + +/* XOR one core ID with this value to get the other core ID */ +#define CORE_ID_XOR_SWAP (CORE_ID_PRO ^ CORE_ID_APP) + /* * For kernel use: Acquire a per-CPU mux. Spinlocks, so don't hold on to these muxes for too long. */ #ifdef CONFIG_FREERTOS_PORTMUX_DEBUG -void vPortCPUAcquireMutex(portMUX_TYPE *mux, const char *fnName, int line) { +void vPortCPUAcquireMutexIntsDisabled(portMUX_TYPE *mux, const char *fnName, int line) { #else -void vPortCPUAcquireMutex(portMUX_TYPE *mux) { +void vPortCPUAcquireMutexIntsDisabled(portMUX_TYPE *mux) { #endif #if !CONFIG_FREERTOS_UNICORE uint32_t res; - uint32_t recCnt; - unsigned int irqStatus; + portBASE_TYPE coreID, otherCoreID; #ifdef CONFIG_FREERTOS_PORTMUX_DEBUG - uint32_t cnt=(1<<16); - if ( (mux->mux & portMUX_MAGIC_MASK) != portMUX_MAGIC_VAL ) { - ets_printf("ERROR: vPortCPUAcquireMutex: mux %p is uninitialized (0x%X)! Called from %s line %d.\n", mux, mux->mux, fnName, line); - mux->mux=portMUX_FREE_VAL; + uint32_t timeout=(1<<16); + uint32_t owner = mux->owner; + if (owner != portMUX_FREE_VAL && owner != CORE_ID_PRO && owner != CORE_ID_APP) { + ets_printf("ERROR: vPortCPUAcquireMutex: mux %p is uninitialized (0x%X)! Called from %s line %d.\n", mux, owner, fnName, line); + mux->owner=portMUX_FREE_VAL; } #endif - irqStatus=portENTER_CRITICAL_NESTED(); + /* Spin until we own the core */ + + RSR(PRID, coreID); + /* Note: coreID is the full 32 bit core ID (CORE_ID_PRO/CORE_ID_APP), + not the 0/1 value returned by xPortGetCoreID() + */ + otherCoreID = CORE_ID_XOR_SWAP ^ coreID; do { - //Lock mux if it's currently unlocked - res=(xPortGetCoreID()<mux, portMUX_FREE_VAL, &res); - //If it wasn't free and we're the owner of the lock, we are locking recursively. - if ( (res != portMUX_FREE_VAL) && (((res&portMUX_VAL_MASK)>>portMUX_VAL_SHIFT) == xPortGetCoreID()) ) { - //Mux was already locked by us. Just bump the recurse count by one. - recCnt=(res&portMUX_CNT_MASK)>>portMUX_CNT_SHIFT; - recCnt++; -#ifdef CONFIG_FREERTOS_PORTMUX_DEBUG_RECURSIVE - ets_printf("Recursive lock: recCnt=%d last non-recursive lock %s line %d, curr %s line %d\n", recCnt, mux->lastLockedFn, mux->lastLockedLine, fnName, line); -#endif - mux->mux=portMUX_MAGIC_VAL|(recCnt<owner should be one of portMUX_FREE_VAL, CORE_ID_PRO, + CORE_ID_APP: + + - If portMUX_FREE_VAL, we want to atomically set to 'coreID'. + - If "our" coreID, we can drop through immediately. + - If "otherCoreID", we spin here. + */ + res = coreID; + uxPortCompareSet(&mux->owner, portMUX_FREE_VAL, &res); + #ifdef CONFIG_FREERTOS_PORTMUX_DEBUG - cnt--; - if (cnt==0) { + timeout--; + if (timeout == 0) { ets_printf("Timeout on mux! last non-recursive lock %s line %d, curr %s line %d\n", mux->lastLockedFn, mux->lastLockedLine, fnName, line); - ets_printf("Mux value %X\n", mux->mux); + ets_printf("Owner 0x%x count %d\n", mux->owner, mux->count); } #endif - } while (res!=portMUX_FREE_VAL); + } while (res == otherCoreID); + + assert(res == coreID || res == portMUX_FREE_VAL); /* any other value implies memory corruption */ + assert((res == portMUX_FREE_VAL) == (mux->count == 0)); /* we're first to lock iff count is zero */ + + /* now we own it, we can increment the refcount */ + mux->count++; + #ifdef CONFIG_FREERTOS_PORTMUX_DEBUG if (res==portMUX_FREE_VAL) { //initial lock mux->lastLockedFn=fnName; mux->lastLockedLine=line; + } else { + ets_printf("Recursive lock: count=%d last non-recursive lock %s line %d, curr %s line %d\n", mux->count-1, + mux->lastLockedFn, mux->lastLockedLine, fnName, line); } #endif - portEXIT_CRITICAL_NESTED(irqStatus); #endif } +#ifdef CONFIG_FREERTOS_PORTMUX_DEBUG +portBASE_TYPE vPortCPUReleaseMutex(portMUX_TYPE *mux, const char *fnName, int line) { + unsigned int irqStatus = portENTER_CRITICAL_NESTED(); + portBASE_TYPE res = vPortCPUReleaseMutexIntsDisabled(mux, fnName, line); + portEXIT_CRITICAL_NESTED(irqStatus); + return res; +} +#else +portBASE_TYPE vPortCPUReleaseMutex(portMUX_TYPE *mux) { + unsigned int irqStatus = portENTER_CRITICAL_NESTED(); + portBASE_TYPE res = vPortCPUReleaseMutexIntsDisabled(mux); + portEXIT_CRITICAL_NESTED(irqStatus); + return res; +} +#endif + /* * For kernel use: Release a per-CPU mux. Returns true if everything is OK, false if mux * was already unlocked or is locked by a different core. */ #ifdef CONFIG_FREERTOS_PORTMUX_DEBUG -portBASE_TYPE vPortCPUReleaseMutex(portMUX_TYPE *mux, const char *fnName, int line) { +portBASE_TYPE vPortCPUReleaseMutexIntsDisabled(portMUX_TYPE *mux, const char *fnName, int line) { #else -portBASE_TYPE vPortCPUReleaseMutex(portMUX_TYPE *mux) { +portBASE_TYPE vPortCPUReleaseMutexIntsDisabled(portMUX_TYPE *mux) { #endif #if !CONFIG_FREERTOS_UNICORE - uint32_t res=0; - uint32_t recCnt; - unsigned int irqStatus; - portBASE_TYPE ret=pdTRUE; -// ets_printf("Unlock %p\n", mux); - irqStatus=portENTER_CRITICAL_NESTED(); + portBASE_TYPE res; + portBASE_TYPE coreID; #ifdef CONFIG_FREERTOS_PORTMUX_DEBUG const char *lastLockedFn=mux->lastLockedFn; int lastLockedLine=mux->lastLockedLine; mux->lastLockedFn=fnName; mux->lastLockedLine=line; - if ( (mux->mux & portMUX_MAGIC_MASK) != portMUX_MAGIC_VAL ) ets_printf("ERROR: vPortCPUReleaseMutex: mux %p is uninitialized (0x%X)!\n", mux, mux->mux); -#endif - //Unlock mux if it's currently locked with a recurse count of 0 - res=portMUX_FREE_VAL; - uxPortCompareSet(&mux->mux, (xPortGetCoreID()<>portMUX_VAL_SHIFT) == xPortGetCoreID() ) { - //Lock is valid, we can return safely. Just need to check if it's a recursive lock; if so we need to decrease the refcount. - if ( ((res&portMUX_CNT_MASK)>>portMUX_CNT_SHIFT)!=0) { - //We locked this, but the reccount isn't zero. Decrease refcount and continue. - recCnt=(res&portMUX_CNT_MASK)>>portMUX_CNT_SHIFT; - recCnt--; -#ifdef CONFIG_FREERTOS_PORTMUX_DEBUG_RECURSIVE - ets_printf("Recursive unlock: recCnt=%d last locked %s line %d, curr %s line %d\n", recCnt, lastLockedFn, lastLockedLine, fnName, line); + uint32_t owner = mux->owner; + if (owner != portMUX_FREE_VAL && owner != CORE_ID_PRO && owner != CORE_ID_APP) { + ets_printf("ERROR: vPortCPUReleaseMutex: mux %p is uninitialized (0x%x)!\n", mux, mux->owner); + } #endif - mux->mux=portMUX_MAGIC_VAL|(recCnt<owner); + #ifdef CONFIG_FREERTOS_PORTMUX_DEBUG + if (!res) { ets_printf("ERROR: vPortCPUReleaseMutex: mux %p was already unlocked!\n", mux); ets_printf("Last non-recursive unlock %s line %d, curr unlock %s line %d\n", lastLockedFn, lastLockedLine, fnName, line); + } #endif - ret=pdFALSE; - } else { -#ifdef CONFIG_FREERTOS_PORTMUX_DEBUG - ets_printf("ERROR: vPortCPUReleaseMutex: mux %p wasn't locked by this core (%d) but by core %d (ret=%x, mux=%x).\n", mux, xPortGetCoreID(), ((res&portMUX_VAL_MASK)>>portMUX_VAL_SHIFT), res, mux->mux); - ets_printf("Last non-recursive lock %s line %d\n", lastLockedFn, lastLockedLine); - ets_printf("Called by %s line %d\n", fnName, line); -#endif - ret=pdFALSE; + + mux->count--; + if(mux->count == 0) { + mux->owner = portMUX_FREE_VAL; } - portEXIT_CRITICAL_NESTED(irqStatus); - return ret; +#ifdef CONFIG_FREERTOS_PORTMUX_DEBUG_RECURSIVE + else { + ets_printf("Recursive unlock: count=%d last locked %s line %d, curr %s line %d\n", mux->count, lastLockedFn, lastLockedLine, fnName, line); + } +#endif + + return res; + #else //!CONFIG_FREERTOS_UNICORE return 0; #endif diff --git a/components/freertos/tasks.c b/components/freertos/tasks.c index 4e1a0c0e21..74dec7fbde 100644 --- a/components/freertos/tasks.c +++ b/components/freertos/tasks.c @@ -4111,7 +4111,8 @@ For ESP32 FreeRTOS, vTaskEnterCritical implements both portENTER_CRITICAL and po #endif { BaseType_t oldInterruptLevel=0; - if( xSchedulerRunning != pdFALSE ) + BaseType_t schedulerRunning = xSchedulerRunning; + if( schedulerRunning != pdFALSE ) { //Interrupts may already be disabled (because we're doing this recursively) but we can't get the interrupt level after //vPortCPUAquireMutex, because it also may mess with interrupts. Get it here first, then later figure out if we're nesting @@ -4119,26 +4120,27 @@ For ESP32 FreeRTOS, vTaskEnterCritical implements both portENTER_CRITICAL and po oldInterruptLevel=portENTER_CRITICAL_NESTED(); } #ifdef CONFIG_FREERTOS_PORTMUX_DEBUG - vPortCPUAcquireMutex( mux, function, line ); + vPortCPUAcquireMutexIntsDisabled( mux, function, line ); #else - vPortCPUAcquireMutex( mux ); + vPortCPUAcquireMutexIntsDisabled( mux ); #endif - if( xSchedulerRunning != pdFALSE ) + if( schedulerRunning != pdFALSE ) { - ( pxCurrentTCB[ xPortGetCoreID() ]->uxCriticalNesting )++; - - if( xSchedulerRunning != pdFALSE && pxCurrentTCB[ xPortGetCoreID() ]->uxCriticalNesting == 1 ) + TCB_t *tcb = pxCurrentTCB[xPortGetCoreID()]; + BaseType_t newNesting = tcb->uxCriticalNesting + 1; + tcb->uxCriticalNesting = newNesting; + if( newNesting == 1 ) { //This is the first time we get called. Save original interrupt level. - pxCurrentTCB[ xPortGetCoreID() ]->uxOldInterruptState=oldInterruptLevel; + tcb->uxOldInterruptState = oldInterruptLevel; } /* Original FreeRTOS comment, saved for reference: This is not the interrupt safe version of the enter critical function so assert() if it is being called from an interrupt context. Only API functions that end in "FromISR" can be used in an - interrupt. Only assert if the critical nesting count is 1 to + interrupt. Only assert if the critical nesting count is 1 to protect against recursive calls if the assert function also uses a critical section. */ @@ -4149,7 +4151,7 @@ For ESP32 FreeRTOS, vTaskEnterCritical implements both portENTER_CRITICAL and po both from ISR as well as non-ISR code, thus we re-organized vTaskEnterCritical to also work in ISRs. */ #if 0 - if( pxCurrentTCB[ xPortGetCoreID() ]->uxCriticalNesting == 1 ) + if( newNesting == 1 ) { portASSERT_IF_IN_ISR(); } @@ -4178,19 +4180,22 @@ For ESP32 FreeRTOS, vTaskExitCritical implements both portEXIT_CRITICAL and port #endif { #ifdef CONFIG_FREERTOS_PORTMUX_DEBUG - vPortCPUReleaseMutex( mux, function, line ); + vPortCPUReleaseMutexIntsDisabled( mux, function, line ); #else - vPortCPUReleaseMutex( mux ); + vPortCPUReleaseMutexIntsDisabled( mux ); #endif if( xSchedulerRunning != pdFALSE ) { - if( pxCurrentTCB[ xPortGetCoreID() ]->uxCriticalNesting > 0U ) + TCB_t *tcb = pxCurrentTCB[xPortGetCoreID()]; + BaseType_t nesting = tcb->uxCriticalNesting; + if( nesting > 0U ) { - ( pxCurrentTCB[ xPortGetCoreID() ]->uxCriticalNesting )--; + nesting--; + tcb->uxCriticalNesting = nesting; - if( pxCurrentTCB[ xPortGetCoreID() ]->uxCriticalNesting == 0U ) + if( nesting == 0U ) { - portEXIT_CRITICAL_NESTED(pxCurrentTCB[ xPortGetCoreID() ]->uxOldInterruptState); + portEXIT_CRITICAL_NESTED(tcb->uxOldInterruptState); } else { -- 2.40.0