From: Ivan Grokhotkov Date: Wed, 18 Jan 2017 07:07:27 +0000 (+0800) Subject: spi_flash: fix race condition when doing operations in unpinned tasks X-Git-Tag: v2.0-rc1~15^2~2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4676d159adc6e92ab90546a618c340b9a5814459;p=esp-idf spi_flash: fix race condition when doing operations in unpinned tasks spi_flash_enable_interrupts_caches_and_other_cpu function used to enable non-IRAM interrupts after giving up flash operation lock, which would cause problems if another task was waiting on the lock to start a flash operation. In fact, non-IRAM interrupts should be re-enabled before the task scheduler is resumed. Otherwise non-pinned task can be moved to the other CPU due to preemption, causing esp_intr_noniram_enable to be called on the other CPU, causing an abort to be triggered. Fixes the issue reported in https://github.com/espressif/esp-idf/pull/258 --- diff --git a/components/spi_flash/cache_utils.c b/components/spi_flash/cache_utils.c index 983ff5d256..df9d18c443 100644 --- a/components/spi_flash/cache_utils.c +++ b/components/spi_flash/cache_utils.c @@ -41,6 +41,9 @@ static uint32_t s_flash_op_cache_state[2]; static SemaphoreHandle_t s_flash_op_mutex; static volatile bool s_flash_op_can_start = false; static volatile bool s_flash_op_complete = false; +#ifndef NDEBUG +static volatile int s_flash_op_cpu = -1; +#endif void spi_flash_init_lock() { @@ -85,6 +88,11 @@ void IRAM_ATTR spi_flash_disable_interrupts_caches_and_other_cpu() const uint32_t cpuid = xPortGetCoreID(); const uint32_t other_cpuid = (cpuid == 0) ? 1 : 0; +#ifndef NDEBUG + // For sanity check later: record the CPU which has started doing flash operation + assert(s_flash_op_cpu == -1); + s_flash_op_cpu = cpuid; +#endif if (xTaskGetSchedulerState() == taskSCHEDULER_NOT_STARTED) { // Scheduler hasn't been started yet, it means that spi_flash API is being @@ -98,12 +106,13 @@ void IRAM_ATTR spi_flash_disable_interrupts_caches_and_other_cpu() // disable cache there and block other tasks from executing. s_flash_op_can_start = false; s_flash_op_complete = false; - esp_ipc_call(other_cpuid, &spi_flash_op_block_func, (void*) other_cpuid); + esp_err_t ret = esp_ipc_call(other_cpuid, &spi_flash_op_block_func, (void*) other_cpuid); + assert(ret == ESP_OK); while (!s_flash_op_can_start) { // Busy loop and wait for spi_flash_op_block_func to disable cache // on the other CPU } - // Disable scheduler on CPU cpuid + // Disable scheduler on the current CPU vTaskSuspendAll(); // This is guaranteed to run on CPU because the other CPU is now // occupied by highest priority task @@ -119,6 +128,11 @@ void IRAM_ATTR spi_flash_enable_interrupts_caches_and_other_cpu() { const uint32_t cpuid = xPortGetCoreID(); const uint32_t other_cpuid = (cpuid == 0) ? 1 : 0; +#ifndef NDEBUG + // Sanity check: flash operation ends on the same CPU as it has started + assert(cpuid == s_flash_op_cpu); + s_flash_op_cpu = -1; +#endif // Re-enable cache on this CPU spi_flash_restore_cache(cpuid, s_flash_op_cache_state[cpuid]); @@ -132,13 +146,21 @@ void IRAM_ATTR spi_flash_enable_interrupts_caches_and_other_cpu() } else { // Signal to spi_flash_op_block_task that flash operation is complete s_flash_op_complete = true; - // Resume tasks on the current CPU + } + // Re-enable non-iram interrupts + esp_intr_noniram_enable(); + + // Resume tasks on the current CPU, if the scheduler has started. + // NOTE: enabling non-IRAM interrupts has to happen before this, + // because once the scheduler has started, due to preemption the + // current task can end up being moved to the other CPU. + // But esp_intr_noniram_enable has to be called on the same CPU which + // called esp_intr_noniram_disable + if (xTaskGetSchedulerState() != taskSCHEDULER_NOT_STARTED) { xTaskResumeAll(); } // Release API lock spi_flash_op_unlock(); - // Re-enable non-iram interrupts - esp_intr_noniram_enable(); } void IRAM_ATTR spi_flash_disable_interrupts_caches_and_other_cpu_no_os() diff --git a/components/spi_flash/test/test_spi_flash.c b/components/spi_flash/test/test_spi_flash.c index 330e37bb82..4f3171049e 100644 --- a/components/spi_flash/test/test_spi_flash.c +++ b/components/spi_flash/test/test_spi_flash.c @@ -8,26 +8,25 @@ #include struct flash_test_ctx { - uint32_t offset[2]; - bool fail[2]; + uint32_t offset; + bool fail; SemaphoreHandle_t done; }; static void flash_test_task(void *arg) { - const uint32_t coreid = xPortGetCoreID(); - ets_printf("t%d\n", coreid); struct flash_test_ctx *ctx = (struct flash_test_ctx *) arg; vTaskDelay(100 / portTICK_PERIOD_MS); - const uint32_t sector = ctx->offset[coreid]; - ets_printf("es%d\n", coreid); + const uint32_t sector = ctx->offset; + printf("t%d\n", sector); + printf("es%d\n", sector); if (spi_flash_erase_sector(sector) != ESP_OK) { - ctx->fail[coreid] = true; - ets_printf("Erase failed\r\n"); + ctx->fail = true; + printf("Erase failed\r\n"); xSemaphoreGive(ctx->done); vTaskDelete(NULL); } - ets_printf("ed%d\n", coreid); + printf("ed%d\n", sector); vTaskDelay(0 / portTICK_PERIOD_MS); @@ -35,58 +34,52 @@ static void flash_test_task(void *arg) const uint32_t n = 4096; for (uint32_t offset = 0; offset < n; offset += 4) { if (spi_flash_write(sector * SPI_FLASH_SEC_SIZE + offset, (const uint8_t *) &val, 4) != ESP_OK) { - ets_printf("Write failed at offset=%d\r\n", offset); - ctx->fail[coreid] = true; + printf("Write failed at offset=%d\r\n", offset); + ctx->fail = true; break; } } - ets_printf("wd%d\n", coreid); + printf("wd%d\n", sector); vTaskDelay(0 / portTICK_PERIOD_MS); uint32_t val_read; for (uint32_t offset = 0; offset < n; offset += 4) { if (spi_flash_read(sector * SPI_FLASH_SEC_SIZE + offset, (uint8_t *) &val_read, 4) != ESP_OK) { - ets_printf("Read failed at offset=%d\r\n", offset); - ctx->fail[coreid] = true; + printf("Read failed at offset=%d\r\n", offset); + ctx->fail = true; break; } if (val_read != val) { - ets_printf("Read invalid value=%08x at offset=%d\r\n", val_read, offset); - ctx->fail[coreid] = true; + printf("Read invalid value=%08x at offset=%d\r\n", val_read, offset); + ctx->fail = true; break; } } - ets_printf("td%d\n", coreid); + printf("td%d\n", sector); xSemaphoreGive(ctx->done); vTaskDelete(NULL); } TEST_CASE("flash write and erase work both on PRO CPU and on APP CPU", "[spi_flash]") { - TaskHandle_t procpu_task; - TaskHandle_t appcpu_task; - struct flash_test_ctx ctx; + SemaphoreHandle_t done = xSemaphoreCreateCounting(4, 0); + struct flash_test_ctx ctx[4] = { + { .offset = 0x100 + 6, .done = done }, + { .offset = 0x100 + 7, .done = done }, + { .offset = 0x100 + 8, .done = done }, + { .offset = 0x100 + 9, .done = done } + }; - ctx.offset[0] = 6; - ctx.offset[1] = 7; - ctx.fail[0] = 0; - ctx.fail[1] = 0; - ctx.done = xSemaphoreCreateBinary(); + xTaskCreatePinnedToCore(flash_test_task, "1", 2048, &ctx[0], 3, NULL, 0); + xTaskCreatePinnedToCore(flash_test_task, "2", 2048, &ctx[1], 3, NULL, 1); + xTaskCreatePinnedToCore(flash_test_task, "3", 2048, &ctx[2], 3, NULL, tskNO_AFFINITY); + xTaskCreatePinnedToCore(flash_test_task, "4", 2048, &ctx[3], 3, NULL, tskNO_AFFINITY); - xTaskCreatePinnedToCore(flash_test_task, "1", 2048, &ctx, 3, &procpu_task, 0); - if (portNUM_PROCESSORS == 2) { - xTaskCreatePinnedToCore(flash_test_task, "2", 2048, &ctx, 3, &appcpu_task, 1); - } - - xSemaphoreTake(ctx.done, portMAX_DELAY); - if (portNUM_PROCESSORS == 2) { - xSemaphoreTake(ctx.done, portMAX_DELAY); - } - - TEST_ASSERT_EQUAL(false, ctx.fail[0]); - if (portNUM_PROCESSORS == 2) { - TEST_ASSERT_EQUAL(false, ctx.fail[1]); + for (int i = 0; i < 4; ++i) { + xSemaphoreTake(done, portMAX_DELAY); + TEST_ASSERT_FALSE(ctx[i].fail); } + vSemaphoreDelete(done); }