From 81e35a142ab810de68d1ea99b4cbf7336c72d30e Mon Sep 17 00:00:00 2001 From: Jeroen Domburg Date: Fri, 29 Jun 2018 11:05:36 +0800 Subject: [PATCH] Spiram: Add option to reserve MMU banks; add himem API to make use of those banks --- .gitlab-ci.yml | 21 + components/esp32/CMakeLists.txt | 2 + components/esp32/Kconfig | 34 +- components/esp32/cache_sram_mmu.c | 141 +++++++ components/esp32/esp_himem.c | 366 ++++++++++++++++++ components/esp32/include/esp_himem.h | 152 ++++++++ components/esp32/include/rom/cache.h | 16 +- components/esp32/spiram.c | 59 ++- components/esp32/test/test_himem.c | 104 +++++ docs/Doxyfile | 2 + docs/en/api-reference/system/himem.rst | 33 ++ docs/en/api-reference/system/index.rst | 1 + docs/en/api-reference/system/mem_alloc.rst | 3 + docs/zh_CN/api-reference/system/himem.rst | 1 + examples/system/himem/CMakeLists.txt | 6 + examples/system/himem/Makefile | 9 + examples/system/himem/README.md | 5 + examples/system/himem/main/CMakeLists.txt | 4 + examples/system/himem/main/component.mk | 3 + examples/system/himem/main/himem_test_main.c | 98 +++++ examples/system/himem/sdkconfig.defaults | 12 + tools/unit-test-app/configs/psram | 3 +- tools/unit-test-app/configs/psram_8m | 4 + .../unit-test-app/tools/ConfigDependency.yml | 3 +- 24 files changed, 1055 insertions(+), 27 deletions(-) create mode 100644 components/esp32/cache_sram_mmu.c create mode 100644 components/esp32/esp_himem.c create mode 100644 components/esp32/include/esp_himem.h create mode 100644 components/esp32/test/test_himem.c create mode 100644 docs/en/api-reference/system/himem.rst create mode 100644 docs/zh_CN/api-reference/system/himem.rst create mode 100644 examples/system/himem/CMakeLists.txt create mode 100644 examples/system/himem/Makefile create mode 100644 examples/system/himem/README.md create mode 100644 examples/system/himem/main/CMakeLists.txt create mode 100644 examples/system/himem/main/component.mk create mode 100644 examples/system/himem/main/himem_test_main.c create 
mode 100644 examples/system/himem/sdkconfig.defaults create mode 100644 tools/unit-test-app/configs/psram_8m diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f3f3fbe6df..b1d4e6b1b2 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1310,6 +1310,27 @@ UT_011_03: - ESP32_IDF - EMMC +UT_012_01: + <<: *unit_test_template + tags: + - ESP32_IDF + - UT_T1_1 + - 8Mpsram + +UT_012_02: + <<: *unit_test_template + tags: + - ESP32_IDF + - UT_T1_1 + - 8Mpsram + +UT_012_03: + <<: *unit_test_template + tags: + - ESP32_IDF + - UT_T1_1 + - 8Mpsram + UT_601_01: <<: *unit_test_template tags: diff --git a/components/esp32/CMakeLists.txt b/components/esp32/CMakeLists.txt index 007a6c4358..c1b7c6e38e 100644 --- a/components/esp32/CMakeLists.txt +++ b/components/esp32/CMakeLists.txt @@ -19,6 +19,7 @@ else() set(COMPONENT_SRCS "brownout.c" "cache_err_int.c" + "cache_sram_mmu.c" "clk.c" "coexist.c" "core_dump.c" @@ -30,6 +31,7 @@ else() "esp_err_to_name.c" "esp_timer.c" "esp_timer_esp32.c" + "esp_himem.c" "ets_timer_legacy.c" "event_default_handlers.c" "event_loop.c" diff --git a/components/esp32/Kconfig b/components/esp32/Kconfig index 8402caeccb..edfea9c5a4 100644 --- a/components/esp32/Kconfig +++ b/components/esp32/Kconfig @@ -67,15 +67,24 @@ endchoice choice SPIRAM_TYPE prompt "Type of SPI RAM chip in use" - default SPIRAM_TYPE_ESPPSRAM32 + default SPIRAM_TYPE_AUTO + +config SPIRAM_TYPE_AUTO + bool "Auto-detect" config SPIRAM_TYPE_ESPPSRAM32 bool "ESP-PSRAM32 or IS25WP032" + +config SPIRAM_TYPE_ESPPSRAM64 + bool "ESP-PSRAM64 or LY68L6400" + endchoice config SPIRAM_SIZE int + default -1 if SPIRAM_TYPE_AUTO default 4194304 if SPIRAM_TYPE_ESPPSRAM32 + default 8388608 if SPIRAM_TYPE_ESPPSRAM64 default 0 choice SPIRAM_SPEED @@ -120,6 +129,29 @@ config SPIRAM_CACHE_WORKAROUND This will also not use any bits of newlib that are located in ROM, opting for a version that is compiled with the workaround and located in flash instead. 
+config SPIRAM_BANKSWITCH_ENABLE + bool "Enable bank switching for >4MiB external RAM" + default y + depends on SPIRAM_USE_MEMMAP || SPIRAM_USE_CAPS_ALLOC || SPIRAM_USE_MALLOC + help + The ESP32 only supports 4MiB of external RAM in its address space. The hardware does support larger + memories, but these have to be bank-switched in and out of this address space. Enabling this allows you + to reserve some MMU pages for this, which allows the use of the esp_himem api to manage these banks. + +#Note that this is limited to 62 banks, as esp_spiram_writeback_cache needs some kind of mapping of some banks +#below that mark to work. We cannot at this moment guarantee this to exist when himem is enabled. +config SPIRAM_BANKSWITCH_RESERVE + int "Amount of 32K pages to reserve for bank switching" + depends on SPIRAM_BANKSWITCH_ENABLE + default 8 + range 1 62 + help + Select the amount of banks reserved for bank switching. Note that the amount of RAM allocatable with + malloc/esp_heap_alloc_caps will decrease by 32K for each page reserved here. + + Note that this reservation is only actually done if your program actually uses the himem API. Without + any himem calls, the reservation is not done and the original amount of memory will be available + to malloc/esp_heap_alloc_caps. config SPIRAM_MALLOC_ALWAYSINTERNAL int "Maximum malloc() size, in bytes, to always put in internal memory" diff --git a/components/esp32/cache_sram_mmu.c b/components/esp32/cache_sram_mmu.c new file mode 100644 index 0000000000..6be8a1dee7 --- /dev/null +++ b/components/esp32/cache_sram_mmu.c @@ -0,0 +1,141 @@ +// Copyright 2010-2018 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+#include "soc/soc.h"
+#include "soc/dport_reg.h"
+#include "string.h"
+#include "esp_spi_flash.h"
+
+//Errors that can be returned
+//(only these three codes are produced by the code in this file; anything else comes from the ROM routine)
+#define MMU_SET_ADDR_ALIGNED_ERROR 1
+#define MMU_SET_PAGE_SIZE_ERROR 3
+#define MMU_SET_VADDR_OUT_RANGE 5
+
+
+//Base addresses of the per-CPU cache MMU tables; selected through CACHE_MMU_ADDRESS_BASE(cpu_no)
+#define PROCACHE_MMU_ADDR_BASE 0x3FF10000
+#define APPCACHE_MMU_ADDR_BASE 0x3FF12000
+
+
+//sram
+//The accepted virtual window starts at PRO_DRAM1_START_ADDR and is (psize << 17) bytes long,
+//i.e. 128 pages of psize KiB each (4MiB for psize == 32).
+#define PRO_DRAM1_START_ADDR 0x3F800000
+#define PRO_DRAM1_END_ADDR(psize) (PRO_DRAM1_START_ADDR + ((psize) << 17))
+//cache mmu register file address
+#define CACHE_MMU_ADDRESS_BASE(cpu_no) ((cpu_no) ? (APPCACHE_MMU_ADDR_BASE) : (PROCACHE_MMU_ADDR_BASE))
+//virtual address, physical address check
+//Evaluates to true when addr is NOT aligned. For psize == 32 the mask is 0xFFFF >> 1 == 0x7FFF (32KiB alignment).
+//NOTE(review): for the other page sizes the mask does not equal (psize KiB - 1), e.g. psize == 16 gives 0x1FFF;
+//confirm whether that is intended. The macro also divides by psize, so psize must be non-zero when it is evaluated.
+#define ADDRESS_CHECK(addr,psize) (((addr) & (0xFFFF >>((64/(psize))-1))) != 0)
+//CPU number check
+//(not referenced by the code visible in this file)
+#define CPU_NUMBER_CHECK(cpu_no) (((cpu_no)<0) || ((cpu_no)>1))
+//PID check
+//(not referenced by the code visible in this file)
+#define PID_CHECK(pid) (((pid)<0) || ((pid)>7))
+//flash MMU edge check (flash size default : 16*1024 K)
+//(not referenced by the code visible in this file)
+#define FLASH_MMU_EDGE_CHECK(mmu_val,num) (((mmu_val) + (num)) > 256)
+//sram MMU edge check (sram size default : 8*1024 K)
+//(not referenced by the code visible in this file)
+#define SRAM_MMU_EDGE_CHECK(mmu_val,num,psize) (((mmu_val) + (num)) > ((8*1024)/(psize)))
+
+//We can relegate to the ROM version if the 2nd core isn't running (yet) and the RTOS is not started yet, for instance
+//in the bootloader and in the app start process. The ROM code manually disables the cache, without using
+//cache guards.
+unsigned int cache_sram_mmu_set_rom(int cpu_no, int pid, unsigned int vaddr, unsigned int paddr, int psize, int num);
+
+
+#ifndef BOOTLOADER_BUILD
+
+/*
+Note that this function is a replacement for the ROM function with the same name, with these differences:
+- It uses the DPORT workarounds
+- It fixes a bug where the ROM version throws an error when vaddr is more than 2MiB into the memory region
+- It uses the SPI cache guards to make sure the MMU is idle
+
+Parameters:
+- cpu_no: selects the MMU table and page-mode register (0: PRO, anything else: APP)
+- pid:    0 and 1 share table offset 1152; other values use offset 1024 + (pid << 7)
+- vaddr:  virtual address of the first page; must lie in [PRO_DRAM1_START_ADDR, PRO_DRAM1_END_ADDR(psize))
+- paddr:  physical address the first page should point at
+- psize:  page size in KiB; 32, 16, 8, 4 or 2
+- num:    number of consecutive pages to map
+
+Returns 0 on success, MMU_SET_ADDR_ALIGNED_ERROR, MMU_SET_PAGE_SIZE_ERROR or MMU_SET_VADDR_OUT_RANGE on
+rejected arguments, or whatever the ROM routine returns when the flash guards are not installed yet.
+
+NOTE(review): cpu_no, pid and num are not range-checked here (the *_CHECK macros above are unused), and the
+alignment check runs before psize is validated. Confirm that all callers pass sane values.
+*/
+unsigned int IRAM_ATTR cache_sram_mmu_set(int cpu_no, int pid, unsigned int vaddr, unsigned int paddr, int psize, int num)
+{
+    const spi_flash_guard_funcs_t *guard=spi_flash_guard_get();
+    if (!guard) {
+        //Still starting up; guards not available yet. Use ROM version of code.
+        return cache_sram_mmu_set_rom(cpu_no, pid, vaddr, paddr, psize, num);
+    }
+
+    unsigned int i,shift,mask_s;
+    unsigned int mmu_addr;
+    unsigned int mmu_table_val;
+    //address check
+    if( (ADDRESS_CHECK(vaddr,psize)) || (ADDRESS_CHECK(paddr,psize)) ){
+        return MMU_SET_ADDR_ALIGNED_ERROR;
+    }
+    //psize check
+    //shift is log2 of the page size in bytes; mask_s is the value later written to the SRAM page mode field
+    if(psize == 32) {
+        shift = 15;
+        mask_s = 0;
+    } else if(psize == 16) {
+        shift = 14;
+        mask_s = 1;
+    } else if(psize == 8) {
+        shift = 13;
+        mask_s = 2;
+    } else if(psize == 4) {
+        shift = 12;
+        mask_s = 3;
+    } else if(psize == 2) {
+        shift = 11;
+        mask_s = 4;
+    } else {
+        return MMU_SET_PAGE_SIZE_ERROR;
+    }
+    //mmu value
+    //table entries hold the physical page number
+    mmu_table_val = paddr >> shift;
+    //mmu_addr
+    //index of the first table entry: a per-pid base plus the page index of vaddr inside the window.
+    //1152 == 1024 + (1 << 7), i.e. pid 0 and pid 1 both use the slot the generic formula yields for pid 1.
+    if(pid == 0 || pid == 1){
+        if(vaddr >= PRO_DRAM1_START_ADDR && vaddr < PRO_DRAM1_END_ADDR(psize)){
+            mmu_addr = 1152 + ((vaddr & (0x3FFFFF >> mask_s)) >> shift);
+        } else{
+            return MMU_SET_VADDR_OUT_RANGE;
+        }
+    } else {
+        if(vaddr >= PRO_DRAM1_START_ADDR && vaddr < PRO_DRAM1_END_ADDR(psize)){
+            mmu_addr = (1024 + (pid<<7)) + ((vaddr & (0x3FFFFF >> mask_s)) >> shift);
+        } else{
+            return MMU_SET_VADDR_OUT_RANGE;
+        }
+    }
+
+    //The MMU registers are implemented in such a way that lookups from the cache subsystem may collide with
+    //CPU access to the MMU registers. We use the flash guards to make sure the cache is disabled.
+    guard->start();
+
+    //mmu change
+    //consecutive virtual pages are pointed at consecutive physical pages
+    //NOTE(review): i is unsigned while num is an int, so a negative num is compared as a huge unsigned count
+    for ( i = 0; i < num; i++){
+        *(volatile unsigned int *)(CACHE_MMU_ADDRESS_BASE(cpu_no) + mmu_addr * 4) = mmu_table_val + i; //write table
+        mmu_addr++;
+    }
+
+    //record the page size that was used in the page-mode field of the selected CPU's cache control register
+    if(cpu_no == 0){
+        DPORT_REG_SET_FIELD(DPORT_PRO_CACHE_CTRL1_REG, DPORT_PRO_CMMU_SRAM_PAGE_MODE, mask_s);
+    } else {
+        DPORT_REG_SET_FIELD(DPORT_APP_CACHE_CTRL1_REG, DPORT_APP_CMMU_SRAM_PAGE_MODE, mask_s);
+    }
+
+    guard->end();
+
+    return 0;
+}
+
+
+#else
+
+//For the bootloader, we can always use the ROM version of this: it works well enough and keeps the size of the bootloader binary down.
+unsigned int cache_sram_mmu_set(int cpu_no, int pid, unsigned int vaddr, unsigned int paddr, int psize, int num) {
+    return cache_sram_mmu_set_rom(cpu_no, pid, vaddr, paddr, psize, num);
+}
+
+#endif
diff --git a/components/esp32/esp_himem.c b/components/esp32/esp_himem.c
new file mode 100644
index 0000000000..89e5b3237b
--- /dev/null
+++ b/components/esp32/esp_himem.c
@@ -0,0 +1,366 @@
+// Copyright 2018 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "freertos/FreeRTOS.h"
+#include "freertos/task.h"
+#include "esp_spiram.h"
+#include "rom/cache.h"
+#include "sdkconfig.h"
+#include "esp_himem.h"
+#include "soc/soc.h"
+#include "esp_log.h"
+
+/*
+So, why does the API look this way and is so inflexible to not allow any maps beyond the full 32K chunks? 
Most of +it has to do with the fact that the cache works on the *virtual* addresses. What this comes down to is that while it's +allowed to map a range of physical memory into the address space two times, there's no cache consistency between the +two regions. + +This means that a write to region A may or may not show up, perhaps delayed, in region B, as it depends on +the time that the writeback to SPI RAM is done on A and the time before the corresponding cache line is invalidated +on B. Note that this goes for every 32-byte cache line: this implies that if a program writes to address X and Y within +A, the write to Y may show up before the write to X does. + +It gets even worse when both A and B are written: theoretically, a write to a 32-byte cache line in A can be entirely +undone because of a write to a different address in B that happens to be in the same 32-byte cache line. + +Because of these reasons, we do not allow double mappings at all. This, however, has other implications that make +supporting ranges not really useful. Because of the lack of double mappings, applications will need to do their own +management of mapped regions, meaning they will normally map in and out blocks at a time anyway, as mapping more +fluent regions would result in the chance of accidentally mapping two overlapping regions. As this is the case, +to keep the code simple, at the moment we just force these blocks to be equal to the 32K MMU page size. The API +itself does allow for more granular allocations, so if there's a pressing need for a more complex solution in the +future, we can do this. + +Note: In the future, we can expand on this api to do a memcpy() between SPI RAM and (internal) memory using the SPI1 +peripheral. This needs support for SPI1 to be in the SPI driver, however. 
+*/ + +#if CONFIG_SPIRAM_BANKSWITCH_ENABLE +#define SPIRAM_BANKSWITCH_RESERVE CONFIG_SPIRAM_BANKSWITCH_RESERVE +#else +#define SPIRAM_BANKSWITCH_RESERVE 0 +#endif + +#define CACHE_BLOCKSIZE (32*1024) + +//Start of the virtual address range reserved for himem use +#define VIRT_HIMEM_RANGE_START (SOC_EXTRAM_DATA_LOW+(128-SPIRAM_BANKSWITCH_RESERVE)*CACHE_BLOCKSIZE) +//Start MMU block reserved for himem use +#define VIRT_HIMEM_RANGE_BLOCKSTART (128-SPIRAM_BANKSWITCH_RESERVE) +//Start physical block +#define PHYS_HIMEM_BLOCKSTART (128-SPIRAM_BANKSWITCH_RESERVE) + +#define TAG "esp_himem" + +#define HIMEM_CHECK(cond, str, err) if (cond) do {ESP_LOGE(TAG, "%s: %s", __FUNCTION__, str); return err; } while(0) + +// Metadata for a block of physical RAM +typedef struct { + unsigned int is_alloced: 1; + unsigned int is_mapped: 1; +} ramblock_t; + +//Metadata for a 32-K memory address range +typedef struct { + unsigned int is_alloced: 1; + unsigned int is_mapped: 1; + unsigned int ram_block: 16; +} rangeblock_t; + +static ramblock_t *s_ram_descriptor = NULL; +static rangeblock_t *s_range_descriptor = NULL; +static int s_ramblockcnt = 0; +static const int s_rangeblockcnt = SPIRAM_BANKSWITCH_RESERVE; + +//Handle for a window of address space +typedef struct esp_himem_rangedata_t { + int block_ct; + int block_start; +} esp_himem_rangedata_t; + +//Handle for a range of physical memory +typedef struct esp_himem_ramdata_t { + int block_ct; + uint16_t *block; +} esp_himem_ramdata_t; + +static portMUX_TYPE spinlock = portMUX_INITIALIZER_UNLOCKED; + +static inline int ramblock_idx_valid(int ramblock_idx) +{ + return (ramblock_idx >= 0 && ramblock_idx < s_ramblockcnt); +} + +static inline int rangeblock_idx_valid(int rangeblock_idx) +{ + return (rangeblock_idx >= 0 && rangeblock_idx < s_rangeblockcnt); +} + +static void set_bank(int virt_bank, int phys_bank, int ct) +{ + int r; + r = cache_sram_mmu_set( 0, 0, SOC_EXTRAM_DATA_LOW + CACHE_BLOCKSIZE * virt_bank, phys_bank * CACHE_BLOCKSIZE, 
32, ct ); + assert(r == 0); + r = cache_sram_mmu_set( 1, 0, SOC_EXTRAM_DATA_LOW + CACHE_BLOCKSIZE * virt_bank, phys_bank * CACHE_BLOCKSIZE, 32, ct ); + assert(r == 0); +} + +size_t esp_himem_get_phys_size() +{ + int paddr_start = (4096 * 1024) - (CACHE_BLOCKSIZE * SPIRAM_BANKSWITCH_RESERVE); + return esp_spiram_get_size()-paddr_start; +} + +size_t esp_himem_get_free_size() +{ + size_t ret=0; + for (int i = 0; i < s_ramblockcnt; i++) { + if (!s_ram_descriptor[i].is_alloced) ret+=CACHE_BLOCKSIZE; + } + return ret; +} + +size_t esp_himem_reserved_area_size() { + return CACHE_BLOCKSIZE * SPIRAM_BANKSWITCH_RESERVE; +} + + +void __attribute__((constructor)) esp_himem_init() +{ + if (SPIRAM_BANKSWITCH_RESERVE == 0) return; + int maxram=esp_spiram_get_size(); + //catch double init + HIMEM_CHECK(s_ram_descriptor != NULL, "already initialized", ); //Looks weird; last arg is empty so it expands to 'return ;' + HIMEM_CHECK(s_range_descriptor != NULL, "already initialized", ); + //need to have some reserved banks + HIMEM_CHECK(SPIRAM_BANKSWITCH_RESERVE == 0, "No banks reserved for himem", ); + //Start and end of physical reserved memory. Note it starts slightly under + //the 4MiB mark as the reserved banks can't have an unity mapping to be used by malloc + //anymore; we treat them as himem instead. + int paddr_start = (4096 * 1024) - (CACHE_BLOCKSIZE * SPIRAM_BANKSWITCH_RESERVE); + int paddr_end = maxram; + s_ramblockcnt = ((paddr_end - paddr_start) / CACHE_BLOCKSIZE); + //Allocate data structures + s_ram_descriptor = calloc(sizeof(ramblock_t), s_ramblockcnt); + s_range_descriptor = calloc(sizeof(rangeblock_t), SPIRAM_BANKSWITCH_RESERVE); + if (s_ram_descriptor == NULL || s_range_descriptor == NULL) { + ESP_EARLY_LOGE(TAG, "Cannot allocate memory for meta info. Not initializing!"); + free(s_ram_descriptor); + free(s_range_descriptor); + return; + } + ESP_EARLY_LOGI(TAG, "Initialized. 
Using last %d 32KB address blocks for bank switching on %d KB of physical memory.", + SPIRAM_BANKSWITCH_RESERVE, (paddr_end - paddr_start)/1024); +} + + +//Allocate count not-necessarily consecutive physical RAM blocks, return numbers in blocks[]. Return +//true if blocks can be allocated, false if not. +static bool allocate_blocks(int count, uint16_t *blocks_out) +{ + int n = 0; + for (int i = 0; i < s_ramblockcnt && n != count; i++) { + if (!s_ram_descriptor[i].is_alloced) { + blocks_out[n] = i; + n++; + } + } + if (n == count) { + //All blocks could be allocated. Mark as in use. + for (int i = 0; i < count; i++) { + s_ram_descriptor[blocks_out[i]].is_alloced = true; + assert(s_ram_descriptor[blocks_out[i]].is_mapped == false); + } + return true; + } else { + //Error allocating blocks + return false; + } +} + + +esp_err_t esp_himem_alloc(size_t size, esp_himem_handle_t *handle_out) +{ + if (size % CACHE_BLOCKSIZE != 0) { + return ESP_ERR_INVALID_SIZE; + } + int blocks = size / CACHE_BLOCKSIZE; + esp_himem_ramdata_t *r = calloc(sizeof(esp_himem_ramdata_t), 1); + if (!r) { + goto nomem; + } + r->block = calloc(sizeof(uint16_t), blocks); + if (!r->block) { + goto nomem; + } + portENTER_CRITICAL(&spinlock); + int ok = allocate_blocks(blocks, r->block); + portEXIT_CRITICAL(&spinlock); + if (!ok) { + goto nomem; + } + r->block_ct = blocks; + *handle_out = r; + return ESP_OK; +nomem: + if (r) { + free(r->block); + } + free(r); + return ESP_ERR_NO_MEM; +} + +esp_err_t esp_himem_free(esp_himem_handle_t handle) +{ + //Check if any of the blocks is still mapped; fail if this is the case. 
+ for (int i = 0; i < handle->block_ct; i++) { + assert(ramblock_idx_valid(handle->block[i])); + HIMEM_CHECK(s_ram_descriptor[handle->block[i]].is_mapped, "block in range still mapped", ESP_ERR_INVALID_ARG); + } + //Mark blocks as free + portENTER_CRITICAL(&spinlock); + for (int i = 0; i < handle->block_ct; i++) { + s_ram_descriptor[handle->block[i]].is_alloced = false; + } + portEXIT_CRITICAL(&spinlock); + + //Free handle + free(handle->block); + free(handle); + return ESP_OK; +} + + +esp_err_t esp_himem_alloc_map_range(size_t size, esp_himem_rangehandle_t *handle_out) +{ + HIMEM_CHECK(s_ram_descriptor == NULL, "Himem not available!", ESP_ERR_INVALID_STATE); + HIMEM_CHECK(size % CACHE_BLOCKSIZE != 0, "requested size not aligned to blocksize", ESP_ERR_INVALID_SIZE); + int blocks = size / CACHE_BLOCKSIZE; + esp_himem_rangedata_t *r = calloc(sizeof(esp_himem_rangedata_t), 1); + if (!r) { + return ESP_ERR_NO_MEM; + } + r->block_ct = blocks; + r->block_start = -1; + int start_free = 0; + portENTER_CRITICAL(&spinlock); + for (int i = 0; i < s_rangeblockcnt; i++) { + if (s_range_descriptor[i].is_alloced) { + start_free = i + 1; //optimistically assume next block is free... + } else if (i - start_free == blocks - 1) { + //We found a span of blocks that's big enough to allocate the requested range in. + r->block_start = start_free; + break; + } + } + + if (r->block_start == -1) { + //Couldn't find enough free blocks + free(r); + portEXIT_CRITICAL(&spinlock); + return ESP_ERR_NO_MEM; + } + //Range is found. Mark the blocks as in use. + for (int i = 0; i < blocks; i++) { + s_range_descriptor[r->block_start + i].is_alloced = 1; + } + portEXIT_CRITICAL(&spinlock); + //All done. 
+ *handle_out = r; + return ESP_OK; +} + +esp_err_t esp_himem_free_map_range(esp_himem_rangehandle_t handle) +{ + //Check if any of the blocks in the range have a mapping + for (int i = 0; i < handle->block_ct; i++) { + assert(rangeblock_idx_valid(handle->block_start + i)); + assert(s_range_descriptor[i + handle->block_start].is_alloced == 1); //should be, if handle is valid + HIMEM_CHECK(s_range_descriptor[i + handle->block_start].is_mapped, "memory still mapped to range", ESP_ERR_INVALID_ARG); + } + //We should be good to free this. Mark blocks as free. + portENTER_CRITICAL(&spinlock); + for (int i = 0; i < handle->block_ct; i++) { + s_range_descriptor[i + handle->block_start].is_alloced = 0; + } + portEXIT_CRITICAL(&spinlock); + free(handle); + return ESP_OK; +} + + +esp_err_t esp_himem_map(esp_himem_handle_t handle, esp_himem_rangehandle_t range, size_t ram_offset, size_t range_offset, size_t len, int flags, void **out_ptr) +{ + int ram_block = ram_offset / CACHE_BLOCKSIZE; + int range_block = range_offset / CACHE_BLOCKSIZE; + int blockcount = len / CACHE_BLOCKSIZE; + HIMEM_CHECK(s_ram_descriptor == NULL, "Himem not available!", ESP_ERR_INVALID_STATE); + //Offsets and length must be block-aligned + HIMEM_CHECK(ram_offset % CACHE_BLOCKSIZE != 0, "ram offset not aligned to blocksize", ESP_ERR_INVALID_ARG); + HIMEM_CHECK(range_offset % CACHE_BLOCKSIZE != 0, "range not aligned to blocksize", ESP_ERR_INVALID_ARG); + HIMEM_CHECK(len % CACHE_BLOCKSIZE != 0, "length not aligned to blocksize", ESP_ERR_INVALID_ARG); + //ram and range should be within allocated range + HIMEM_CHECK(ram_block + blockcount > handle->block_ct, "args not in range of phys ram handle", ESP_ERR_INVALID_SIZE); + HIMEM_CHECK(range_block + blockcount > range->block_ct, "args not in range of range handle", ESP_ERR_INVALID_SIZE); + + //Check if ram blocks aren't already mapped, and if memory range is unmapped + for (int i = 0; i < blockcount; i++) { + HIMEM_CHECK(s_ram_descriptor[handle->block[i + 
ram_block]].is_mapped, "ram already mapped", ESP_ERR_INVALID_STATE); + HIMEM_CHECK(s_range_descriptor[range->block_start + i + range_block].is_mapped, "range already mapped", ESP_ERR_INVALID_STATE); + } + + //Map and mark as mapped + portENTER_CRITICAL(&spinlock); + for (int i = 0; i < blockcount; i++) { + assert(ramblock_idx_valid(handle->block[i + ram_block])); + s_ram_descriptor[handle->block[i + ram_block]].is_mapped = 1; + s_range_descriptor[range->block_start + i + range_block].is_mapped = 1; + s_range_descriptor[range->block_start + i + range_block].ram_block = handle->block[i + ram_block]; + } + portEXIT_CRITICAL(&spinlock); + for (int i = 0; i < blockcount; i++) { + set_bank(VIRT_HIMEM_RANGE_BLOCKSTART + range->block_start + i + range_block, handle->block[i + ram_block] + PHYS_HIMEM_BLOCKSTART, 1); + } + + //Set out pointer + *out_ptr = (void *)(VIRT_HIMEM_RANGE_START + (range->block_start + range_offset) * CACHE_BLOCKSIZE); + return ESP_OK; +} + +esp_err_t esp_himem_unmap(esp_himem_rangehandle_t range, void *ptr, size_t len) +{ + //Note: doesn't actually unmap, just clears cache and marks blocks as unmapped. + //Future optimization: could actually lazy-unmap here: essentially, do nothing and only clear the cache when we re-use + //the block for a different physical address. 
+ int range_offset = (uint32_t)ptr - VIRT_HIMEM_RANGE_START; + int range_block = (range_offset / CACHE_BLOCKSIZE) - range->block_start; + int blockcount = len / CACHE_BLOCKSIZE; + HIMEM_CHECK(range_offset % CACHE_BLOCKSIZE != 0, "range offset not block-aligned", ESP_ERR_INVALID_ARG); + HIMEM_CHECK(len % CACHE_BLOCKSIZE != 0, "map length not block-aligned", ESP_ERR_INVALID_ARG); + HIMEM_CHECK(range_block + blockcount > range->block_ct, "range out of bounds for handle", ESP_ERR_INVALID_ARG); + + portENTER_CRITICAL(&spinlock); + for (int i = 0; i < blockcount; i++) { + int ramblock = s_range_descriptor[range->block_start + i + range_block].ram_block; + assert(ramblock_idx_valid(ramblock)); + s_ram_descriptor[ramblock].is_mapped = 0; + s_range_descriptor[range->block_start + i + range_block].is_mapped = 0; + } + esp_spiram_writeback_cache(); + portEXIT_CRITICAL(&spinlock); + return ESP_OK; +} + + diff --git a/components/esp32/include/esp_himem.h b/components/esp32/include/esp_himem.h new file mode 100644 index 0000000000..099d926015 --- /dev/null +++ b/components/esp32/include/esp_himem.h @@ -0,0 +1,152 @@ +// Copyright 2018 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#include +#include "esp_err.h" + +#ifdef __cplusplus +extern "C" { +#endif + +//Opaque pointers as handles for ram/range data +typedef struct esp_himem_ramdata_t *esp_himem_handle_t; +typedef struct esp_himem_rangedata_t *esp_himem_rangehandle_t; + +//ESP32 MMU block size +#define ESP_HIMEM_BLKSZ (0x8000) + +#define ESP_HIMEM_MAPFLAG_RO 1 /*!< Indicates that a mapping will only be read from. Note that this is unused for now. */ + +/** + * @brief Allocate a block in high memory + * + * @param size Size of the to-be-allocated block, in bytes. Note that this needs to be + * a multiple of the external RAM mmu block size (32K). + * @param[out] handle_out Handle to be returned + * @returns - ESP_OK if successful + * - ESP_ERR_NO_MEM if out of memory + * - ESP_ERR_INVALID_SIZE if size is not a multiple of 32K + */ +esp_err_t esp_himem_alloc(size_t size, esp_himem_handle_t *handle_out); + + +/** + * @brief Allocate a memory region to map blocks into + * + * This allocates a contiguous CPU memory region that can be used to map blocks + * of physical memory into. + * + * @param size Size of the range to be allocated. Note this needs to be a multiple of + * the external RAM mmu block size (32K). + * @param[out] handle_out Handle to be returned + * @returns - ESP_OK if successful + * - ESP_ERR_NO_MEM if out of memory or address space + * - ESP_ERR_INVALID_SIZE if size is not a multiple of 32K + */ +esp_err_t esp_himem_alloc_map_range(size_t size, esp_himem_rangehandle_t *handle_out); + +/** + * @brief Map a block of high memory into the CPU's address space + * + * This effectively makes the block available for read/write operations. 
+ * + * @note The region to be mapped needs to have offsets and sizes that are aligned to the + * SPI RAM MMU block size (32K) + * + * @param handle Handle to the block of memory, as given by esp_himem_alloc + * @param range Range handle to map the memory in + * @param ram_offset Offset into the block of physical memory of the block to map + * @param range_offset Offset into the address range where the block will be mapped + * @param len Length of region to map + * @param flags One of ESP_HIMEM_MAPFLAG_* + * @param[out] out_ptr Pointer to variable to store resulting memory pointer in + * @returns - ESP_OK if the memory could be mapped + * - ESP_ERR_INVALID_ARG if offset, range or len aren't MMU-block-aligned (32K) + * - ESP_ERR_INVALID_SIZE if the offsets/lengths don't fit in the allocated memory or range + * - ESP_ERR_INVALID_STATE if a block in the selected ram offset/length is already mapped, or + * if a block in the selected range offset/length already has a mapping. + */ +esp_err_t esp_himem_map(esp_himem_handle_t handle, esp_himem_rangehandle_t range, size_t ram_offset, size_t range_offset, size_t len, int flags, void **out_ptr); + + +/** + * @brief Free a block of physical memory + * + * This clears out the associated handle making the memory available for re-allocation again. + * This will only succeed if none of the memory blocks currently have a mapping. + * + * @param handle Handle to the block of memory, as given by esp_himem_alloc + * @returns - ESP_OK if the memory is successfully freed + * - ESP_ERR_INVALID_ARG if the handle still is (partially) mapped + */ +esp_err_t esp_himem_free(esp_himem_handle_t handle); + + + +/** + * @brief Free a mapping range + * + * This clears out the associated handle making the range available for re-allocation again. + * This will only succeed if none of the range blocks currently are used for a mapping. 
+ * + * @param handle Handle to the range block, as given by esp_himem_alloc_map_range + * @returns - ESP_OK if the memory is successfully freed + * - ESP_ERR_INVALID_ARG if the handle still is (partially) mapped to + */ +esp_err_t esp_himem_free_map_range(esp_himem_rangehandle_t handle); + + +/** + * @brief Unmap a region + * + * @param range Range handle + * @param ptr Pointer returned by esp_himem_map + * @param len Length of the block to be unmapped. Must be aligned to the SPI RAM MMU blocksize (32K) + * @returns - ESP_OK if the memory is successfully unmapped, + * - ESP_ERR_INVALID_ARG if ptr or len are invalid. + */ +esp_err_t esp_himem_unmap(esp_himem_rangehandle_t range, void *ptr, size_t len); + + +/** + * @brief Get total amount of memory under control of himem API + * + * @returns Amount of memory, in bytes + */ +size_t esp_himem_get_phys_size(); + +/** + * @brief Get free amount of memory under control of himem API + * + * @returns Amount of free memory, in bytes + */ +size_t esp_himem_get_free_size(); + + +/** + * @brief Get amount of SPI memory address space needed for bankswitching + * + * @note This is also weakly defined in esp32/spiram.c and returns 0 there, so + * if no other function in this file is used, no memory is reserved. + * + * @returns Amount of reserved area, in bytes + */ +size_t esp_himem_reserved_area_size(); + + +#ifdef __cplusplus +} +#endif + diff --git a/components/esp32/include/rom/cache.h b/components/esp32/include/rom/cache.h index 72aa33aaab..4b923e669d 100644 --- a/components/esp32/include/rom/cache.h +++ b/components/esp32/include/rom/cache.h @@ -83,6 +83,9 @@ static inline unsigned int IRAM_ATTR cache_flash_mmu_set(int cpu_no, int pid, un * @brief Set Ext-SRAM-Cache mmu mapping. * Please do not call this function in your SDK application. 
* + * Note that this code lives in IRAM and has a bugfix in respect to the ROM version + * of this function (which erroneously refused a vaddr > 2MiB + * * @param int cpu_no : CPU number, 0 for PRO cpu, 1 for APP cpu. * * @param int pod : process identifier. Range 0~7. @@ -106,18 +109,7 @@ static inline unsigned int IRAM_ATTR cache_flash_mmu_set(int cpu_no, int pid, un * 4 : mmu table to be written is out of range * 5 : vaddr is out of range */ -static inline unsigned int IRAM_ATTR cache_sram_mmu_set(int cpu_no, int pid, unsigned int vaddr, unsigned int paddr, int psize, int num) -{ - extern unsigned int cache_sram_mmu_set_rom(int cpu_no, int pid, unsigned int vaddr, unsigned int paddr, int psize, int num); - - unsigned int ret; - - DPORT_STALL_OTHER_CPU_START(); - ret = cache_sram_mmu_set_rom(cpu_no, pid, vaddr, paddr, psize, num); - DPORT_STALL_OTHER_CPU_END(); - - return ret; -} +unsigned int IRAM_ATTR cache_sram_mmu_set(int cpu_no, int pid, unsigned int vaddr, unsigned int paddr, int psize, int num); /** * @brief Initialise cache access for the cpu. diff --git a/components/esp32/spiram.c b/components/esp32/spiram.c index 5345c5a374..58360aa0ef 100644 --- a/components/esp32/spiram.c +++ b/components/esp32/spiram.c @@ -34,6 +34,7 @@ we add more types of external RAM memory, this can be made into a more intellige #include "soc/soc_memory_layout.h" #include "soc/dport_reg.h" #include "rom/cache.h" +#include "esp_himem.h" #if CONFIG_FREERTOS_UNICORE #define PSRAM_MODE PSRAM_VADDR_MODE_NORMAL @@ -65,6 +66,22 @@ extern int _ext_ram_bss_start, _ext_ram_bss_end; static bool spiram_inited=false; +//If no function in esp_himem.c is used, this function will be linked into the +//binary instead of the one in esp_himem.c, automatically making sure no memory +//is reserved if no himem function is used. 
+size_t __attribute__((weak)) esp_himem_reserved_area_size() { + return 0; +} + + +static int spiram_size_usable_for_malloc() +{ + int s=esp_spiram_get_size(); + if (s>4*1024*1024) s=4*1024*1024; //we can map at most 4MiB + return s-esp_himem_reserved_area_size(); +} + + /* Simple RAM test. Writes a word every 32 bytes. Takes about a second to complete for 4MiB. Returns true when RAM seems OK, false when test fails. WARNING: Do not run this before the 2nd cpu has been @@ -74,7 +91,7 @@ bool esp_spiram_test() { volatile int *spiram=(volatile int*)SOC_EXTRAM_DATA_LOW; size_t p; - size_t s=CONFIG_SPIRAM_SIZE; + size_t s=spiram_size_usable_for_malloc(); int errct=0; int initial_err=-1; for (p=0; p<(s/sizeof(int)); p+=8) { @@ -109,7 +126,7 @@ void IRAM_ATTR esp_spiram_init_cache() esp_spiram_size_t esp_spiram_get_chip_size() { if (!spiram_inited) { - ESP_LOGE(TAG, "SPI RAM not initialized"); + ESP_EARLY_LOGE(TAG, "SPI RAM not initialized"); return ESP_SPIRAM_SIZE_INVALID; } psram_size_t psram_size = psram_get_size(); @@ -134,6 +151,16 @@ esp_err_t esp_spiram_init() return r; } + spiram_inited=true; //note: this needs to be set before esp_spiram_get_chip_*/esp_spiram_get_size calls +#if (CONFIG_SPIRAM_SIZE != -1) + if (esp_spiram_get_size()!=CONFIG_SPIRAM_SIZE) { + ESP_EARLY_LOGE(TAG, "Expected %dKiB chip but found %dKiB chip. Bailing out..", CONFIG_SPIRAM_SIZE/1024, esp_spiram_get_size()/1024); + return ESP_ERR_INVALID_SIZE; + } +#endif + + ESP_EARLY_LOGI(TAG, "Found %dMBit SPI RAM device", + (esp_spiram_get_size()*8)/(1024*1024)); ESP_EARLY_LOGI(TAG, "SPI RAM mode: %s", PSRAM_SPEED == PSRAM_CACHE_F40M_S40M ? "flash 40m sram 40m" : \ PSRAM_SPEED == PSRAM_CACHE_F80M_S40M ? "flash 80m sram 40m" : \ PSRAM_SPEED == PSRAM_CACHE_F80M_S80M ? 
"flash 80m sram 80m" : "ERROR"); @@ -141,21 +168,20 @@ esp_err_t esp_spiram_init() (PSRAM_MODE==PSRAM_VADDR_MODE_EVENODD)?"even/odd (2-core)": \ (PSRAM_MODE==PSRAM_VADDR_MODE_LOWHIGH)?"low/high (2-core)": \ (PSRAM_MODE==PSRAM_VADDR_MODE_NORMAL)?"normal (1-core)":"ERROR"); - spiram_inited=true; return ESP_OK; } esp_err_t esp_spiram_add_to_heapalloc() -{ +{ //Add entire external RAM region to heap allocator. Heap allocator knows the capabilities of this type of memory, so there's //no need to explicitly specify them. #if CONFIG_SPIRAM_ALLOW_BSS_SEG_EXTERNAL_MEMORY - ESP_EARLY_LOGI(TAG, "Adding pool of %dK of external SPI memory to heap allocator", (CONFIG_SPIRAM_SIZE - (&_ext_ram_bss_end - &_ext_ram_bss_start))/1024); - return heap_caps_add_region((intptr_t)&_ext_ram_bss_end, (intptr_t)SOC_EXTRAM_DATA_LOW + CONFIG_SPIRAM_SIZE-1); + ESP_EARLY_LOGI(TAG, "Adding pool of %dK of external SPI memory to heap allocator", (spiram_size_usable_for_malloc() - ((intptr_t)&_ext_ram_bss_end - (intptr_t)&_ext_ram_bss_start))/1024); //cast first: subtracting the int* symbols directly counts ints, not bytes + return heap_caps_add_region((intptr_t)&_ext_ram_bss_end, (intptr_t)SOC_EXTRAM_DATA_LOW + spiram_size_usable_for_malloc()-1); #else - ESP_EARLY_LOGI(TAG, "Adding pool of %dK of external SPI memory to heap allocator", CONFIG_SPIRAM_SIZE/1024); - return heap_caps_add_region((intptr_t)SOC_EXTRAM_DATA_LOW, (intptr_t)SOC_EXTRAM_DATA_LOW + CONFIG_SPIRAM_SIZE-1); + ESP_EARLY_LOGI(TAG, "Adding pool of %dK of external SPI memory to heap allocator", spiram_size_usable_for_malloc()/1024); + return heap_caps_add_region((intptr_t)SOC_EXTRAM_DATA_LOW, (intptr_t)SOC_EXTRAM_DATA_LOW + spiram_size_usable_for_malloc()-1); #endif } @@ -187,12 +213,17 @@ esp_err_t esp_spiram_reserve_dma_pool(size_t size) { size_t esp_spiram_get_size() { + psram_size_t size=esp_spiram_get_chip_size(); + if (size==PSRAM_SIZE_32MBITS) return 4*1024*1024; + if (size==PSRAM_SIZE_64MBITS) return 8*1024*1024; return CONFIG_SPIRAM_SIZE; } /* Before flushing the cache, if psram is enabled as a memory-mapped thing, we
need to write back the data in the cache to the psram first, otherwise it will get lost. For now, we just read 64/128K of random PSRAM memory to do this. + Note that this routine assumes some unique mapping for the first 2 banks of the PSRAM memory range, as well as the + 2 banks after the 2 MiB mark. */ void IRAM_ATTR esp_spiram_writeback_cache() { @@ -216,18 +247,24 @@ void IRAM_ATTR esp_spiram_writeback_cache() } #endif -#if CONFIG_FREERTOS_UNICORE +#if (PSRAM_MODE != PSRAM_VADDR_MODE_LOWHIGH) + /* + Single-core and even/odd mode only have 32K of cache evenly distributed over the address lines. We can clear + the cache by just reading 64K worth of cache lines. + */ for (x=0; x<1024*64; x+=32) { i+=psram[x]; } #else /* + Low/high psram cache mode uses one 32K cache for the lowest 2MiB of SPI RAM and another 32K for the highest + 2MiB. Clear this by reading from both regions. Note: this assumes the amount of external RAM is >2M. If it is 2M or less, what this code does is undefined. If we ever support external RAM chips of 2M or smaller, this may need adjusting. */ for (x=0; x<1024*64; x+=32) { i+=psram[x]; - i+=psram[x+(1024*1024*2)+(1024*64)]; //address picked to also clear cache of app cpu in low/high mode + i+=psram[x+(1024*1024*2)]; } #endif @@ -243,6 +280,4 @@ void IRAM_ATTR esp_spiram_writeback_cache() #endif } - - #endif diff --git a/components/esp32/test/test_himem.c b/components/esp32/test/test_himem.c new file mode 100644 index 0000000000..95ed27d5ae --- /dev/null +++ b/components/esp32/test/test_himem.c @@ -0,0 +1,104 @@ +#include "unity.h" +#include +#include +#include +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "esp_system.h" +#include "rom/cache.h" +#include "sdkconfig.h" +#include "esp_himem.h" + +#if CONFIG_SPIRAM_BANKSWITCH_ENABLE + +//Fill memory with pseudo-random data generated from the given seed.
+static void fill_mem_seed(int seed, void *mem, int len) +{ + uint32_t *p = (uint32_t *)mem; + unsigned int rseed = seed ^ 0xa5a5a5a5; + for (int i = 0; i < len / 4; i++) { + *p++ = rand_r(&rseed); + } +} + +//Check the memory filled by fill_mem_seed. Returns true if the data is still intact. +static bool check_mem_seed(int seed, void *mem, int len) +{ + uint32_t *p = (uint32_t *)mem; + unsigned int rseed = seed ^ 0xa5a5a5a5; + for (int i = 0; i < len / 4; i++) { + uint32_t ex = rand_r(&rseed); + if (ex != *p) { + printf("check_mem_seed: %p @ %p has 0x%08x expected 0x%08x\n", mem, p, *p, ex); + return false; + } + p++; + } + return true; +} + +//Allocate a himem region, fill it with data, check it and release it. +static bool test_region(int check_size, int seed) +{ + esp_himem_handle_t mh; + esp_himem_rangehandle_t rh; + bool ret = true; + + ESP_ERROR_CHECK(esp_himem_alloc(check_size, &mh)); + ESP_ERROR_CHECK(esp_himem_alloc_map_range(ESP_HIMEM_BLKSZ * 2, &rh)); + for (int i = 0; i < check_size; i += ESP_HIMEM_BLKSZ) { + uint32_t *ptr = NULL; + ESP_ERROR_CHECK(esp_himem_map(mh, rh, i, 0, ESP_HIMEM_BLKSZ, 0, (void**)&ptr)); + fill_mem_seed(i ^ seed, ptr, ESP_HIMEM_BLKSZ); + ESP_ERROR_CHECK(esp_himem_unmap(rh, ptr, ESP_HIMEM_BLKSZ)); + } + for (int i = 0; i < check_size; i += ESP_HIMEM_BLKSZ) { + uint32_t *ptr = NULL; + ESP_ERROR_CHECK(esp_himem_map(mh, rh, i, 0, ESP_HIMEM_BLKSZ, 0, (void**)&ptr)); + if (!check_mem_seed(i ^ seed, ptr, ESP_HIMEM_BLKSZ)) { + printf("Error in block %d\n", i / ESP_HIMEM_BLKSZ); + ret = false; + } + ESP_ERROR_CHECK(esp_himem_unmap(rh, ptr, ESP_HIMEM_BLKSZ)); + if (!ret) break; //unmap before bailing out: a range that is still mapped cannot be freed + } + ESP_ERROR_CHECK(esp_himem_free(mh)); + ESP_ERROR_CHECK(esp_himem_free_map_range(rh)); + return ret; +} + + +static volatile int testsDone; + +static void memtest_thread(void *arg) +{ + int d = (int)arg; + for (int i = 0; i < 4; i++) { + printf("d=%d check=%d\n", d, i); + test_region(2 * 1024 * 1024, d + (i << 16)); + vTaskDelay(d); + } + testsDone++; //note possible race
here... not really an issue if the two tasks have different vTaskDelay args + vTaskDelete(NULL); +} + + +TEST_CASE("high psram memory test", "[himem]") +{ + printf("Doing single-core test\n"); + TEST_ASSERT(test_region(4 * 1024 * 1024, 0xaaaa)); //plain assert() would compile the whole test away under NDEBUG + + testsDone = 0; + printf("Doing dual-core test...\n"); + xTaskCreatePinnedToCore(&memtest_thread, "th2", 1024 * 2, (void *)2, 5, NULL, 1); + xTaskCreatePinnedToCore(&memtest_thread, "th1", 1024 * 2, (void *)5, 5, NULL, 0); + while (testsDone != 2) { + vTaskDelay(10); + } + + printf("Done!\n"); + vTaskDelay(100); +} + + +#endif diff --git a/docs/Doxyfile b/docs/Doxyfile index 4765c675c8..3820e35c50 100644 --- a/docs/Doxyfile +++ b/docs/Doxyfile @@ -147,6 +147,8 @@ INPUT = \ ../../components/heap/include/esp_heap_trace.h \ ../../components/heap/include/esp_heap_caps_init.h \ ../../components/heap/include/multi_heap.h \ + ## Himem + ../../components/esp32/include/esp_himem.h \ ## Interrupt Allocation ../../components/esp32/include/esp_intr_alloc.h \ ## Watchdogs diff --git a/docs/en/api-reference/system/himem.rst b/docs/en/api-reference/system/himem.rst new file mode 100644 index 0000000000..a3eba8be3b --- /dev/null +++ b/docs/en/api-reference/system/himem.rst @@ -0,0 +1,33 @@ +The himem allocation API +======================== + +Overview +-------- + +The ESP32 can access external SPI RAM transparently, so you can use it as normal memory in your program code. However, because the address +space for external memory is limited in size, only the first 4MiB can be used as such. Access to the remaining memory is still possible, +however this needs to go through a bankswitching scheme controlled by the himem API. + +Specifically, what is implemented by the himem API is a bankswitching scheme. Hardware-wise, the 4MiB region for external SPI RAM is +mapped into the CPU address space by an MMU, which maps a configurable 32K bank/page of external SPI RAM into each of the 32K pages in the +4MiB region accessed by the CPU.
For external memories that are <=4MiB, this MMU is configured to unity mapping, effectively mapping each +CPU address 1-to-1 to the external SPI RAM address. + +In order to use the himem API, you have to enable it in the menuconfig using :envvar:`CONFIG_SPIRAM_BANKSWITCH_ENABLE`, as well as set the number +of banks reserved for this in :envvar:`CONFIG_SPIRAM_BANKSWITCH_RESERVE`. This decreases +the amount of external memory that can be allocated by functions like ``malloc()``, but it allows you to use the himem API to map any of the remaining memory +into the reserved banks. + +The himem API is more-or-less an abstraction of the bankswitching scheme: it allows you to claim one or more banks of address space +(called 'ranges' in the API) as well as one or more banks of memory to map into the ranges. + +Example +------- + +An example doing a simple memory test of the high memory range is available in esp-idf: :example:`system/himem` + + +API Reference +------------- + +.. include:: /_build/inc/esp_himem.inc diff --git a/docs/en/api-reference/system/index.rst b/docs/en/api-reference/system/index.rst index 76b7c8d62a..c85b238eb8 100644 --- a/docs/en/api-reference/system/index.rst +++ b/docs/en/api-reference/system/index.rst @@ -8,6 +8,7 @@ System API FreeRTOS Additions Heap Memory Allocation Heap Memory Debugging + Himem (large external SPI RAM) API <himem> Interrupt Allocation Watchdogs Inter-Processor Call diff --git a/docs/en/api-reference/system/mem_alloc.rst b/docs/en/api-reference/system/mem_alloc.rst index 7d56bf1955..62a322bd8c 100644 --- a/docs/en/api-reference/system/mem_alloc.rst +++ b/docs/en/api-reference/system/mem_alloc.rst @@ -40,6 +40,9 @@ which it can't do for a normal malloc() call. This can help to use all the avail Memory allocated with MALLOC_CAP_32BIT can *only* be accessed via 32-bit reads and writes, any other type of access will generate a fatal LoadStoreError exception.
+Up to 4MiB of external SPI RAM can be allocated using standard ``malloc`` calls, if that is enabled in menuconfig. To +use the region above the 4MiB limit, you can use the :doc:`himem API <himem>`. + API Reference - Heap Allocation ------------------------------- diff --git a/docs/zh_CN/api-reference/system/himem.rst b/docs/zh_CN/api-reference/system/himem.rst new file mode 100644 index 0000000000..e34272742a --- /dev/null +++ b/docs/zh_CN/api-reference/system/himem.rst @@ -0,0 +1 @@ +.. include:: ../../../en/api-reference/system/himem.rst \ No newline at end of file diff --git a/examples/system/himem/CMakeLists.txt b/examples/system/himem/CMakeLists.txt new file mode 100644 index 0000000000..4297397b63 --- /dev/null +++ b/examples/system/himem/CMakeLists.txt @@ -0,0 +1,6 @@ +# The following lines of boilerplate have to be in your project's CMakeLists +# in this exact order for cmake to work correctly +cmake_minimum_required(VERSION 3.5) + +include($ENV{IDF_PATH}/tools/cmake/project.cmake) +project(himem_test) diff --git a/examples/system/himem/Makefile b/examples/system/himem/Makefile new file mode 100644 index 0000000000..6ffdebd264 --- /dev/null +++ b/examples/system/himem/Makefile @@ -0,0 +1,9 @@ +# +# This is a project Makefile. It is assumed the directory this Makefile resides in is a +# project subdirectory. +# + +PROJECT_NAME := himem_test + +include $(IDF_PATH)/make/project.mk + diff --git a/examples/system/himem/README.md b/examples/system/himem/README.md new file mode 100644 index 0000000000..700df059f1 --- /dev/null +++ b/examples/system/himem/README.md @@ -0,0 +1,5 @@ +# Example: himem + +This example tests the upper memory of an ESP32 system with 8MiB of PSRAM +attached. Note that this (obviously) will not work unless the ESP32 +you're running this on actually does have 8MiB of PSRAM.
diff --git a/examples/system/himem/main/CMakeLists.txt b/examples/system/himem/main/CMakeLists.txt new file mode 100644 index 0000000000..a875dec50c --- /dev/null +++ b/examples/system/himem/main/CMakeLists.txt @@ -0,0 +1,4 @@ +set(COMPONENT_SRCS "himem_test_main.c") +set(COMPONENT_ADD_INCLUDEDIRS ".") + +register_component() diff --git a/examples/system/himem/main/component.mk b/examples/system/himem/main/component.mk new file mode 100644 index 0000000000..44bd2b5273 --- /dev/null +++ b/examples/system/himem/main/component.mk @@ -0,0 +1,3 @@ +# +# Main Makefile. This is basically the same as a component makefile. +# diff --git a/examples/system/himem/main/himem_test_main.c b/examples/system/himem/main/himem_test_main.c new file mode 100644 index 0000000000..c5f8c97a2a --- /dev/null +++ b/examples/system/himem/main/himem_test_main.c @@ -0,0 +1,98 @@ +/* Himem API example + + This example code is in the Public Domain (or CC0 licensed, at your option.) + + Unless required by applicable law or agreed to in writing, this + software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + CONDITIONS OF ANY KIND, either express or implied. +*/ +#include +#include +#include +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "freertos/queue.h" +#include "esp_system.h" +#include "nvs_flash.h" +#include "esp_heap_caps.h" +#include "esp_spiram.h" +#include "rom/cache.h" +#include "sdkconfig.h" +#include "esp_himem.h" + + +//Fill memory with pseudo-random data generated from the given seed. +//Fills the memory in 32-bit words for speed. +static void fill_mem_seed(int seed, void *mem, int len) +{ + uint32_t *p = (uint32_t *)mem; + unsigned int rseed = seed ^ 0xa5a5a5a5; + for (int i = 0; i < len / 4; i++) { + *p++ = rand_r(&rseed); + } +} + +//Check the memory filled by fill_mem_seed. Returns true if the data matches the data +//that fill_mem_seed wrote (when given the same seed). 
+//Returns true if there's a match, false when the region differs from what should be there. +static bool check_mem_seed(int seed, void *mem, int len, int phys_addr) +{ + uint32_t *p = (uint32_t *)mem; + unsigned int rseed = seed ^ 0xa5a5a5a5; + for (int i = 0; i < len / 4; i++) { + uint32_t ex = rand_r(&rseed); + if (ex != *p) { + printf("check_mem_seed: %x has 0x%08x expected 0x%08x\n", phys_addr+((char*)p-(char*)mem), *p, ex); + return false; + } + p++; + } + return true; +} + +//Allocate a himem region, fill it with data, check it and release it. +static bool test_region(int check_size, int seed) +{ + esp_himem_handle_t mh; //Handle for the address space we're using + esp_himem_rangehandle_t rh; //Handle for the actual RAM. + bool ret = true; + + //Allocate the memory we're going to check. + ESP_ERROR_CHECK(esp_himem_alloc(check_size, &mh)); + //Allocate a block of address range + ESP_ERROR_CHECK(esp_himem_alloc_map_range(ESP_HIMEM_BLKSZ, &rh)); + for (int i = 0; i < check_size; i += ESP_HIMEM_BLKSZ) { + uint32_t *ptr = NULL; + //Map in block, write pseudo-random data, unmap block. + ESP_ERROR_CHECK(esp_himem_map(mh, rh, i, 0, ESP_HIMEM_BLKSZ, 0, (void**)&ptr)); + fill_mem_seed(i ^ seed, ptr, ESP_HIMEM_BLKSZ); // + ESP_ERROR_CHECK(esp_himem_unmap(rh, ptr, ESP_HIMEM_BLKSZ)); + } + vTaskDelay(5); //give the OS some time to do things so the task watchdog doesn't bark + for (int i = 0; i < check_size; i += ESP_HIMEM_BLKSZ) { + uint32_t *ptr; + //Map in block, check against earlier written pseudo-random data, unmap block. + ESP_ERROR_CHECK(esp_himem_map(mh, rh, i, 0, ESP_HIMEM_BLKSZ, 0, (void**)&ptr)); + if (!check_mem_seed(i ^ seed, ptr, ESP_HIMEM_BLKSZ, i)) { + printf("Error in block %d\n", i / ESP_HIMEM_BLKSZ); + ret = false; + } + ESP_ERROR_CHECK(esp_himem_unmap(rh, ptr, ESP_HIMEM_BLKSZ)); + if (!ret) break; //don't check rest of blocks if error occurred + } + //Okay, all done! 
+ ESP_ERROR_CHECK(esp_himem_free(mh)); + ESP_ERROR_CHECK(esp_himem_free_map_range(rh)); + return ret; +} + + +void app_main() +{ + size_t memcnt=esp_himem_get_phys_size(); + size_t memfree=esp_himem_get_free_size(); + printf("Himem has %dKiB of memory, %dKiB of which is free. Testing the free memory...\n", (int)memcnt/1024, (int)memfree/1024); + if (!test_region(memfree, 0xaaaa)) abort(); //not assert(): that would skip the whole test when NDEBUG is defined + printf("Done!\n"); +} + diff --git a/examples/system/himem/sdkconfig.defaults b/examples/system/himem/sdkconfig.defaults new file mode 100644 index 0000000000..b33372a2ce --- /dev/null +++ b/examples/system/himem/sdkconfig.defaults @@ -0,0 +1,12 @@ +CONFIG_SPIRAM_SUPPORT=y +CONFIG_SPIRAM_BOOT_INIT=y +CONFIG_SPIRAM_IGNORE_NOTFOUND= +CONFIG_SPIRAM_USE_MALLOC=y +CONFIG_SPIRAM_TYPE_AUTO=y +CONFIG_SPIRAM_SIZE=-1 +CONFIG_SPIRAM_SPEED_40M=y +CONFIG_SPIRAM_MEMTEST=y +CONFIG_SPIRAM_CACHE_WORKAROUND=y +CONFIG_SPIRAM_BANKSWITCH_ENABLE=y +CONFIG_SPIRAM_BANKSWITCH_RESERVE=4 + diff --git a/tools/unit-test-app/configs/psram b/tools/unit-test-app/configs/psram index 4575118a4e..03380b2d4b 100644 --- a/tools/unit-test-app/configs/psram +++ b/tools/unit-test-app/configs/psram @@ -1,2 +1,3 @@ TEST_EXCLUDE_COMPONENTS=libsodium bt app_update -CONFIG_SPIRAM_SUPPORT=y \ No newline at end of file +CONFIG_SPIRAM_SUPPORT=y +CONFIG_SPIRAM_BANKSWITCH_ENABLE=n diff --git a/tools/unit-test-app/configs/psram_8m b/tools/unit-test-app/configs/psram_8m new file mode 100644 index 0000000000..d28e58702a --- /dev/null +++ b/tools/unit-test-app/configs/psram_8m @@ -0,0 +1,4 @@ +TEST_COMPONENTS=esp32 +CONFIG_SPIRAM_SUPPORT=y +CONFIG_SPIRAM_BANKSWITCH_ENABLE=y +CONFIG_SPIRAM_BANKSWITCH_RESERVE=8 diff --git a/tools/unit-test-app/tools/ConfigDependency.yml b/tools/unit-test-app/tools/ConfigDependency.yml index 3af3e565b5..f7b265bc2a 100644 --- a/tools/unit-test-app/tools/ConfigDependency.yml +++ b/tools/unit-test-app/tools/ConfigDependency.yml @@ -1 +1,2 @@ -"psram": "CONFIG_SPIRAM_SUPPORT=y" +"psram":
'{CONFIG_SPIRAM_SUPPORT=y} and not {CONFIG_SPIRAM_BANKSWITCH_ENABLE=y}' +"8Mpsram": "CONFIG_SPIRAM_BANKSWITCH_ENABLE=y" -- 2.40.0