cpu/esp8266: add LoadStoreError exception handler
Usually, access to the IROM (flash) memory requires 32-bit word aligned reads. Attempts to access data in the IROM (flash) memory that is less than 32 bits in size trigger a LoadStoreError exception. With the exception handler from esp-open-rtos it becomes possible to access data in IROM (flash) with a size of less than 32 bits and thus to place .rodata sections in the IROM (flash).
This commit is contained in:
parent
b17070fbf1
commit
de91b8dc88
312
cpu/esp_common/vendor/xtensa/xtensa_vectors.S
vendored
312
cpu/esp_common/vendor/xtensa/xtensa_vectors.S
vendored
@ -485,12 +485,324 @@ User Exception (including Level 1 Interrupt from user mode).
|
||||
|
||||
_UserExceptionVector:

    /*
     * Both configurations start the same way: the interrupted code's a0 is
     * parked in EXCSAVE_1 before control leaves the vector.
     */
    wsr     a0, EXCSAVE_1                   /* preserve a0 */

#ifndef MCU_ESP8266
    call0   _xt_user_exc                    /* user exception handler */
    /* never returns here - call0 is used as a jump (see note at top) */
#else
    /*
     * On the ESP8266 the trampoline looks at the exception cause first, so
     * that LoadStoreError can be emulated instead of being passed on.
     */
    j       _UserExceptionTrampoline        /* jump to handler trampoline */
#endif
|
||||
|
||||
.end literal_prefix
|
||||
|
||||
#ifdef MCU_ESP8266
|
||||
/*************************** LoadStoreError Handler BEGIN ********************/
|
||||
/*
|
||||
* PLEASE NOTE: The code between "LoadStoreError Handler BEGIN" and
|
||||
* "LoadStoreError Handler END" markers was extracted from esp-open-rtos. It is
|
||||
* under the following copyright:
|
||||
*
|
||||
* Original vector contents Copyright (C) 2014-2015 Espressif Systems
|
||||
* Additions Copyright (C) Superhouse Automation Pty Ltd and Angus Gratton
|
||||
* BSD Licensed as described in the file LICENSE
|
||||
*
|
||||
* Usually, the access to the IROM (flash) memory requires 32-bit word aligned
|
||||
* reads. Attempts to access data in the IROM (flash) memory less than 32 bits
|
||||
* in size trigger a LoadStoreError exception. Therefore, it is not possible to
|
||||
* place .rodata sections in IROM (flash). Rather, .rodata sections have to
|
||||
* be placed in RAM. With the exception handler from esp-open-rtos it becomes
|
||||
* possible to access data in IROM (flash) with a size of less than 32 bits
|
||||
* and thus to place .rodata sections in the IROM (flash).
|
||||
*/
|
||||
|
||||
/* EXCCAUSE value the trampoline compares against to select the LoadStoreError handler */
#define CAUSE_LOADSTORE             3

/* Exit used for opcodes the handler cannot emulate: the regular user exception handler */
#define fatal_exception_handler     _xt_user_exc
|
||||
|
||||
/*
 * LoadStoreError handler register save area.
 *
 * Five 32-bit slots, zero-initialized, 16-byte aligned. A register is stored
 * at (register number * 4), so the handler can turn a destination register
 * number directly into a slot address. Slot 1 is never written because a1 is
 * kept in EXCSAVE_1 while the handler runs.
 */

    .section .bss
    .balign 16

_LoadStoreErrorHandlerStack:
    .space  4                               /* 0x00: a0 */
    .space  4                               /* 0x04: (unused) */
    .space  4                               /* 0x08: a2 */
    .space  4                               /* 0x0c: a3 */
    .space  4                               /* 0x10: a4 */
|
||||
|
||||
/*
 * User exception trampoline (ESP8266 only).
 *
 * Reached by a jump from _UserExceptionVector, which has already stored the
 * interrupted code's a0 in EXCSAVE_1. a1 is borrowed as scratch (parked in
 * EXCSAVE_2) to inspect EXCCAUSE:
 *   - CAUSE_LOADSTORE: branch to _LoadStoreErrorHandler; a1 is still parked
 *     in EXCSAVE_2 at that point.
 *   - anything else:   a1 is restored and the exception is passed on to
 *     _xt_user_exc.
 *
 * This code is only assembled inside the enclosing MCU_ESP8266 conditional,
 * so no further preprocessor guard is needed here.
 */

    .section .UserExceptionTrampoline.text, "x"
    .literal_position
    .balign 4

_UserExceptionTrampoline:

    wsr     a1, EXCSAVE_2                   /* preserve a1 */
    rsr     a1, exccause                    /* a1 = exception cause */
    beqi    a1, CAUSE_LOADSTORE, _LoadStoreErrorHandler

    rsr     a1, EXCSAVE_2                   /* restore a1 */
    call0   _xt_user_exc                    /* user exception handler */
    /* never returns here - call0 is used as a jump (see note at top) */
|
||||
|
||||
/*
 * Xtensa "Load/Store Exception" handler:
 * Completes L8/L16 load instructions from Instruction address space,
 * for which the architecture only supports 32-bit reads.
 *
 * Entered from _UserExceptionTrampoline when EXCCAUSE is CAUSE_LOADSTORE.
 * On entry:
 *   a0         still holds the interrupted code's a0
 *   EXCSAVE_2  holds the interrupted code's a1 (parked by the trampoline)
 *
 * While the handler runs:
 *   a1/sp      points at _LoadStoreErrorHandlerStack (register save area)
 *   EXCSAVE_1  holds the interrupted code's a1
 *   a0         holds the saved SAR value
 *   a2         holds the faulting instruction word
 *   a4         holds the emulated load result (from .LSE_post_fetch on)
 *
 * The handler returns with RFE to the instruction following the emulated
 * load, with the loaded value placed in that instruction's destination
 * register.
 *
 * (Fast path (no branches) is for L8UI)
 */
    .literal_position
    .balign 4
    /* The symbol typed here must be the label defined below. */
    .type   _LoadStoreErrorHandler, @function

_LoadStoreErrorHandler:

    rsr     a1, EXCSAVE_2                   /* restore a1 */
    wsr     a1, EXCSAVE_1                   /* save it to excsave1 (a0 is still
                                             * live in the register and goes to
                                             * the save area below) */

    /* Registers are saved in the address corresponding to their register
     * number times 4. This allows a quick and easy mapping later on when
     * needing to store the value to a particular register number. */
    movi    sp, _LoadStoreErrorHandlerStack
    s32i    a0, sp, 0
    s32i    a2, sp, 0x08
    s32i    a3, sp, 0x0c
    s32i    a4, sp, 0x10
    rsr     a0, sar                         /* Save SAR in a0 to restore later */

    /* Examine the opcode which generated the exception */
    /* Note: Instructions are in this order to avoid pipeline stalls. */
    rsr     a2, epc1
    movi    a3, ~3
    ssa8l   a2                              /* sar is now correct shift for aligned read */
    and     a2, a2, a3                      /* a2 now 4-byte aligned address of instruction */
    l32i    a4, a2, 0
    l32i    a2, a2, 4
    movi    a3, 0x00700F                    /* opcode mask for l8ui/l16si/l16ui */
    src     a2, a2, a4                      /* a2 now instruction that failed */
    and     a3, a2, a3                      /* a3 is masked instruction */
    bnei    a3, 0x000002, .LSE_check_l16

    /* Note: At this point, opcode could technically be one of two things:
     *   xx0xx2 (L8UI)
     *   xx8xx2 (Reserved (invalid) opcode)
     * It is assumed that we'll never get to this point from an illegal
     * opcode, so we don't bother to check for that case and presume this
     * is always an L8UI. */

    movi    a4, ~3
    rsr     a3, excvaddr                    /* read faulting address */
    and     a4, a3, a4                      /* a4 now word aligned read address */

    l32i    a4, a4, 0                       /* perform the actual read */
    ssa8l   a3                              /* sar is now shift to extract a3's byte */
    srl     a3, a4                          /* shift right correct distance */
    extui   a4, a3, 0, 8                    /* mask off bits we need for an l8 */

.LSE_post_fetch:
    /* We jump back here after either the L8UI or the L16*I routines do the
     * necessary work to read the value from memory.
     * At this point, a2 holds the faulting instruction and a4 holds the
     * correctly read value.
     *
     * Restore original SAR value (saved in a0) and update EPC so we'll
     * return back to the instruction following the one we just emulated */

    /* Note: Instructions are in this order to avoid pipeline stalls */
    rsr     a3, epc1
    wsr     a0, sar
    addi    a3, a3, 0x3                     /* step over the emulated load */
    wsr     a3, epc1

    /* Stupid opcode tricks: The jumptable we use later on needs 16 bytes
     * per entry (so we can avoid a second jump by just doing a RFE inside
     * each entry). Unfortunately, however, Xtensa doesn't have an addx16
     * operation to make that easy for us. Luckily, all of the faulting
     * opcodes we're processing are guaranteed to have bit 3 be zero, which
     * means if we just shift the register bits of the opcode down by 3
     * instead of 4, we will get the register number multiplied by 2. This
     * combined with an addx8 will give us an effective addx16 without
     * needing any extra shift operations. */
    extui   a2, a2, 3, 5                    /* a2 is now destination register 0-15 times 2 */

    bgei    a2, 10, .LSE_assign_reg         /* a5..a15 use jumptable */
    beqi    a2, 2, .LSE_assign_a1           /* a1 uses a special routine */

    /* We're storing into a0 or a2..a4, which are all saved in our "stack"
     * area. Calculate the correct address and stick the value in there,
     * then just do our normal restore and RFE (no jumps required, which
     * actually makes a0..a4 substantially faster). */
    addx2   a2, a2, sp                      /* a2 = sp + register number * 4 */
    s32i    a4, a2, 0

    /* Restore all regs and return */
    l32i    a0, sp, 0
    l32i    a2, sp, 0x08
    l32i    a3, sp, 0x0c
    l32i    a4, sp, 0x10
    rsr     a1, excsave1                    /* restore a1 stashed at handler entry */
    rfe
|
||||
|
||||
.LSE_assign_reg:
    /*
     * Destination is one of a5..a15.
     * In:  a2 = destination register number * 2, a4 = value that was read,
     *      sp = register save area.
     * Out: jumps to the 16-byte jumptable entry of the destination register
     *      with a0 and a3 already restored; the entry itself restores a2,
     *      a4 and a1 and executes the RFE.
     */
    l32i    a0, sp, 0                       /* a0 is not needed any more */
    movi    a3, .LSE_jumptable_base
    addx8   a2, a2, a3                      /* a2 = base + register number * 16 */
    l32i    a3, sp, 0x0c                    /* a3 is free again, restore it */

    jx      a2
|
||||
|
||||
/* 16-bit load path of the LoadStoreError handler */
    .balign 4
.LSE_check_l16:
    /*
     * In:  a2 = faulting instruction, a3 = instruction masked with 0x00700F.
     * Out: a4 = emulated 16-bit load result, then continues at
     *      .LSE_post_fetch; anything that is not L16UI/L16SI is handed to
     *      .LSE_wrong_opcode.
     */
    movi    a4, 0x001002                    /* l16si or l16ui opcode after masking */
    bne     a3, a4, .LSE_wrong_opcode

    /* Note: At this point, the opcode could be one of two things:
     *   xx1xx2 (L16UI)
     *   xx9xx2 (L16SI)
     * Both of these we can handle. */

    rsr     a3, excvaddr                    /* read faulting address */
    movi    a4, ~3
    and     a4, a4, a3                      /* a4 now word aligned read address */

    l32i    a4, a4, 0                       /* perform the actual read */
    ssa8l   a3                              /* sar is now shift to extract a3's bytes */
    srl     a3, a4                          /* shift right correct distance */
    extui   a4, a3, 0, 16                   /* mask off bits we need for an l16 */

    bbci    a2, 15, .LSE_post_fetch         /* Not a signed op */
    bbci    a4, 15, .LSE_post_fetch         /* Value does not need sign-extension */

    movi    a3, 0xFFFF0000
    or      a4, a4, a3                      /* set 32-bit sign bits */
    j       .LSE_post_fetch
|
||||
|
||||
.LSE_wrong_opcode:
    /* If we got here it's not an opcode we can try to fix, so bomb out.
     * Restore registers so any dump the fatal exception routine produces
     * will have correct values.
     *
     * fatal_exception_handler is the regular user exception handler
     * (_xt_user_exc). _UserExceptionVector enters it with the interrupted
     * code's a0 in EXCSAVE_1 and all other registers untouched, so the same
     * state is re-created here:
     *   - SAR and a2..a4 are restored from a0 and the save area,
     *   - a1 is taken back from EXCSAVE_1 (where this handler parked it),
     *   - the original a0 is written back to EXCSAVE_1, because call0
     *     overwrites a0 with its return address.
     * No arguments are passed in a2/a3; they carry the interrupted code's
     * values. */
    wsr     a0, sar                         /* a0 held the saved SAR */
    l32i    a0, sp, 0
    l32i    a2, sp, 0x08
    l32i    a3, sp, 0x0c
    l32i    a4, sp, 0x10
    rsr     a1, excsave1                    /* last use of sp as save area pointer is above */
    wsr     a0, excsave1                    /* vector convention: a0 lives in EXCSAVE_1 */
    call0   fatal_exception_handler
    /* never returns here - call0 is used as a jump */
|
||||
|
||||
/* Writeback path of the LoadStoreError handler for destination register a1 */
    .balign 4
.LSE_assign_a1:
    /*
     * In:  a4 = value that was read, sp = register save area.
     * a1 is kept in excsave1 while the handler runs, so the loaded value
     * simply replaces the copy stored there; the final rsr then delivers it
     * into a1.
     */
    wsr     a4, excsave1

    /* Then restore all regs and return */
    l32i    a4, sp, 0x10
    l32i    a3, sp, 0x0c
    l32i    a2, sp, 0x08
    l32i    a0, sp, 0
    rsr     a1, excsave1                    /* must come last: sp is the save area pointer */
    rfe
|
||||
|
||||
/* Writeback jumptable of the LoadStoreError handler for destinations a5..a15 */
    .balign 4
.LSE_jumptable:
    /* The first 5 entries (80 bytes) of this table are unused (registers
     * a0..a4 are handled separately above). Rather than have a whole bunch
     * of wasted space, we just pretend that the table starts 80 bytes
     * earlier in memory. */
    .set    .LSE_jumptable_base, .LSE_jumptable - (16 * 5)

    /*
     * One entry per destination register a5..a15, placed at
     * .LSE_jumptable_base + 16 * register number.
     * In:  a4 = value that was read, sp = register save area, a0 and a3
     *      already restored by .LSE_assign_reg.
     * Each entry copies the value into its register, restores a2, a4 and a1
     * and returns from the exception.
     */
    .irp    regno, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
    .org    .LSE_jumptable_base + (16 * \regno)
    mov     a\regno, a4
    l32i    a2, sp, 0x08
    l32i    a4, sp, 0x10
    rsr     a1, excsave1
    rfe
    .endr
|
||||
|
||||
/*************************** LoadStoreError Handler END **********************/
|
||||
#endif
|
||||
|
||||
/*
|
||||
--------------------------------------------------------------------------------
|
||||
Insert some waypoints for jumping beyond the signed 8-bit range of
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user