Skip to content

Commit

Permalink
eboot: .RODATA, upstream uzlib, move CRC, save 112 bytes (#7844)
Browse files Browse the repository at this point in the history
RODATA can be copied automatically by the bootrom, so no reason not to
allow its use for strings and constants in eboot.c

Revert to pfalcon's original uzlib since the single patch to remove
RODATA is not required.

Rationalize eboot.ld linker script, clean up BSS and init it in code.

Saves 112 bytes of space in the bootloader sector by removing the
extra code associated with literal loads.

* Move CRC out of bootload sector

We added protection to only erase the bootload sector when flashing an
image when the new sector != the old sector.  This was intended to
minimize the chance of bricking (i.e. if there was a powerfail during
flashing of the boot sector the chip would be dead).

Unfortunately, by placing the CRC inside the eboot sector *every*
application will have a unique eboot sector (due to the crc/len), so
this protection doesn't work.

Move the CRC into the first 8 bytes of IROM itself.  This frees up extra
space in the boot sector and ensures that eboot won't be reflashed
unless there really is an eboot change.
  • Loading branch information
earlephilhower committed Feb 8, 2021
1 parent 6c564c2 commit 07b4c09
Show file tree
Hide file tree
Showing 9 changed files with 67 additions and 108 deletions.
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@
url = https://github.com/arduino-libraries/Ethernet.git
[submodule "tools/sdk/uzlib"]
path = tools/sdk/uzlib
url = https://github.com/earlephilhower/uzlib.git
url = https://github.com/pfalcon/uzlib.git
8 changes: 4 additions & 4 deletions bootloaders/eboot/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,17 @@ APP_FW := eboot.bin

all: $(APP_OUT)

tinflate.o: $(UZLIB_PATH)/tinflate.c $(UZLIB_PATH)/uzlib.h $(UZLIB_PATH)/uzlib_conf.h
tinflate.o: $(UZLIB_PATH)/tinflate.c $(UZLIB_PATH)/uzlib.h $(UZLIB_PATH)/uzlib_conf.h Makefile
$(CC) $(CFLAGS) -c -o tinflate.o $(UZLIB_PATH)/tinflate.c

tinfgzip.o: $(UZLIB_PATH)/tinfgzip.c $(UZLIB_PATH)/uzlib.h $(UZLIB_PATH)/uzlib_conf.h
tinfgzip.o: $(UZLIB_PATH)/tinfgzip.c $(UZLIB_PATH)/uzlib.h $(UZLIB_PATH)/uzlib_conf.h Makefile
$(CC) $(CFLAGS) -c -o tinfgzip.o $(UZLIB_PATH)/tinfgzip.c

$(APP_AR): $(TARGET_OBJ_PATHS) tinflate.o tinfgzip.o
$(APP_AR): $(TARGET_OBJ_PATHS) tinflate.o tinfgzip.o Makefile
$(AR) cru $@ $^

$(APP_OUT): $(APP_AR) eboot.ld | Makefile
$(LD) $(LD_SCRIPT) $(LDFLAGS) -Wl,--start-group -Wl,--whole-archive $(APP_AR) -Wl,--end-group -o $@
$(LD) $(LD_SCRIPT) $(LDFLAGS) -Wl,--start-group -Wl,--sort-common $(APP_AR) -Wl,--end-group -o $@

clean:
rm -f *.o
Expand Down
42 changes: 17 additions & 25 deletions bootloaders/eboot/eboot.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,19 @@
#include "eboot_command.h"
#include <uzlib.h>

extern unsigned char _gzip_dict;

#define SWRST do { (*((volatile uint32_t*) 0x60000700)) |= 0x80000000; } while(0);

extern void ets_wdt_enable(void);
extern void ets_wdt_disable(void);

// Converts bit of a string into a uint32
#define S(a,b,c,d) ( (((uint32_t)a) & 0xff) | (((uint32_t)b) << 8) | (((uint32_t)c) << 16) | (((uint32_t)d)<<24) )

int print_version(const uint32_t flash_addr)
{
uint32_t ver;
if (SPIRead(flash_addr + APP_START_OFFSET + sizeof(image_header_t) + sizeof(section_header_t), &ver, sizeof(ver))) {
return 1;
}
// We don't have BSS and can't print from flash, so build up string
// 4 chars at a time. Smaller code than byte-wise assignment.
uint32_t fmt[2];
fmt[0] = S('v', '%', '0', '8');
fmt[1] = S('x', '\n', 0, 0);
ets_printf((const char*) fmt, ver);
ets_printf("v%08x\n", ver);
return 0;
}

Expand Down Expand Up @@ -222,6 +213,16 @@ int main()
bool clear_cmd = false;
struct eboot_command cmd;

// BSS init commented out for now to save space. If any static variables set
// to 0 are used, need to uncomment it or else the BSS will not be cleared and
// the static vars will power on with random values.
#if 0
// Clear BSS ourselves, we don't have handy C runtime
extern char _bss_start;
extern char _bss_end;
ets_bzero(&_bss_start, &_bss_end - &_bss_start);
#endif

print_version(0);

if (eboot_command_read(&cmd) == 0) {
Expand All @@ -236,32 +237,26 @@ int main()
}

if (cmd.action == ACTION_COPY_RAW) {
uint32_t cp = S('c', 'p', ':', 0);
ets_printf((const char *)&cp);
ets_printf("cp:");

ets_wdt_disable();
res = copy_raw(cmd.args[0], cmd.args[1], cmd.args[2], false);
ets_wdt_enable();

cp = S('0' + res, '\n', 0, 0 );
ets_printf((const char *)&cp);
ets_printf("%d\n", res);
#if 0
//devyte: this verify step below (cmp:) only works when the end of copy operation above does not overwrite the
//beginning of the image in the empty area, see #7458. Disabling for now.
//TODO: replace the below verify with hash type, crc, or similar.
// Verify the copy
uint32_t v[2];
v[0] = S('c', 'm', 'p', ':');
v[1] = 0;
ets_printf(const char *)v);
ets_printf("cmp:");
if (res == 0) {
ets_wdt_disable();
res = copy_raw(cmd.args[0], cmd.args[1], cmd.args[2], true);
ets_wdt_enable();
}

cp = S('0' + res, '\n', 0, 0 );
ets_printf((const char *)&cp);
ets_printf("%d\n", res);
#endif
if (res == 0) {
cmd.action = ACTION_LOAD_APP;
Expand All @@ -274,13 +269,10 @@ int main()
}

if (cmd.action == ACTION_LOAD_APP) {
ets_putc('l'); ets_putc('d'); ets_putc('\n');
ets_printf("ld\n");
res = load_app_from_flash_raw(cmd.args[0]);
// We will get to this only on load fail
uint32_t e[2];
e[0] = S('e', ':', '0' + res, '\n' );
e[1] = 0;
ets_printf((const char*)e);
ets_printf("e:%d\n", res);
}

if (res) {
Expand Down
Binary file modified bootloaders/eboot/eboot.elf
Binary file not shown.
90 changes: 24 additions & 66 deletions bootloaders/eboot/eboot.ld
Original file line number Diff line number Diff line change
Expand Up @@ -42,53 +42,13 @@ PROVIDE(_memmap_cacheattr_reset = _memmap_cacheattr_wb_trapnull);
SECTIONS
{

.dport0.rodata : ALIGN(4)
{
_dport0_rodata_start = ABSOLUTE(.);
*(.dport0.rodata)
*(.dport.rodata)
_dport0_rodata_end = ABSOLUTE(.);
} >dport0_0_seg :dport0_0_phdr

.dport0.literal : ALIGN(4)
{
_dport0_literal_start = ABSOLUTE(.);
*(.dport0.literal)
*(.dport.literal)
_dport0_literal_end = ABSOLUTE(.);
} >dport0_0_seg :dport0_0_phdr

.dport0.data : ALIGN(4)
{
_dport0_data_start = ABSOLUTE(.);
*(.dport0.data)
*(.dport.data)
_dport0_data_end = ABSOLUTE(.);
} >dport0_0_seg :dport0_0_phdr

.data : ALIGN(4)
.globals : ALIGN(4)
{
*(COMMON) /* Global vars */
. = ALIGN(4);
_heap_start = ABSOLUTE(.);
/* _stack_sentry = ALIGN(0x8); */
} >dram0_0_seg :dram0_0_bss_phdr
/* __stack = 0x3ffc8000; */

.text : ALIGN(4)
.data : ALIGN(4)
{
_stext = .;
_text_start = ABSOLUTE(.);
*(.entry.text)
*(.init.literal)
*(.init)
*(.literal .text .literal.* .text.* .stub .gnu.warning .gnu.linkonce.literal.* .gnu.linkonce.t.*.literal .gnu.linkonce.t.*)
*(.fini.literal)
*(.fini)
*(.gnu.version)
_text_end = ABSOLUTE(.);
_etext = .;
. = ALIGN (8);
_data_start = ABSOLUTE(.);
*(.data)
*(.data.*)
Expand All @@ -102,7 +62,10 @@ SECTIONS
*(.gnu.linkonce.s2.*)
*(.jcr)
_data_end = ABSOLUTE(.);
. = ALIGN (8);
} >dram0_0_seg :dram0_0_bss_phdr

.rodata : ALIGN(4)
{
_rodata_start = ABSOLUTE(.);
*(.rodata)
*(.rodata.*)
Expand Down Expand Up @@ -131,14 +94,11 @@ SECTIONS
*(.xt_except_desc_end)
*(.dynamic)
*(.gnu.version_d)
. = ALIGN(4); /* this table MUST be 4-byte aligned */
_bss_table_start = ABSOLUTE(.);
LONG(_bss_start)
LONG(_bss_end)
_bss_table_end = ABSOLUTE(.);
_rodata_end = ABSOLUTE(.);
} >dram0_0_seg :dram0_0_bss_phdr

. = ALIGN (8);
.bss : ALIGN(4)
{
_bss_start = ABSOLUTE(.);
*(.dynsbss)
*(.sbss)
Expand All @@ -152,26 +112,24 @@ SECTIONS
*(.bss)
*(.bss.*)
*(.gnu.linkonce.b.*)
. = ALIGN (8);
_bss_end = ABSOLUTE(.);
_free_space = 4096 - 17 - (. - _stext);
/*
The boot loader checksum must be before the CRC, which is written by elf2bin.py.
This leaves 16 bytes after the checksum for the CRC placed at the end of the
4096-byte sector. */
_cs_here = (ALIGN((. + 1), 16) == ALIGN(16)) ? (ALIGN(16) - 1) : (. + 0x0F);
} >dram0_0_seg :dram0_0_bss_phdr

/*
The filling (padding) and values for _crc_size and _crc_val are handled by
elf2bin.py. With this, we give values to the symbols without explicitly
assigning space. This avoids the linkers back *fill* operation that causes
trouble.

The CRC info is stored in last 8 bytes. */
_crc_size = _stext + 4096 - 8;
_crc_val = _stext + 4096 - 4;
ASSERT((4096 > (17 + (. - _stext))), "Error: No space for CS and CRC in bootloader sector.");
ASSERT((_crc_size > _cs_here), "Error: CRC must be located after CS.");
.text : ALIGN(4)
{
_stext = .;
_text_start = ABSOLUTE(.);
*(.entry.text)
*(.init.literal)
*(.init)
*(.literal .text .literal.* .text.* .stub .gnu.warning .gnu.linkonce.literal.* .gnu.linkonce.t.*.literal .gnu.linkonce.t.*)
*(.fini.literal)
*(.fini)
*(.gnu.version)
_text_end = ABSOLUTE(.);
_etext = .;
. = ALIGN (4); /* Ensure 32b alignment since this is written to IRAM */
} >iram1_0_seg :iram1_0_phdr

.lit4 : ALIGN(4)
Expand Down
18 changes: 10 additions & 8 deletions cores/esp8266/Esp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -444,22 +444,24 @@ bool EspClass::checkFlashConfig(bool needsEquals) {
return false;
}

// These are defined in the linker script, and filled in by the elf2bin.py util
extern "C" uint32_t __crc_len;
extern "C" uint32_t __crc_val;

bool EspClass::checkFlashCRC() {
// The CRC and total length are placed in extra space at the end of the 4K chunk
// of flash occupied by the bootloader. If the bootloader grows to >4K-8 bytes,
// we'll need to adjust this.
uint32_t flashsize = *((uint32_t*)(0x40200000 + 4088)); // Start of PROGMEM plus 4K-8
uint32_t flashcrc = *((uint32_t*)(0x40200000 + 4092)); // Start of PROGMEM plus 4K-4
// Dummy CRC fill
uint32_t z[2];
z[0] = z[1] = 0;

uint32_t firstPart = (uintptr_t)&__crc_len - 0x40200000; // How many bytes to check before the 1st CRC val

// Start the checksum
uint32_t crc = crc32((const void*)0x40200000, 4096-8, 0xffffffff);
uint32_t crc = crc32((const void*)0x40200000, firstPart, 0xffffffff);
// Pretend the 2 words of crc/len are zero to be idempotent
crc = crc32(z, 8, crc);
// Finish the CRC calculation over the rest of flash
crc = crc32((const void*)0x40201000, flashsize-4096, crc);
return crc == flashcrc;
crc = crc32((const void*)(0x40200000 + firstPart + 8), __crc_len - (firstPart + 8), crc);
return crc == __crc_val;
}


Expand Down
6 changes: 3 additions & 3 deletions tools/elf2bin.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
ffreqb = { '40': 0, '26': 1, '20': 2, '80': 15 }
fsizeb = { '512K': 0, '256K': 1, '1M': 2, '2M': 3, '4M': 4, '8M': 8, '16M': 9 }

crcsize_offset = 4088
crcval_offset = 4092
crcsize_offset = 4096 + 16
crcval_offset = 4096 + 16 + 4

def get_elf_entry(elf, path):
p = subprocess.Popen([path + "/xtensa-lx106-elf-readelf", '-h', elf], stdout=subprocess.PIPE, universal_newlines=True )
Expand Down Expand Up @@ -188,7 +188,7 @@ def wrapper(**kwargs):

wrapper(
elf=args.eboot,
segments=[".text"],
segments=[".text", ".rodata"],
to_addr=4096
)

Expand Down
7 changes: 7 additions & 0 deletions tools/sdk/ld/eagle.app.v6.common.ld.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,13 @@ SECTIONS
.irom0.text : ALIGN(4)
{
_irom0_text_start = ABSOLUTE(.);

/* Stuff the CRC in well known symbols at a well known location */
__crc_len = ABSOLUTE(.);
LONG(0x00000000);
__crc_val = ABSOLUTE(.);
LONG(0x00000000);

*(.ver_number)
*.c.o(.literal*, .text*)
*.cpp.o(EXCLUDE_FILE (umm_malloc.cpp.o) .literal*, EXCLUDE_FILE (umm_malloc.cpp.o) .text*)
Expand Down
2 changes: 1 addition & 1 deletion tools/sdk/uzlib
Submodule uzlib updated 1 files
+2 −25 src/tinflate.c

11 comments on commit 07b4c09

@einglis
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm pretty certain that the change to the CRC placement in elf2bin.py has broken the segment checksum. I'll investigate a little more and hopefully propose a patch.

@earlephilhower
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@einglis Are you trying to fix a problem you're seeing somewhere? Can you explain why you need to fix it?

I believe you are technically correct with your observation, however the 1-byte checksum is ignored by eboot.c (the only thing to use the 2nd array with the invalid .text c'sum). Actually, since it's .text and already in ROM at the right address, eboot doesn't do anything at all with this section other than skip it.

The ROM only ever looks at the 1st structure (which contains eboot's sections), and that does have a proper c'sum. You can verify this by running esptool image_info.

You're free to propose a fix to elf2bin, but it's not going to actually change anything that I can see (since, again, the c'sum byte is completely ignored in the 2nd stage bootloader).

@einglis
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the response, @earlephilhower. My application accepts uploaded binaries for upgrades (nothing magic there), but I wanted to avoid bricking devices by being a little more careful with their validation. I thought the segment checksums would be a quick-win here.

@earlephilhower
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, the c'sum is used on boot not update/reflashing. The eboot copy command (for upgrade) just...copies, nothing more. It'll copy garbage if you ask it to.

You can always use signing to crypto-ensure updates are valid as received, but unless you expect the TCP stream (which is CRC protected with retry on mismatch) itself to corrupt data it feels like overkill.

@einglis
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm using the Update library, and while it does superficial checking of the supplied data (it checks the start byte, but I don't recall whether it checks the checkum), it only considers the boot image, not the main image that follows.

Hence - just like you describe for the boot process - it will accept pretty much anything for the main image. If that data is bad (eg, the user uploaded a duff file, not corrupt transmission, which I agree is unlikely), then the device becomes bricked on reboot.

I wanted my code to be a little more diligent about checking the validity of the uploaded data to avoid this case; specifically a good bootloader but damaged main image.

@earlephilhower
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One point I was just corrected on: TCP stream data isn't c'sum'd, so if you want to guarantee the data hasn't changed in flight from your server to the device you should use TLS(https)...you should do that, anyway, since OTW there is no security whatsoever.

Use TLS or signed images, and you get as good a guarantee as you're gonna get that data wasn't corrupted in flight. After that, if you hit bitrot you're bricked anyway with the current setup. The bootloader can't do anything in the case of CRC mismatch other infinite loop at that point.

What you're describing sounds like you want a backup firmware and for the bootloader to c'sum the primary before start (adds 2-3 seconds+++ to boot) and on fail jump to the backup and pray that wasn't corrupted. That's not supported now and not going to work w/o some serious work on the core. You might want to drop Arduino and use the plain SDK and rboot instead, if that's a requirement.

@einglis
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, TCP checksums or not, I'm not concerned at this stage about corrupt transmission. And yes, your description of the backup firmware would be the gold standard.

I guess I'm trying to get a 'good enough' solution, by having the device do the validation of an image (that it assumes has been received correctly), before submitting it as a replacement for the one and only real main image. Received an image? Checksums good? Please upgrade. Checksums bad? Please discard.

One could clearly make the argument that if there is no corruption, and the first part of the binary is a valid bootloader, then it's pretty unlikely the remaining data is not a valid image. There's no disputing that. But equally, the binary file format contains mechanisms (the segment checksums) that can give the paranoid programmer that extra feeling of warmth when validated. But since 3.0.0, they're broken. Best case, they can't be used; worst case, code that was already doing what I describe now starts rejecting otherwise valid images.

Aside: since my initial report, I've looked more closely at what this change actually did, and I agree it's perfectly sensible, and concur with the motivation. I just think it's a shame to have collateral damage as a result. One possible solution would be to have the CRC disregard the very final byte (where the checksum is), and then calculate that segment checksum after the CRC has been inserted.

@einglis
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addendum: how is it okay to stomp over program data anyway? I don't understand why putting the CRC where you now do doesn't break something (else).

@earlephilhower
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As I said, use TLS or signing. Either will give you 1000x better protection than a 1-byte checksum and ensure what goes into flash is what you sent.

You can also send in a PR to correct the calculation for .text, but you'd also want to redo eboot.c to actually look at it. And in case of failure, again I'm not sure what you can do other than brick given things as they are today.

Addendum: how is it okay to stomp over program data anyway? I don't understand why putting the CRC where you now do doesn't break something (else).

It's not stomping on anything. The linker has reserved those locations already, see the .ld files. They're in a well-known address so the core can use it for the CRC check call (see the examples).

@einglis
Copy link
Contributor

@einglis einglis commented on 07b4c09 Jan 14, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not stomping on anything. The linker has reserved those locations already, see the .ld files.

Yeah, that's fair. Thanks.

@earlephilhower
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, FYI, there is a built-in MD5 checksum if either your webserver or your app can provide it. Check the .setMD5() call out.

Please sign in to comment.