From cdb01676b14ddd69c9fdd04979a35e3f50a7fafc Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Thu, 9 Apr 2020 01:52:05 -0700 Subject: [PATCH] bios: Add code to boot as Firmware ROM The new assembly files handle: - rom16.s: Jumping from reset, transitioning to 32-bit mode - rom32.s: Copying data from ROM to RAM, jumping to PVH entry point To place this code correctly, we add a new Program Header for the code and data that expect to be in ROM. See the comments in layout.ld for more information. We also place the 32-bit GDT in the ROM. This is mostly for convenience, as it lets us use the GDT directly from the ROM code without having to do any complex offset calculations. As laying out the code for a ROM makes the binary ~45% bigger, we gate building as a ROM behind an optional feature. Signed-off-by: Joe Richey --- Cargo.toml | 2 ++ layout.ld | 36 ++++++++++++++++++++++++++++++++++++ src/asm/mod.rs | 5 +++++ src/asm/rom16.s | 32 ++++++++++++++++++++++++++++++++ src/asm/rom32.s | 37 +++++++++++++++++++++++++++++++++++++ src/gdt.rs | 4 ++++ 6 files changed, 116 insertions(+) create mode 100644 src/asm/rom16.s create mode 100644 src/asm/rom32.s diff --git a/Cargo.toml b/Cargo.toml index c0b1ac4f..ee37d736 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,8 @@ log-serial = [] # Log panics to serial output. Disabling this (without disabling log-serial) # gets you most of the code size reduction, without losing _all_ debugging. log-panic = ["log-serial"] +# Support building the firmware as a BIOS ROM (i.e. starting in real mode). +rom = [] [dependencies] bitflags = "1.2" diff --git a/layout.ld b/layout.ld index 50b69672..5d198838 100644 --- a/layout.ld +++ b/layout.ld @@ -4,6 +4,7 @@ PHDRS { ram PT_LOAD FILEHDR PHDRS ; note PT_NOTE ; + rom PT_LOAD ; } /* Loaders like to put stuff in low memory (< 1M), so we don't use it. */ @@ -24,6 +25,7 @@ SECTIONS .text : { *(.text .text.*) } .text32 : { *(.text32) } .data : { *(.data .data.*) } + data_end = .; data_size = . 
- data_start; /* The BSS section isn't mapped from file data. It is just zeroed in RAM. */ @@ -38,6 +40,40 @@ SECTIONS . = ram_max - stack_size; .stack (NOLOAD) : { . += stack_size; } :NONE + /* When using the ROM, the entire firmware is loaded right below 4 GiB. Code + at file offset X, which expects to execute at (ram_min + X), is loaded at + address (four_gig - file_size + X). We use the linker script to compute + "offset", the difference between these two values. Note that the BSS and + Stack don't contribute to file size, so we calculate from data_end. */ + four_gig = 1 << 32; + rom_code_size = SIZEOF(.rom32) + SIZEOF(.rom16); + offset = four_gig - data_end - rom_code_size - SIZEOF(.romdata); + /* QEMU requires that the ROM size be 64K-aligned. If the file ends after + the reset vector code, this is equivalent to "offset" being 64K-aligned, + because ram_min and four_gig are both 64K-aligned. */ + offset = -ALIGN(-offset, 64K); /* = ALIGN_DOWN(offset, 64K) */ + + /* With the ROM, the data at [rom_data_start, rom_data_end) expects to + execute in RAM, so we will need to copy it to [data_start, data_end). */ + rom_data_start = data_start + offset; + rom_data_end = data_end + offset; + + /* This code/data expects to be in the ROM memory region, so we set the + virtual address accordingly. We place the ROM data right after the RAM + data (in the file) and place the ROM code at the very end of the region. + This is necessary for the code to be correct: + - If we placed all the code/data after rom_data_end, the reset vector + code wouldn't be right before four_gig. + - If we placed all the code/data at the end, the linker would still + place the ROM data right after the RAM data in the file, making our + virtual addresses inconsistent with the file layout. */ + . = rom_data_end; + .romdata : { *(.romdata) } :rom + /* This gap can't be optimized away, as these sections use the same PHDR. */ + . 
= four_gig - rom_code_size; + .rom32 : { *(.rom32) } + .rom16 : { KEEP(*(.rom16)) } + /* Match edk2's GccBase.lds DISCARD section */ /DISCARD/ : { *(.note.GNU-stack) diff --git a/src/asm/mod.rs b/src/asm/mod.rs index ab08edf0..875c93c3 100644 --- a/src/asm/mod.rs +++ b/src/asm/mod.rs @@ -1,2 +1,7 @@ global_asm!(include_str!("ram32.s")); global_asm!(include_str!("ram64.s")); + +#[cfg(feature = "rom")] +global_asm!(include_str!("rom16.s")); +#[cfg(feature = "rom")] +global_asm!(include_str!("rom32.s")); diff --git a/src/asm/rom16.s b/src/asm/rom16.s new file mode 100644 index 00000000..1c68895d --- /dev/null +++ b/src/asm/rom16.s @@ -0,0 +1,32 @@ +.section .rom16, "ax" +.code16 + +rom16_start: + # Order of instructions from Intel SDM 9.9.1 "Switching to Protected Mode" + # Step 1: Disable interrupts + cli + + # Step 2: Load the GDT + # We are currently in 16-bit real mode. To enter 32-bit protected mode, we + # need to load 32-bit code/data segments into our GDT. The gdt32 in ROM is + # at too high of an address (right below 4G) for the data segment to reach. + # + # But we can load gdt32 via the code segment. After a reset, the base of + # the CS register is 0xFFFF0000, which means we can access gdt32. + movw $(GDT32_PTR - 0xFFFF0000), %bx + lgdtl %cs:(%bx) + + # Step 3: Set CR0.PE (Protected Mode Enable) + movl %cr0, %eax + orb $0b00000001, %al # Set bit 0 + movl %eax, %cr0 + + # Step 4: Far JMP to change execution flow and serialize the processor. + # Set CS to a 32-bit Code-Segment and jump to 32-bit code. + ljmpl $0x08, $rom32_start + +# The reset vector must go at the end of ROM, exactly 16 bytes from the end. +reset_vec: # 0x0_FFFF_FFF0 + jmp rom16_start + .space (reset_vec + 0x10) - . 
# Pad to the end with zeros +four_gigs: # 0x1_0000_0000 diff --git a/src/asm/rom32.s b/src/asm/rom32.s new file mode 100644 index 00000000..4a91f258 --- /dev/null +++ b/src/asm/rom32.s @@ -0,0 +1,37 @@ +.section .rom32, "ax" +.code32 + +rom32_start: + # Now that we are in 32-bit mode, set up all the Data-Segments to be 32-bit. + movw $0x10, %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + movw %ax, %fs + movw %ax, %gs + + # Needed for the REP instructions below + cld + +copy_rom_to_ram: + # This is equivalent to: memcpy(data_start, rom_data_start, data_size) + movl $rom_data_start, %esi + movl $data_start, %edi + movl $data_size, %ecx + rep movsb (%esi), (%edi) + +zero_bss_in_ram: + # This is equivalent to: memset(bss_start, 0, bss_size) + xorb %al, %al + movl $bss_start, %edi + movl $bss_size, %ecx + rep stosb %al, (%edi) + +jump_to_ram: + # Zero out %ebx, as we don't have a PVH StartInfo struct. + xorl %ebx, %ebx + + # Jumping all that way from ROM (~4 GiB) to RAM (~1 MiB) is too far for a + # 32-bit relative jump, so we use a 32-bit absolute jump. + movl $ram32_start, %eax + jmp *%eax diff --git a/src/gdt.rs b/src/gdt.rs index 5f111694..5baf85b5 100644 --- a/src/gdt.rs +++ b/src/gdt.rs @@ -60,6 +60,10 @@ impl Pointer { static GDT64_PTR: Pointer = Pointer::new(&GDT64); static GDT64: [Descriptor; 2] = [Descriptor::empty(), Descriptor::CODE64]; +// Our 32-bit GDT lives in ROM, so it can be directly used by the ROM code. We +// should never reference or access this GDT when we are running in RAM. #[no_mangle] +#[link_section = ".romdata"] static GDT32_PTR: Pointer = Pointer::new(&GDT32); +#[link_section = ".romdata"] static GDT32: [Descriptor; 3] = [Descriptor::empty(), Descriptor::CODE32, Descriptor::DATA32];