diff --git a/Cargo.toml b/Cargo.toml index 8150a06c..5ce21983 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,8 @@ log-serial = [] # Log panics to serial output. Disabling this (without disabling log-serial) # gets you most of the code size reduction, without losing _all_ debugging. log-panic = ["log-serial"] +# Support building the firmware as a BIOS ROM (i.e. starting in real mode). +rom = [] [dependencies] x86_64 = "0.9" diff --git a/layout.ld b/layout.ld index 5aff83e1..744b97eb 100644 --- a/layout.ld +++ b/layout.ld @@ -4,6 +4,11 @@ PHDRS { ram PT_LOAD FILEHDR PHDRS ; note PT_NOTE ; + /* If we use PT_LOAD for the ROM, QEMU will load our ROM code into the + BIOS region when using PVH Boot, causing a conflict. We use PT_SHLIB (a + reserved Program Header type) to prevent hypervisors from loading the + ROM code or interpreting the ROM as a PT_NOTE. */ + rom PT_SHLIB ; } /* Loaders like to put stuff in low memory (< 1M), so we don't use it. */ @@ -39,6 +44,21 @@ SECTIONS ASSERT((. <= ram_max - stack_size), "firmware size too big for RAM region") + /* Get the size of the ROM sections without any padding */ + rom_size = SIZEOF(.romdata) + SIZEOF(.rom32) + SIZEOF(.rom16) + SIZEOF(.reset); + /* If we have a ROM, we need to pad the file to be a multiple of 64K. */ + min_file_size = file_size + rom_size; + pad_size = rom_size ? ALIGN(min_file_size, 64K) - min_file_size : 0; + + /* The ROM code must be at the very end of the file, and mapped below 4G */ + . = (1 << 32) - rom_size - pad_size; + rom_data_start = . - data_size; + + .romdata : { . 
+= pad_size; *(.romdata) } :rom + .rom32 : { *(.rom32) } + .rom16 : { *(.rom16) } + .reset : { KEEP(*(.reset)) } + /* Match edk2's GccBase.lds DISCARD section */ /DISCARD/ : { *(.note.GNU-stack) diff --git a/src/asm/gdt32.s b/src/asm/gdt32.s new file mode 100644 index 00000000..eab00faf --- /dev/null +++ b/src/asm/gdt32.s @@ -0,0 +1,56 @@ +.section .romdata, "a" + +gdt32_ptr: + .short gdt32_end - gdt32_start - 1 # GDT length is actually (length - 1) + .long gdt32_start + +gdt32_start: # First descriptor is always null + .quad 0 +code32_desc: # Base = 0, Limit = 0xF_FFFF w/ 4K Granularity = 4G + # CS.Limit[15:00] = 0xFFFF + .short 0xffff + # CS.Base[15:00] = 0 + .short 0x0000 + # CS.Base[23:16] = 0 (bits 0-7) + .byte 0x00 + # CS.Accessed = 1 (bit 8) - Don't write to segment on first use + # CS.ReadEnable = 1 (bit 9) - Read/Execute Code-Segment + # CS.Conforming = 0 (bit 10) - Nonconforming, no lower-priv access + # CS.Executable = 1 (bit 11) - Code-Segment + # CS.S = 1 (bit 12) - Not a System-Segment + # CS.DPL = 0 (bits 13-14) - We only use this segment in Ring 0 + # CS.P = 1 (bit 15) - Segment is present + .byte 0b10011011 + # CS.Limit[19:16] = 0xF (bits 16-19) + # CS.AVL = 0 (bit 20) - Our software doesn't use this bit + # CS.L = 0 (bit 21) - This isn't a 64-bit segment + # CS.B = 1 (bit 22) - This is a 32-bit segment + # CS.G = 1 (bit 23) - 4K Granularity + .byte 0b11001111 + # CS.Base[31:24] = 0 (bits 24-31) + .byte 0x00 + +data32_desc: # Base = 0, Limit = 0xF_FFFF w/ 4K Granularity = 4G + # DS.Limit[15:00] = 0xFFFF + .short 0xffff + # DS.Base[15:00] = 0 + .short 0x0000 + # DS.Base[23:16] = 0 (bits 0-7) + .byte 0x00 + # DS.Accessed = 1 (bit 8) - Don't write to segment on first use + # DS.WriteEnable = 1 (bit 9) - Read/Write Data-Segment + # DS.Expansion = 0 (bit 10) - Expand-up + # DS.Executable = 0 (bit 11) - Data-Segment + # DS.S = 1 (bit 12) - Not a System-Segment + # DS.DPL = 0 (bits 13-14) - We only use this segment in Ring 0 + # DS.P = 1 (bit 15) - 
Segment is present + .byte 0b10010011 + # DS.Limit[19:16] = 0xF (bits 16-19) + # DS.AVL = 0 (bit 20) - Our software doesn't use this bit + # DS.L = 0 (bit 21) - This isn't a 64-bit segment + # DS.B = 1 (bit 22) - This is a 32-bit segment + # DS.G = 1 (bit 23) - 4K Granularity + .byte 0b11001111 + # DS.Base[31:24] = 0 (bits 24-31) + .byte 0x00 +gdt32_end: diff --git a/src/asm/mod.rs b/src/asm/mod.rs index 95dd9360..7a7e6a92 100644 --- a/src/asm/mod.rs +++ b/src/asm/mod.rs @@ -2,3 +2,12 @@ global_asm!(include_str!("note.s")); global_asm!(include_str!("ram32.s")); global_asm!(include_str!("ram64.s")); global_asm!(include_str!("gdt64.s")); + +#[cfg(feature = "rom")] +global_asm!(include_str!("reset.s")); +#[cfg(feature = "rom")] +global_asm!(include_str!("rom16.s")); +#[cfg(feature = "rom")] +global_asm!(include_str!("rom32.s")); +#[cfg(feature = "rom")] +global_asm!(include_str!("gdt32.s")); diff --git a/src/asm/reset.s b/src/asm/reset.s new file mode 100644 index 00000000..65149b87 --- /dev/null +++ b/src/asm/reset.s @@ -0,0 +1,9 @@ +.section .reset, "ax" +.code16 + +.align 16 +reset_vec: # 0x0_FFFF_FFF0 + jmp rom16_start + +.align 16 +reset_end: # 0x1_0000_0000 diff --git a/src/asm/rom16.s b/src/asm/rom16.s new file mode 100644 index 00000000..568dcd72 --- /dev/null +++ b/src/asm/rom16.s @@ -0,0 +1,26 @@ +.section .rom16, "ax" +.code16 + +rom16_start: + # Order of instructions from Intel SDM 9.9.1 "Switching to Protected Mode" + # Step 1: Disable interrupts + cli + + # Step 2: Load the GDT + # We are currently in 16-bit real mode. To enter 32-bit protected mode, we + # need to load 32-bit code/data segments into our GDT. The gdt32 in ROM is + # at too high of an address (right below 4G) for the data segment to reach. + # + # But we can load gdt32 via the code segment. After a reset, the base of + # the CS register is 0xFFFF0000, which means we can access gdt32. 
+ movw $(gdt32_ptr - 0xFFFF0000), %bx + lgdtl %cs:(%bx) + + # Step 3: Set CR0.PE (Protected Mode Enable) + movl %cr0, %eax + orb $0b00000001, %al # Set bit 0 + movl %eax, %cr0 + + # Step 4: Far JMP to change execution flow and serialize the processor. + # Set CS to a 32-bit segment and jump to 32-bit code. + ljmpl $(code32_desc - gdt32_start), $rom32_start diff --git a/src/asm/rom32.s b/src/asm/rom32.s new file mode 100644 index 00000000..d574b7fb --- /dev/null +++ b/src/asm/rom32.s @@ -0,0 +1,37 @@ +.section .rom32, "ax" +.code32 + +rom32_start: + # Now that we are in 32-bit mode, setup all the data segments to be 32-bit. + movw $(data32_desc - gdt32_start), %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + movw %ax, %fs + movw %ax, %gs + + # Needed for the REP instructions below + cld + +copy_rom_to_ram: + # This is equivalent to: memcpy(data_start, rom_data_start, data_size) + movl $rom_data_start, %esi + movl $data_start, %edi + movl $data_size, %ecx + rep movsb (%esi), (%edi) + +zero_bss_in_ram: + # This is equivalent to: memset(bss_start, 0, bss_size) + xorb %al, %al + movl $bss_start, %edi + movl $bss_size, %ecx + rep stosb %al, (%edi) + +jump_to_ram: + # Zero out %ebx, as we don't have a PVH StartInfo struct. + xorl %ebx, %ebx + + # Jumping all that way from ROM (~4 GiB) to RAM (~1 MiB) is too far for a + # 32-bit relative jump, so we use a 32-bit absolute jump. + movl $ram32_start, %eax + jmp *%eax