From 69d777e4c00bb36f9778c59d173d402da65894cf Mon Sep 17 00:00:00 2001 From: Romain Thomas Date: Sat, 7 Dec 2024 06:01:28 +0100 Subject: [PATCH] Add support for x86/x86-64 operands --- api/python/lief/assembly/__init__.pyi | 60 ++ api/python/lief/assembly/x86/__init__.pyi | 787 ++++++++++++++++++ .../lief/assembly/x86/operands/__init__.pyi | 32 + api/python/src/asm/pyInstruction.cpp | 88 ++ api/python/src/asm/x86/CMakeLists.txt | 3 + api/python/src/asm/x86/init.cpp | 4 + .../src/asm/x86/operands/CMakeLists.txt | 6 + .../src/asm/x86/operands/pyImmediate.cpp | 27 + api/python/src/asm/x86/operands/pyMemory.cpp | 69 ++ .../src/asm/x86/operands/pyPCRelative.cpp | 29 + .../src/asm/x86/operands/pyRegister.cpp | 31 + api/python/src/asm/x86/pyInstruction.cpp | 13 + api/python/src/asm/x86/pyOperand.cpp | 39 + api/rust/autocxx_ffi.rs | 20 + .../cargo/lief/src/assembly/instruction.rs | 99 +++ api/rust/cargo/lief/src/assembly/x86.rs | 7 + .../lief/src/assembly/x86/instruction.rs | 16 + .../cargo/lief/src/assembly/x86/operands.rs | 139 ++++ .../src/assembly/x86/operands/immediate.rs | 40 + .../lief/src/assembly/x86/operands/memory.rs | 77 ++ .../src/assembly/x86/operands/pc_relative.rs | 40 + .../src/assembly/x86/operands/register.rs | 43 + .../cargo/lief/tests/disassembler_test.rs | 78 +- api/rust/include/LIEF/rust/ASM.hpp | 3 + .../include/LIEF/rust/asm/Instruction.hpp | 19 +- .../include/LIEF/rust/asm/x86/Instruction.hpp | 18 +- .../include/LIEF/rust/asm/x86/Operand.hpp | 28 + .../include/LIEF/rust/asm/x86/operands.hpp | 6 + .../LIEF/rust/asm/x86/operands/Immediate.hpp | 34 + .../LIEF/rust/asm/x86/operands/Memory.hpp | 38 + .../LIEF/rust/asm/x86/operands/PCRelative.hpp | 34 + .../LIEF/rust/asm/x86/operands/Register.hpp | 35 + .../extended/disassembler/cpp/arch/x86.rst | 25 + .../extended/disassembler/cpp/index.rst | 4 +- doc/sphinx/extended/disassembler/index.rst | 2 +- .../extended/disassembler/python/arch/x86.rst | 62 ++ .../extended/disassembler/python/index.rst | 3 +- 
include/LIEF/asm/Instruction.hpp | 62 ++ include/LIEF/asm/x86.hpp | 11 +- include/LIEF/asm/x86/Instruction.hpp | 6 + include/LIEF/asm/x86/Operand.hpp | 125 +++ include/LIEF/asm/x86/operands.hpp | 22 + include/LIEF/asm/x86/operands/Immediate.hpp | 49 ++ include/LIEF/asm/x86/operands/Memory.hpp | 77 ++ include/LIEF/asm/x86/operands/PCRelative.hpp | 48 ++ include/LIEF/asm/x86/operands/Register.hpp | 51 ++ src/asm/asm.cpp | 127 +++ tests/assembly/test_x86.py | 90 ++ 48 files changed, 2703 insertions(+), 23 deletions(-) create mode 100644 api/python/lief/assembly/x86/operands/__init__.pyi create mode 100644 api/python/src/asm/x86/operands/CMakeLists.txt create mode 100644 api/python/src/asm/x86/operands/pyImmediate.cpp create mode 100644 api/python/src/asm/x86/operands/pyMemory.cpp create mode 100644 api/python/src/asm/x86/operands/pyPCRelative.cpp create mode 100644 api/python/src/asm/x86/operands/pyRegister.cpp create mode 100644 api/python/src/asm/x86/pyOperand.cpp create mode 100644 api/rust/cargo/lief/src/assembly/x86/operands.rs create mode 100644 api/rust/cargo/lief/src/assembly/x86/operands/immediate.rs create mode 100644 api/rust/cargo/lief/src/assembly/x86/operands/memory.rs create mode 100644 api/rust/cargo/lief/src/assembly/x86/operands/pc_relative.rs create mode 100644 api/rust/cargo/lief/src/assembly/x86/operands/register.rs create mode 100644 api/rust/include/LIEF/rust/asm/x86/Operand.hpp create mode 100644 api/rust/include/LIEF/rust/asm/x86/operands.hpp create mode 100644 api/rust/include/LIEF/rust/asm/x86/operands/Immediate.hpp create mode 100644 api/rust/include/LIEF/rust/asm/x86/operands/Memory.hpp create mode 100644 api/rust/include/LIEF/rust/asm/x86/operands/PCRelative.hpp create mode 100644 api/rust/include/LIEF/rust/asm/x86/operands/Register.hpp create mode 100644 doc/sphinx/extended/disassembler/python/arch/x86.rst create mode 100644 include/LIEF/asm/x86/Operand.hpp create mode 100644 include/LIEF/asm/x86/operands.hpp create mode 100644 
include/LIEF/asm/x86/operands/Immediate.hpp create mode 100644 include/LIEF/asm/x86/operands/Memory.hpp create mode 100644 include/LIEF/asm/x86/operands/PCRelative.hpp create mode 100644 include/LIEF/asm/x86/operands/Register.hpp diff --git a/api/python/lief/assembly/__init__.pyi b/api/python/lief/assembly/__init__.pyi index 1813fbf587..2353bdf1c7 100644 --- a/api/python/lief/assembly/__init__.pyi +++ b/api/python/lief/assembly/__init__.pyi @@ -1,3 +1,4 @@ +import enum from typing import Iterator, Optional, Union from . import ( @@ -9,12 +10,29 @@ from . import ( riscv as riscv, x86 as x86 ) +import lief class Engine: pass class Instruction: + class MemoryAccess(enum.Flag): + @staticmethod + def from_value(arg: int, /) -> Instruction.MemoryAccess: ... + + def __eq__(self, arg, /) -> bool: ... + + def __ne__(self, arg, /) -> bool: ... + + def __int__(self) -> int: ... + + NONE = 0 + + READ = 1 + + WRITE = 2 + @property def address(self) -> int: ... @@ -41,4 +59,46 @@ class Instruction: @property def is_syscall(self) -> bool: ... + @property + def is_memory_access(self) -> bool: ... + + @property + def is_move_reg(self) -> bool: ... + + @property + def is_add(self) -> bool: ... + + @property + def is_trap(self) -> bool: ... + + @property + def is_barrier(self) -> bool: ... + + @property + def is_return(self) -> bool: ... + + @property + def is_indirect_branch(self) -> bool: ... + + @property + def is_conditional_branch(self) -> bool: ... + + @property + def is_unconditional_branch(self) -> bool: ... + + @property + def is_compare(self) -> bool: ... + + @property + def is_move_immediate(self) -> bool: ... + + @property + def is_bitcast(self) -> bool: ... + + @property + def memory_access(self) -> Instruction.MemoryAccess: ... + + @property + def branch_target(self) -> Union[int, lief.lief_errors]: ... + def __str__(self) -> str: ... 
diff --git a/api/python/lief/assembly/x86/__init__.pyi b/api/python/lief/assembly/x86/__init__.pyi index f0efbc126c..09f175a1bb 100644 --- a/api/python/lief/assembly/x86/__init__.pyi +++ b/api/python/lief/assembly/x86/__init__.pyi @@ -1,10 +1,14 @@ import enum from typing import Iterator, Optional, Union +from . import operands as operands import lief class Instruction(lief.assembly.Instruction): + @property + def operands(self) -> Iterator[Optional[Operand]]: ... + @property def opcode(self) -> OPCODE: ... @@ -39644,3 +39648,786 @@ class OPCODE(enum.Enum): XTEST = 19816 INSTRUCTION_LIST_END = 19817 + +class Operand: + @property + def to_string(self) -> str: ... + + def __str__(self) -> str: ... + +class REG(enum.Enum): + NoRegister = 0 + + AH = 1 + + AL = 2 + + AX = 3 + + BH = 4 + + BL = 5 + + BP = 6 + + BPH = 7 + + BPL = 8 + + BX = 9 + + CH = 10 + + CL = 11 + + CS = 12 + + CX = 13 + + DF = 14 + + DH = 15 + + DI = 16 + + DIH = 17 + + DIL = 18 + + DL = 19 + + DS = 20 + + DX = 21 + + EAX = 22 + + EBP = 23 + + EBX = 24 + + ECX = 25 + + EDI = 26 + + EDX = 27 + + EFLAGS = 28 + + EIP = 29 + + EIZ = 30 + + ES = 31 + + ESI = 32 + + ESP = 33 + + FPCW = 34 + + FPSW = 35 + + FS = 36 + + FS_BASE = 37 + + GS = 38 + + GS_BASE = 39 + + HAX = 40 + + HBP = 41 + + HBX = 42 + + HCX = 43 + + HDI = 44 + + HDX = 45 + + HIP = 46 + + HSI = 47 + + HSP = 48 + + IP = 49 + + MXCSR = 50 + + RAX = 51 + + RBP = 52 + + RBX = 53 + + RCX = 54 + + RDI = 55 + + RDX = 56 + + RFLAGS = 57 + + RIP = 58 + + RIZ = 59 + + RSI = 60 + + RSP = 61 + + SI = 62 + + SIH = 63 + + SIL = 64 + + SP = 65 + + SPH = 66 + + SPL = 67 + + SS = 68 + + SSP = 69 + + CR0 = 71 + + CR1 = 72 + + CR2 = 73 + + CR3 = 74 + + CR4 = 75 + + CR5 = 76 + + CR6 = 77 + + CR7 = 78 + + CR8 = 79 + + CR9 = 80 + + CR10 = 81 + + CR11 = 82 + + CR12 = 83 + + CR13 = 84 + + CR14 = 85 + + CR15 = 86 + + DR0 = 87 + + DR1 = 88 + + DR2 = 89 + + DR3 = 90 + + DR4 = 91 + + DR5 = 92 + + DR6 = 93 + + DR7 = 94 + + DR8 = 95 + + DR9 = 96 + + DR10 = 97 + + DR11 = 
98 + + DR12 = 99 + + DR13 = 100 + + DR14 = 101 + + DR15 = 102 + + FP0 = 103 + + FP1 = 104 + + FP2 = 105 + + FP3 = 106 + + FP4 = 107 + + FP5 = 108 + + FP6 = 109 + + FP7 = 110 + + MM0 = 111 + + MM1 = 112 + + MM2 = 113 + + MM3 = 114 + + MM4 = 115 + + MM5 = 116 + + MM6 = 117 + + MM7 = 118 + + R8 = 119 + + R9 = 120 + + R10 = 121 + + R11 = 122 + + R12 = 123 + + R13 = 124 + + R14 = 125 + + R15 = 126 + + ST0 = 127 + + ST1 = 128 + + ST2 = 129 + + ST3 = 130 + + ST4 = 131 + + ST5 = 132 + + ST6 = 133 + + ST7 = 134 + + XMM0 = 135 + + XMM1 = 136 + + XMM2 = 137 + + XMM3 = 138 + + XMM4 = 139 + + XMM5 = 140 + + XMM6 = 141 + + XMM7 = 142 + + XMM8 = 143 + + XMM9 = 144 + + XMM10 = 145 + + XMM11 = 146 + + XMM12 = 147 + + XMM13 = 148 + + XMM14 = 149 + + XMM15 = 150 + + R8B = 151 + + R9B = 152 + + R10B = 153 + + R11B = 154 + + R12B = 155 + + R13B = 156 + + R14B = 157 + + R15B = 158 + + R8BH = 159 + + R9BH = 160 + + R10BH = 161 + + R11BH = 162 + + R12BH = 163 + + R13BH = 164 + + R14BH = 165 + + R15BH = 166 + + R8D = 167 + + R9D = 168 + + R10D = 169 + + R11D = 170 + + R12D = 171 + + R13D = 172 + + R14D = 173 + + R15D = 174 + + R8W = 175 + + R9W = 176 + + R10W = 177 + + R11W = 178 + + R12W = 179 + + R13W = 180 + + R14W = 181 + + R15W = 182 + + R8WH = 183 + + R9WH = 184 + + R10WH = 185 + + R11WH = 186 + + R12WH = 187 + + R13WH = 188 + + R14WH = 189 + + R15WH = 190 + + YMM0 = 191 + + YMM1 = 192 + + YMM2 = 193 + + YMM3 = 194 + + YMM4 = 195 + + YMM5 = 196 + + YMM6 = 197 + + YMM7 = 198 + + YMM8 = 199 + + YMM9 = 200 + + YMM10 = 201 + + YMM11 = 202 + + YMM12 = 203 + + YMM13 = 204 + + YMM14 = 205 + + YMM15 = 206 + + K0 = 207 + + K1 = 208 + + K2 = 209 + + K3 = 210 + + K4 = 211 + + K5 = 212 + + K6 = 213 + + K7 = 214 + + XMM16 = 215 + + XMM17 = 216 + + XMM18 = 217 + + XMM19 = 218 + + XMM20 = 219 + + XMM21 = 220 + + XMM22 = 221 + + XMM23 = 222 + + XMM24 = 223 + + XMM25 = 224 + + XMM26 = 225 + + XMM27 = 226 + + XMM28 = 227 + + XMM29 = 228 + + XMM30 = 229 + + XMM31 = 230 + + YMM16 = 231 + + YMM17 = 232 + 
+ YMM18 = 233 + + YMM19 = 234 + + YMM20 = 235 + + YMM21 = 236 + + YMM22 = 237 + + YMM23 = 238 + + YMM24 = 239 + + YMM25 = 240 + + YMM26 = 241 + + YMM27 = 242 + + YMM28 = 243 + + YMM29 = 244 + + YMM30 = 245 + + YMM31 = 246 + + ZMM0 = 247 + + ZMM1 = 248 + + ZMM2 = 249 + + ZMM3 = 250 + + ZMM4 = 251 + + ZMM5 = 252 + + ZMM6 = 253 + + ZMM7 = 254 + + ZMM8 = 255 + + ZMM9 = 256 + + ZMM10 = 257 + + ZMM11 = 258 + + ZMM12 = 259 + + ZMM13 = 260 + + ZMM14 = 261 + + ZMM15 = 262 + + ZMM16 = 263 + + ZMM17 = 264 + + ZMM18 = 265 + + ZMM19 = 266 + + ZMM20 = 267 + + ZMM21 = 268 + + ZMM22 = 269 + + ZMM23 = 270 + + ZMM24 = 271 + + ZMM25 = 272 + + ZMM26 = 273 + + ZMM27 = 274 + + ZMM28 = 275 + + ZMM29 = 276 + + ZMM30 = 277 + + ZMM31 = 278 + + K0_K1 = 279 + + K2_K3 = 280 + + K4_K5 = 281 + + K6_K7 = 282 + + TMMCFG = 283 + + TMM0 = 284 + + TMM1 = 285 + + TMM2 = 286 + + TMM3 = 287 + + TMM4 = 288 + + TMM5 = 289 + + TMM6 = 290 + + TMM7 = 291 + + R16 = 292 + + R17 = 293 + + R18 = 294 + + R19 = 295 + + R20 = 296 + + R21 = 297 + + R22 = 298 + + R23 = 299 + + R24 = 300 + + R25 = 301 + + R26 = 302 + + R27 = 303 + + R28 = 304 + + R29 = 305 + + R30 = 306 + + R31 = 307 + + R16B = 308 + + R17B = 309 + + R18B = 310 + + R19B = 311 + + R20B = 312 + + R21B = 313 + + R22B = 314 + + R23B = 315 + + R24B = 316 + + R25B = 317 + + R26B = 318 + + R27B = 319 + + R28B = 320 + + R29B = 321 + + R30B = 322 + + R31B = 323 + + R16BH = 324 + + R17BH = 325 + + R18BH = 326 + + R19BH = 327 + + R20BH = 328 + + R21BH = 329 + + R22BH = 330 + + R23BH = 331 + + R24BH = 332 + + R25BH = 333 + + R26BH = 334 + + R27BH = 335 + + R28BH = 336 + + R29BH = 337 + + R30BH = 338 + + R31BH = 339 + + R16D = 340 + + R17D = 341 + + R18D = 342 + + R19D = 343 + + R20D = 344 + + R21D = 345 + + R22D = 346 + + R23D = 347 + + R24D = 348 + + R25D = 349 + + R26D = 350 + + R27D = 351 + + R28D = 352 + + R29D = 353 + + R30D = 354 + + R31D = 355 + + R16W = 356 + + R17W = 357 + + R18W = 358 + + R19W = 359 + + R20W = 360 + + R21W = 361 + + R22W = 362 + + R23W 
= 363 + + R24W = 364 + + R25W = 365 + + R26W = 366 + + R27W = 367 + + R28W = 368 + + R29W = 369 + + R30W = 370 + + R31W = 371 + + R16WH = 372 + + R17WH = 373 + + R18WH = 374 + + R19WH = 375 + + R20WH = 376 + + R21WH = 377 + + R22WH = 378 + + R23WH = 379 + + R24WH = 380 + + R25WH = 381 + + R26WH = 382 + + R27WH = 383 + + R28WH = 384 + + R29WH = 385 + + R30WH = 386 + + R31WH = 387 + + NUM_TARGET_REGS = 388 diff --git a/api/python/lief/assembly/x86/operands/__init__.pyi b/api/python/lief/assembly/x86/operands/__init__.pyi new file mode 100644 index 0000000000..73e3023d81 --- /dev/null +++ b/api/python/lief/assembly/x86/operands/__init__.pyi @@ -0,0 +1,32 @@ +from typing import Iterator, Optional, Union + +import lief + + +class Immediate(lief.assembly.x86.Operand): + @property + def value(self) -> int: ... + +class Memory(lief.assembly.x86.Operand): + @property + def base(self) -> lief.assembly.x86.REG: ... + + @property + def scaled_register(self) -> lief.assembly.x86.REG: ... + + @property + def segment_register(self) -> lief.assembly.x86.REG: ... + + @property + def scale(self) -> int: ... + + @property + def displacement(self) -> int: ... + +class PCRelative(lief.assembly.x86.Operand): + @property + def value(self) -> int: ... + +class Register(lief.assembly.x86.Operand): + @property + def value(self) -> lief.assembly.x86.REG: ... 
diff --git a/api/python/src/asm/pyInstruction.cpp b/api/python/src/asm/pyInstruction.cpp index 567df9aa70..3960127fd2 100644 --- a/api/python/src/asm/pyInstruction.cpp +++ b/api/python/src/asm/pyInstruction.cpp @@ -1,6 +1,8 @@ #include #include #include "pyLIEF.hpp" +#include "pyErr.hpp" +#include "enums_wrapper.hpp" #include "LIEF/asm/Instruction.hpp" #include "asm/pyAssembly.hpp" @@ -18,6 +20,11 @@ void create(nb::module_& m) { )doc"_doc ); + enum_(obj, "MemoryAccess", nb::is_flag()) + .value("NONE", Instruction::MemoryAccess::NONE) + .value("READ", Instruction::MemoryAccess::READ) + .value("WRITE", Instruction::MemoryAccess::WRITE); + obj .def_prop_ro("address", &Instruction::address, R"doc(Address of the instruction)doc"_doc @@ -57,6 +64,87 @@ void create(nb::module_& m) { R"doc(True if the instruction is a syscall)doc"_doc ) + .def_prop_ro("is_memory_access", &Instruction::is_memory_access, + R"doc(True if the instruction performs a memory access)doc"_doc + ) + + .def_prop_ro("is_move_reg", &Instruction::is_move_reg, + R"doc(True if the instruction is a register to register move.)doc"_doc + ) + + .def_prop_ro("is_add", &Instruction::is_add, + R"doc(True if the instruction performs an arithmetic addition.)doc"_doc + ) + + .def_prop_ro("is_trap", &Instruction::is_trap, + R"doc( + True if the instruction is a trap. + + - On ``x86/x86-64`` this includes the ``ud1/ud2`` instructions + - On ``AArch64`` this includes the ``brk/udf`` instructions + )doc"_doc + ) + + .def_prop_ro("is_barrier", &Instruction::is_barrier, + R"doc( + True if the instruction prevents executing the instruction + that immediatly follows the current. This includes return + or unconditional branch instructions + )doc"_doc + ) + + .def_prop_ro("is_return", &Instruction::is_return, + R"doc(True if the instruction is a return)doc"_doc + ) + + .def_prop_ro("is_indirect_branch", &Instruction::is_indirect_branch, + R"doc( + True if the instruction is and indirect branch. 
+ + This includes instructions that branch through a register (e.g. + ``jmp rax``, ``br x1``). + )doc"_doc) + + .def_prop_ro("is_conditional_branch", &Instruction::is_conditional_branch, + R"doc( + True if the instruction is **conditionally** jumping to the next + instruction **or** an instruction into some other basic block. + )doc"_doc + ) + + .def_prop_ro("is_unconditional_branch", &Instruction::is_unconditional_branch, + R"doc( + True if the instruction is jumping (**unconditionally**) to some other + basic block. + )doc"_doc + ) + + .def_prop_ro("is_compare", &Instruction::is_compare, + R"doc(True if the instruction is a comparison)doc"_doc + ) + + .def_prop_ro("is_move_immediate", &Instruction::is_move_immediate, + R"doc(True if the instruction is moving an immediate)doc"_doc + ) + + .def_prop_ro("is_bitcast", &Instruction::is_bitcast, + R"doc(True if the instruction is doing a bitcast)doc"_doc + ) + + .def_prop_ro("memory_access", &Instruction::memory_access, + R"doc(Memory access flags)doc"_doc + ) + + .def_prop_ro("branch_target", + [] (Instruction& self) { + return LIEF::py::error_or(&Instruction::branch_target, self); + }, + R"doc( + Given a :attr:`~.Instruction.is_branch` instruction, try to evaluate the + address of the destination. 
+ )doc"_doc + ) + LIEF_DEFAULT_STR(Instruction) ; } diff --git a/api/python/src/asm/x86/CMakeLists.txt b/api/python/src/asm/x86/CMakeLists.txt index 3d0bc184a6..fa287c6f90 100644 --- a/api/python/src/asm/x86/CMakeLists.txt +++ b/api/python/src/asm/x86/CMakeLists.txt @@ -3,4 +3,7 @@ target_sources(pyLIEF PRIVATE pyInstruction.cpp pyOpcode.cpp pyRegister.cpp + pyOperand.cpp ) + +add_subdirectory(operands) diff --git a/api/python/src/asm/x86/init.cpp b/api/python/src/asm/x86/init.cpp index c0ca78dc64..5c007eba1c 100644 --- a/api/python/src/asm/x86/init.cpp +++ b/api/python/src/asm/x86/init.cpp @@ -1,7 +1,9 @@ #include "asm/x86/init.hpp" namespace LIEF::assembly::x86 { enum class OPCODE; +enum class REG; class Instruction; +class Operand; } namespace LIEF::assembly::x86::py { @@ -9,6 +11,8 @@ void init(nb::module_& m) { nb::module_ mod = m.def_submodule("x86"); create(mod); + create(mod); create(mod); + create(mod); } } diff --git a/api/python/src/asm/x86/operands/CMakeLists.txt b/api/python/src/asm/x86/operands/CMakeLists.txt new file mode 100644 index 0000000000..d7bf96508c --- /dev/null +++ b/api/python/src/asm/x86/operands/CMakeLists.txt @@ -0,0 +1,6 @@ +target_sources(pyLIEF PRIVATE + pyImmediate.cpp + pyRegister.cpp + pyMemory.cpp + pyPCRelative.cpp +) diff --git a/api/python/src/asm/x86/operands/pyImmediate.cpp b/api/python/src/asm/x86/operands/pyImmediate.cpp new file mode 100644 index 0000000000..e6844fd96a --- /dev/null +++ b/api/python/src/asm/x86/operands/pyImmediate.cpp @@ -0,0 +1,27 @@ +#include "asm/x86/init.hpp" +#include "LIEF/asm/x86/operands/Immediate.hpp" + +namespace LIEF::assembly::x86::py { +template<> +void create(nb::module_& m) { + nb::class_ obj(m, "Immediate", + R"doc( + This class represents an immediate operand (i.e. a constant) + + For instance: + + .. 
code-block:: text + + mov edi, 1; + | + +---> Immediate(1) + )doc"_doc + ); + + obj + .def_prop_ro("value", &operands::Immediate::value, + R"doc(The constant value wrapped by this operand)doc"_doc + ) + ; +} +} diff --git a/api/python/src/asm/x86/operands/pyMemory.cpp b/api/python/src/asm/x86/operands/pyMemory.cpp new file mode 100644 index 0000000000..7d109b0bf8 --- /dev/null +++ b/api/python/src/asm/x86/operands/pyMemory.cpp @@ -0,0 +1,69 @@ +#include "asm/x86/init.hpp" +#include "LIEF/asm/x86/operands/Memory.hpp" + +namespace LIEF::assembly::x86::py { +template<> +void create(nb::module_& m) { + nb::class_ obj(m, "Memory", + R"doc( + This class represents a memory operand. + + For instance: + + .. code-block:: text + + movq xmm3, qword ptr [rip + 823864]; + + | + | + Memory + | + +-----------+-----------+ + | | | + Base: rip Scale: 1 Displacement: 823864 + )doc"_doc + ); + + obj + .def_prop_ro("base", &x86::operands::Memory::base, + R"doc( + The base register. + + For ``lea rdx, [rip + 244634]`` it would return ``rip`` + )doc"_doc + ) + + .def_prop_ro("scaled_register", &x86::operands::Memory::scaled_register, + R"doc( + The scaled register. + + For ``mov rdi, qword ptr [r13 + 8*r14]`` it would return ``r14`` + )doc"_doc + ) + + .def_prop_ro("segment_register", &x86::operands::Memory::segment_register, + R"doc( + The segment register associated with the memory operation. + + For ``mov eax, dword ptr gs:[0]`` is would return ``gs`` + )doc"_doc + ) + + .def_prop_ro("scale", &x86::operands::Memory::scale, + R"doc( + The scale value associated with the :attr:`~.scaled_register`: + + For ``mov rdi, qword ptr [r13 + 8*r14]`` it would return ``8`` + )doc"_doc + ) + + .def_prop_ro("displacement", &x86::operands::Memory::displacement, + R"doc( + The displacement value. 
+ + For ``call qword ptr [rip + 248779]`` it would return ``248779`` + )doc"_doc + ) + ; +} +} diff --git a/api/python/src/asm/x86/operands/pyPCRelative.cpp b/api/python/src/asm/x86/operands/pyPCRelative.cpp new file mode 100644 index 0000000000..197dfa5998 --- /dev/null +++ b/api/python/src/asm/x86/operands/pyPCRelative.cpp @@ -0,0 +1,29 @@ +#include "asm/x86/init.hpp" +#include "LIEF/asm/x86/operands/PCRelative.hpp" + +namespace LIEF::assembly::x86::py { +template<> +void create(nb::module_& m) { + nb::class_ obj(m, "PCRelative", + R"doc( + This class represents a RIP/EIP-relative operand. + + For instance: + + .. code-block:: text + + jmp 67633; + | + +----------> PCRelative(67633) + )doc"_doc + ); + + obj + .def_prop_ro("value", &x86::operands::PCRelative::value, + R"doc( + The effective value that is relative to the current ``rip/eip`` register + )doc"_doc + ) + ; +} +} diff --git a/api/python/src/asm/x86/operands/pyRegister.cpp b/api/python/src/asm/x86/operands/pyRegister.cpp new file mode 100644 index 0000000000..77be2eab86 --- /dev/null +++ b/api/python/src/asm/x86/operands/pyRegister.cpp @@ -0,0 +1,31 @@ +#include "asm/x86/init.hpp" +#include "LIEF/asm/x86/operands/Register.hpp" + +namespace LIEF::assembly::x86::py { +template<> +void create(nb::module_& m) { + nb::class_ obj(m, "Register", + R"doc( + This class represents a register operand. + + For instance: + + .. 
code-block:: text + + mov r15d, edi + | | + | +---------> Register(EDI) + | + +---------------> Register(R15D) + )doc"_doc + ); + + obj + .def_prop_ro("value", &x86::operands::Register::value, + R"doc( + The effective :class:`lief.assembly.x86.REG` wrapped by this operand + )doc"_doc + ) + ; +} +} diff --git a/api/python/src/asm/x86/pyInstruction.cpp b/api/python/src/asm/x86/pyInstruction.cpp index ec1e80b636..f07f0b37e9 100644 --- a/api/python/src/asm/x86/pyInstruction.cpp +++ b/api/python/src/asm/x86/pyInstruction.cpp @@ -1,7 +1,12 @@ #include "LIEF/asm/x86/Instruction.hpp" +#include "LIEF/asm/x86/Operand.hpp" #include "asm/x86/init.hpp" +#include + +#include + namespace LIEF::assembly::x86::py { template<> void create(nb::module_& m) { @@ -10,6 +15,14 @@ void create(nb::module_& m) { ); obj + .def_prop_ro("operands", [] (const x86::Instruction& self) { + auto ops = self.operands(); + return nb::make_iterator( + nb::type(), "operands_it", ops + ); + }, nb::keep_alive<0, 1>(), + R"doc(Iterator over the operands of the current instruction)doc"_doc + ) .def_prop_ro("opcode", &Instruction::opcode, R"doc(The instruction opcode as defined in LLVM)doc"_doc ) diff --git a/api/python/src/asm/x86/pyOperand.cpp b/api/python/src/asm/x86/pyOperand.cpp new file mode 100644 index 0000000000..888f78cf55 --- /dev/null +++ b/api/python/src/asm/x86/pyOperand.cpp @@ -0,0 +1,39 @@ +#include +#include + +#include "LIEF/asm/x86/Operand.hpp" + +#include "asm/x86/init.hpp" + +#include + +#include "pyLIEF.hpp" + +namespace LIEF::assembly::x86::operands { +class Immediate; +class Register; +class Memory; +class PCRelative; +} + +namespace LIEF::assembly::x86::py { +template<> +void create(nb::module_& m) { + nb::class_ obj(m, "Operand", + R"doc(This class represents an operand for an x86/x86-64 instruction)doc"_doc + ); + + obj + .def_prop_ro("to_string", &Operand::to_string, + R"doc(Pretty representation of the operand)doc"_doc + ) + LIEF_DEFAULT_STR(x86::Operand) + ; + + nb::module_ 
operands = m.def_submodule("operands"); + create(operands); + create(operands); + create(operands); + create(operands); +} +} diff --git a/api/rust/autocxx_ffi.rs b/api/rust/autocxx_ffi.rs index b2639dc03e..08bdb6ada8 100644 --- a/api/rust/autocxx_ffi.rs +++ b/api/rust/autocxx_ffi.rs @@ -706,6 +706,26 @@ include_cpp! { /* X86 { */ generate!("asm_x86_Instruction") block_constructors!("asm_x86_Instruction") + + generate!("asm_x86_Instruction_it_operands") + block_constructors!("asm_x86_Instruction_it_operands") + + /* Operands { */ + generate!("asm_x86_Operand") + block_constructors!("asm_x86_Operand") + + generate!("asm_x86_operands_Register") + block_constructors!("asm_x86_operands_Register") + + generate!("asm_x86_operands_Memory") + block_constructors!("asm_x86_operands_Memory") + + generate!("asm_x86_operands_Immediate") + block_constructors!("asm_x86_operands_Immediate") + + generate!("asm_x86_operands_PCRelative") + block_constructors!("asm_x86_operands_PCRelative") + /* } */ /* } X86 */ /* Mips { */ diff --git a/api/rust/cargo/lief/src/assembly/instruction.rs b/api/rust/cargo/lief/src/assembly/instruction.rs index 9e36ec968e..d3c76e8390 100644 --- a/api/rust/cargo/lief/src/assembly/instruction.rs +++ b/api/rust/cargo/lief/src/assembly/instruction.rs @@ -2,9 +2,13 @@ use lief_ffi as ffi; +use bitflags::bitflags; + use crate::to_slice; use crate::common::FromFFI; +use crate::Error; +use crate::to_conv_result; use super::aarch64; use super::x86; @@ -14,6 +18,16 @@ use super::powerpc; use super::riscv; use super::ebpf; +bitflags! 
{ + #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub struct MemoryAccess: u64 { + const NONE = 0; + const READ = 1 << 0; + const WRITE = 1 << 1; + } +} + + /// This trait is shared by all [`Instructions`] supported by LIEF pub trait Instruction { #[doc(hidden)] @@ -68,6 +82,91 @@ pub trait Instruction { fn is_syscall(&self) -> bool { self.as_generic().is_syscall() } + + /// True if the instruction performs a memory access + fn is_memory_access(&self) -> bool { + self.as_generic().is_memory_access() + } + + /// True if the instruction is a register to register move. + fn is_move_reg(&self) -> bool { + self.as_generic().is_move_reg() + } + + /// True if the instruction performs an arithmetic addition. + fn is_add(&self) -> bool { + self.as_generic().is_add() + } + + /// True if the instruction is a trap. + /// + /// - On `x86/x86-64` this includes the `ud1/ud2` instructions + /// - On `AArch64` this includes the `brk/udf` instructions + fn is_trap(&self) -> bool { + self.as_generic().is_trap() + } + + /// True if the instruction prevents executing the instruction + /// that immediatly follows the current. This includes return + /// or unconditional branch instructions + fn is_barrier(&self) -> bool { + self.as_generic().is_barrier() + } + + /// True if the instruction is a return + fn is_return(&self) -> bool { + self.as_generic().is_return() + } + + /// True if the instruction is and indirect branch. + /// + /// This includes instructions that branch through a register (e.g. `jmp rax`, + /// `br x1`). + fn is_indirect_branch(&self) -> bool { + self.as_generic().is_indirect_branch() + } + + /// True if the instruction is **conditionally** jumping to the next + /// instruction **or** an instruction into some other basic block. + fn is_conditional_branch(&self) -> bool { + self.as_generic().is_conditional_branch() + } + + /// True if the instruction is jumping (**unconditionally**) to some other + /// basic block. 
+ fn is_unconditional_branch(&self) -> bool { + self.as_generic().is_unconditional_branch() + } + + /// True if the instruction is a comparison + fn is_compare(&self) -> bool { + self.as_generic().is_compare() + } + + /// True if the instruction is moving an immediate + fn is_move_immediate(&self) -> bool { + self.as_generic().is_move_immediate() + } + + /// True if the instruction is doing a bitcast + fn is_bitcast(&self) -> bool { + self.as_generic().is_bitcast() + } + + /// Memory access flags + fn memory_access(&self) -> MemoryAccess { + MemoryAccess::from_bits_truncate(self.as_generic().memory_access()) + } + + /// Given a [`Instruction::is_branch`] instruction, try to evaluate the address of the + /// destination. + fn branch_target(&self) -> Result { + to_conv_result!( + ffi::asm_Instruction::branch_target, + self.as_generic(), + |value| value + ); + } } /// All instruction variants supported by LIEF diff --git a/api/rust/cargo/lief/src/assembly/x86.rs b/api/rust/cargo/lief/src/assembly/x86.rs index 4d5b2739f1..650419beae 100644 --- a/api/rust/cargo/lief/src/assembly/x86.rs +++ b/api/rust/cargo/lief/src/assembly/x86.rs @@ -3,9 +3,16 @@ pub mod opcodes; pub mod instruction; pub mod registers; +pub mod operands; #[doc(inline)] pub use opcodes::Opcode; #[doc(inline)] pub use instruction::Instruction; + +#[doc(inline)] +pub use operands::Operands; + +#[doc(inline)] +pub use operands::Operand; diff --git a/api/rust/cargo/lief/src/assembly/x86/instruction.rs b/api/rust/cargo/lief/src/assembly/x86/instruction.rs index 8b8a0615d1..0334169256 100644 --- a/api/rust/cargo/lief/src/assembly/x86/instruction.rs +++ b/api/rust/cargo/lief/src/assembly/x86/instruction.rs @@ -4,6 +4,9 @@ use crate::common::FromFFI; use crate::assembly; use super::Opcode; +use crate::declare_fwd_iterator; +use crate::assembly::x86; + /// This structure represents a x86/x86-64 instruction pub struct Instruction { ptr: cxx::UniquePtr, @@ -29,4 +32,17 @@ impl Instruction { pub fn opcode(&self) -> 
Opcode { Opcode::from(self.ptr.opcode()) } + + /// Return an iterator over the [`x86::Operands`] operands + pub fn operands(&self) -> Operands { + Operands::new(self.ptr.operands()) + } } + +declare_fwd_iterator!( + Operands, + x86::Operands, + ffi::asm_Instruction, + ffi::asm_x86_Operand, + ffi::asm_x86_Instruction_it_operands +); diff --git a/api/rust/cargo/lief/src/assembly/x86/operands.rs b/api/rust/cargo/lief/src/assembly/x86/operands.rs new file mode 100644 index 0000000000..fc1b6af5f9 --- /dev/null +++ b/api/rust/cargo/lief/src/assembly/x86/operands.rs @@ -0,0 +1,139 @@ +use lief_ffi as ffi; + +use crate::common::FromFFI; + +pub mod immediate; +pub mod memory; +pub mod pc_relative; +pub mod register; + +#[doc(inline)] +pub use register::Register; + +#[doc(inline)] +pub use pc_relative::PCRelative; + +#[doc(inline)] +pub use immediate::Immediate; + +#[doc(inline)] +pub use memory::Memory; + +/// Trait shared by **all** [`Operands`] +pub trait Operand { + #[doc(hidden)] + fn as_generic(&self) -> &ffi::asm_x86_Operand; + + /// Pretty representation of the operand + fn to_string(&self) -> String { + self.as_generic().to_string().to_string() + } +} + +/// This enum represents the different kind of operands associated with [`super::Instruction`] +pub enum Operands { + /// A register operand (e.g. 
`RIP`) + Reg(Register), + + /// A RIP/EIP-relative operand + PCRelative(PCRelative), + + /// An immediate value + Imm(Immediate), + + /// A memory operand + Mem(Memory), + + /// Operand that is not correctly supported + Unknown(Unknown), +} + +impl FromFFI for Operands { + fn from_ffi(ptr: cxx::UniquePtr) -> Self { + unsafe { + let op_ref = ptr.as_ref().unwrap(); + if ffi::asm_x86_operands_Memory::classof(op_ref) { + let raw = { + type From = cxx::UniquePtr; + type To = cxx::UniquePtr; + std::mem::transmute::(ptr) + }; + return Operands::Mem(Memory::from_ffi(raw)); + } + else if ffi::asm_x86_operands_Register::classof(op_ref) { + let raw = { + type From = cxx::UniquePtr; + type To = cxx::UniquePtr; + std::mem::transmute::(ptr) + }; + return Operands::Reg(Register::from_ffi(raw)); + } + else if ffi::asm_x86_operands_Immediate::classof(op_ref) { + let raw = { + type From = cxx::UniquePtr; + type To = cxx::UniquePtr; + std::mem::transmute::(ptr) + }; + return Operands::Imm(Immediate::from_ffi(raw)); + } + else if ffi::asm_x86_operands_PCRelative::classof(op_ref) { + let raw = { + type From = cxx::UniquePtr; + type To = cxx::UniquePtr; + std::mem::transmute::(ptr) + }; + return Operands::PCRelative(PCRelative::from_ffi(raw)); + } + return Operands::Unknown(Unknown::from_ffi(ptr)); + } + } +} + +impl Operand for Operands { + #[doc(hidden)] + fn as_generic(&self) -> &ffi::asm_x86_Operand { + match &self { + Operands::Reg(op) => { + op.as_generic() + } + + Operands::Imm(op) => { + op.as_generic() + } + + Operands::Mem(op) => { + op.as_generic() + } + + Operands::PCRelative(op) => { + op.as_generic() + } + + Operands::Unknown(op) => { + op.as_generic() + } + } + } +} + +pub struct Unknown { + ptr: cxx::UniquePtr, +} + +impl FromFFI for Unknown { + fn from_ffi(ptr: cxx::UniquePtr) -> Self { + Self { + ptr, + } + } +} + +impl Operand for Unknown { + #[doc(hidden)] + fn as_generic(&self) -> &ffi::asm_x86_Operand { + self.ptr.as_ref().unwrap() + } +} + + + diff --git 
a/api/rust/cargo/lief/src/assembly/x86/operands/immediate.rs b/api/rust/cargo/lief/src/assembly/x86/operands/immediate.rs new file mode 100644 index 0000000000..1a518f32e5 --- /dev/null +++ b/api/rust/cargo/lief/src/assembly/x86/operands/immediate.rs @@ -0,0 +1,40 @@ +use lief_ffi as ffi; + +use crate::common::FromFFI; + +use super::Operand; + +/// This structure represents an immediate operand. +/// +/// For instance: +/// +/// ```text +/// mov edi, 1; +/// | +/// +---> Immediate(1) +/// ``` +pub struct Immediate { + ptr: cxx::UniquePtr, +} + +impl FromFFI for Immediate { + fn from_ffi(ptr: cxx::UniquePtr) -> Self { + Self { + ptr, + } + } +} + +impl Operand for Immediate { + #[doc(hidden)] + fn as_generic(&self) -> &ffi::asm_x86_Operand { + self.ptr.as_ref().unwrap().as_ref() + } +} + +impl Immediate { + /// The constant value wrapped by this operand + pub fn value(&self) -> i64 { + self.ptr.value() + } +} diff --git a/api/rust/cargo/lief/src/assembly/x86/operands/memory.rs b/api/rust/cargo/lief/src/assembly/x86/operands/memory.rs new file mode 100644 index 0000000000..8a27492125 --- /dev/null +++ b/api/rust/cargo/lief/src/assembly/x86/operands/memory.rs @@ -0,0 +1,77 @@ +use lief_ffi as ffi; + +use crate::{assembly::x86::registers::Reg, common::FromFFI}; + +use super::Operand; + +/// This structure represents a memory operand. +/// +/// For instance: +/// +/// ```text +/// movq xmm3, qword ptr [rip + 823864]; +/// +/// | +/// | +/// Memory +/// | +/// +-----------+-----------+ +/// | | | +/// Base: rip Scale: 1 Displacement: 823864 +/// +/// ``` +pub struct Memory { + ptr: cxx::UniquePtr, +} + +impl FromFFI for Memory { + fn from_ffi(ptr: cxx::UniquePtr) -> Self { + Self { + ptr, + } + } +} + +impl Operand for Memory { + #[doc(hidden)] + fn as_generic(&self) -> &ffi::asm_x86_Operand { + self.ptr.as_ref().unwrap().as_ref() + } +} + +impl Memory { + /// The base register. 
+ /// + /// For `lea rdx, [rip + 244634]` it would return [`Reg::RIP`] + pub fn base(&self) -> Reg { + Reg::from(self.ptr.base()) + } + + /// The scaled register. + /// + /// For `mov rdi, qword ptr [r13 + 8*r14]` it would return [`Reg::R14`] + pub fn scaled_register(&self) -> Reg { + Reg::from(self.ptr.scaled_register()) + } + + /// The segment register associated with the memory operation. + /// + /// For `mov eax, dword ptr gs:[0]` it would return [`Reg::GS`] + pub fn segment_register(&self) -> Reg { + Reg::from(self.ptr.segment_register()) + } + + /// The scale value associated with the [`Memory::scaled_register`] + /// + /// For `mov rdi, qword ptr [r13 + 8*r14]` it would return `8` + pub fn scale(&self) -> u64 { + self.ptr.scale() + } + + /// The displacement value + /// + /// For `call qword ptr [rip + 248779]` it would return `248779` + pub fn displacement(&self) -> i64 { + self.ptr.displacement() + } +} diff --git a/api/rust/cargo/lief/src/assembly/x86/operands/pc_relative.rs b/api/rust/cargo/lief/src/assembly/x86/operands/pc_relative.rs new file mode 100644 index 0000000000..d7e12ae485 --- /dev/null +++ b/api/rust/cargo/lief/src/assembly/x86/operands/pc_relative.rs @@ -0,0 +1,40 @@ +use lief_ffi as ffi; + +use crate::common::FromFFI; + +use super::Operand; + +/// This structure represents a RIP/EIP-relative operand. 
+/// +/// For instance: +/// +/// ```text +/// jmp 67633; +/// | +/// +----------> PCRelative(67633) +/// ``` +pub struct PCRelative { + ptr: cxx::UniquePtr, +} + +impl FromFFI for PCRelative { + fn from_ffi(ptr: cxx::UniquePtr) -> Self { + Self { + ptr, + } + } +} + +impl Operand for PCRelative { + #[doc(hidden)] + fn as_generic(&self) -> &ffi::asm_x86_Operand { + self.ptr.as_ref().unwrap().as_ref() + } +} + +impl PCRelative { + /// The effective value that is relative to the current `rip/eip` register + pub fn value(&self) -> i64 { + self.ptr.value() + } +} diff --git a/api/rust/cargo/lief/src/assembly/x86/operands/register.rs b/api/rust/cargo/lief/src/assembly/x86/operands/register.rs new file mode 100644 index 0000000000..84fc9cf350 --- /dev/null +++ b/api/rust/cargo/lief/src/assembly/x86/operands/register.rs @@ -0,0 +1,43 @@ +use lief_ffi as ffi; + +use crate::assembly::x86::registers::Reg; +use crate::common::FromFFI; + +use super::Operand; + +/// This structure represents a register operand. 
+/// +/// For instance: +/// +/// ```text +/// mov r15d, edi +/// | | +/// | +---------> Register(EDI) +/// | +/// +---------------> Register(R15D) +/// ``` +pub struct Register { + ptr: cxx::UniquePtr, +} + +impl FromFFI for Register { + fn from_ffi(ptr: cxx::UniquePtr) -> Self { + Self { + ptr, + } + } +} + +impl Operand for Register { + #[doc(hidden)] + fn as_generic(&self) -> &ffi::asm_x86_Operand { + self.ptr.as_ref().unwrap().as_ref() + } +} + +impl Register { + /// The effective register wrapped by this operand + pub fn value(&self) -> Reg { + Reg::from(self.ptr.value()) + } +} diff --git a/api/rust/cargo/lief/tests/disassembler_test.rs b/api/rust/cargo/lief/tests/disassembler_test.rs index 560a811b69..9dd883955a 100644 --- a/api/rust/cargo/lief/tests/disassembler_test.rs +++ b/api/rust/cargo/lief/tests/disassembler_test.rs @@ -1,5 +1,7 @@ mod utils; use lief; +use lief::assembly::x86; +use lief::assembly::x86::Operand; use lief::assembly::{Instruction, Instructions}; use lief::dwarf::types::{Base, ClassLike, DwarfType}; use lief::dwarf::{Parameter, Scope, Type}; @@ -8,8 +10,36 @@ use lief::generic::{Binary, Section}; use std::path::{Path, PathBuf}; fn process_instruction(inst: &lief::assembly::Instructions) { - format!("{} {} {} {}", inst.address(), inst.size(), inst.raw().len(), inst.mnemonic()); + //println!("{} {:#02x?}", inst.to_string(), inst.raw()); + println!("{}", inst.to_string()); + format!( + "{} {} {} {}", + inst.address(), + inst.size(), + inst.raw().len(), + inst.mnemonic() + ); format!("{}", inst.to_string()); + format!( + "{:?} {:?} {:?} {:?} {:?} {:?} {:?} {:?} {:?} {:?} {:?} {:?} {:?} {:?} {:?} {:?} {:?}", + inst.is_call(), + inst.is_syscall(), + inst.is_terminator(), + inst.is_branch(), + inst.is_memory_access(), + inst.is_move_reg(), + inst.is_add(), + inst.is_trap(), + inst.is_barrier(), + inst.is_return(), + inst.is_indirect_branch(), + inst.is_conditional_branch(), + inst.is_compare(), + inst.is_move_immediate(), + inst.is_bitcast(), 
+ inst.memory_access(), + inst.branch_target().unwrap_or(0) + ); match inst { Instructions::AArch64(variant) => { format!("{:?}", variant.opcode()); @@ -18,7 +48,36 @@ fn process_instruction(inst: &lief::assembly::Instructions) { format!("{:?}", variant.opcode()); } Instructions::X86(variant) => { + //println!("{}", inst.to_string()); format!("{:?}", variant.opcode()); + for op in variant.operands() { + format!("{}", op.to_string()); + match op { + x86::Operands::Reg(reg) => { + format!("{:?}", reg.value()); + } + + x86::Operands::Imm(imm) => { + format!("{}", imm.value()); + } + + x86::Operands::PCRelative(pcr) => { + format!("{}", pcr.value()); + } + + x86::Operands::Mem(mem) => { + format!( + "{:?}{:?}{:?}{}{}", + mem.base(), + mem.scaled_register(), + mem.segment_register(), + mem.scale(), + mem.displacement() + ); + } + x86::Operands::Unknown(_) => {} + } + } } Instructions::Mips(variant) => { format!("{:?}", variant.opcode()); @@ -32,8 +91,7 @@ fn process_instruction(inst: &lief::assembly::Instructions) { Instructions::RiscV(variant) => { format!("{:?}", variant.opcode()); } - Instructions::Generic(_) => { - } + Instructions::Generic(_) => {} } } @@ -53,13 +111,13 @@ fn disa_from_address(name: &str, address: u64) { for inst in elf.disassemble_address(address) { process_instruction(&inst); } - }, + } lief::Binary::PE(pe) => { for inst in pe.disassemble_address(address) { process_instruction(&inst); } - }, + } lief::Binary::MachO(fat) => { for macho in fat.iter() { @@ -67,11 +125,10 @@ fn disa_from_address(name: &str, address: u64) { process_instruction(&inst); } } - }, + } } } - fn disa_from_symbol(name: &str, symbol: &str) { let bin = get_binary(name); @@ -80,13 +137,13 @@ fn disa_from_symbol(name: &str, symbol: &str) { for inst in elf.disassemble_symbol(symbol) { process_instruction(&inst); } - }, + } lief::Binary::PE(pe) => { for inst in pe.disassemble_symbol(symbol) { process_instruction(&inst); } - }, + } lief::Binary::MachO(fat) => { for macho in 
fat.iter() { @@ -94,11 +151,10 @@ fn disa_from_symbol(name: &str, symbol: &str) { process_instruction(&inst); } } - }, + } } } - #[test] fn test_from_slice() { if !lief::is_extended() { diff --git a/api/rust/include/LIEF/rust/ASM.hpp b/api/rust/include/LIEF/rust/ASM.hpp index 1cc90b4da5..5b0acfdb84 100644 --- a/api/rust/include/LIEF/rust/ASM.hpp +++ b/api/rust/include/LIEF/rust/ASM.hpp @@ -18,6 +18,9 @@ #include "LIEF/rust/asm/aarch64/Instruction.hpp" #include "LIEF/rust/asm/x86/Instruction.hpp" +#include "LIEF/rust/asm/x86/operands.hpp" +#include "LIEF/rust/asm/x86/operands/Register.hpp" + #include "LIEF/rust/asm/arm/Instruction.hpp" #include "LIEF/rust/asm/powerpc/Instruction.hpp" #include "LIEF/rust/asm/mips/Instruction.hpp" diff --git a/api/rust/include/LIEF/rust/asm/Instruction.hpp b/api/rust/include/LIEF/rust/asm/Instruction.hpp index 5448061899..ba5a986166 100644 --- a/api/rust/include/LIEF/rust/asm/Instruction.hpp +++ b/api/rust/include/LIEF/rust/asm/Instruction.hpp @@ -15,6 +15,8 @@ #pragma once #include "LIEF/asm/Instruction.hpp" #include "LIEF/rust/Span.hpp" +#include "LIEF/rust/helpers.hpp" +#include "LIEF/rust/error.hpp" #include "LIEF/rust/Mirror.hpp" @@ -41,5 +43,20 @@ class asm_Instruction : public Mirror { auto is_branch() const { return get().is_branch(); } auto is_syscall() const { return get().is_syscall(); } - + auto is_memory_access() const { return get().is_memory_access(); } + auto is_move_reg() const { return get().is_move_reg(); } + auto is_add() const { return get().is_add(); } + auto is_trap() const { return get().is_trap(); } + auto is_barrier() const { return get().is_barrier(); } + auto is_return() const { return get().is_return(); } + auto is_indirect_branch() const { return get().is_indirect_branch(); } + auto is_conditional_branch() const { return get().is_conditional_branch(); } + auto is_unconditional_branch() const { return get().is_unconditional_branch(); } + auto is_compare() const { return get().is_compare(); } + auto 
is_move_immediate() const { return get().is_move_immediate(); } + auto is_bitcast() const { return get().is_bitcast(); } + uint64_t memory_access() const { return to_int(get().memory_access()); } + uint64_t branch_target(uint32_t& err) const { + return details::make_error(get().branch_target(), err); + } }; diff --git a/api/rust/include/LIEF/rust/asm/x86/Instruction.hpp b/api/rust/include/LIEF/rust/asm/x86/Instruction.hpp index f1a274238d..c6b813f756 100644 --- a/api/rust/include/LIEF/rust/asm/x86/Instruction.hpp +++ b/api/rust/include/LIEF/rust/asm/x86/Instruction.hpp @@ -16,14 +16,28 @@ #include #include "LIEF/rust/asm/Instruction.hpp" +#include "LIEF/rust/asm/x86/Operand.hpp" #include "LIEF/rust/helpers.hpp" +#include "LIEF/rust/Iterator.hpp" class asm_x86_Instruction : public asm_Instruction { public: using lief_t = LIEF::assembly::x86::Instruction; - uint64_t opcode() const { - return to_int(impl().opcode()); + class it_operands : + public ForwardIterator + { + public: + it_operands(const asm_x86_Instruction::lief_t& src) + : ForwardIterator(src.operands()) { } + + auto next() { return ForwardIterator::next(); } + }; + + uint64_t opcode() const { return to_int(impl().opcode()); } + + auto operands() const { + return std::make_unique(impl()); } static bool classof(const asm_Instruction& inst) { diff --git a/api/rust/include/LIEF/rust/asm/x86/Operand.hpp b/api/rust/include/LIEF/rust/asm/x86/Operand.hpp new file mode 100644 index 0000000000..d027854d65 --- /dev/null +++ b/api/rust/include/LIEF/rust/asm/x86/Operand.hpp @@ -0,0 +1,28 @@ +/* Copyright 2022 - 2024 R. Thomas + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include "LIEF/asm/x86/Operand.hpp" + +#include "LIEF/rust/Mirror.hpp" + +class asm_x86_Operand : public Mirror { + public: + using lief_t = LIEF::assembly::x86::Operand; + using Mirror::Mirror; + + std::string to_string() const { + return get().to_string(); + } +}; diff --git a/api/rust/include/LIEF/rust/asm/x86/operands.hpp b/api/rust/include/LIEF/rust/asm/x86/operands.hpp new file mode 100644 index 0000000000..c500ea7b04 --- /dev/null +++ b/api/rust/include/LIEF/rust/asm/x86/operands.hpp @@ -0,0 +1,6 @@ +#pragma once +#include "LIEF/rust/asm/x86/Operand.hpp" +#include "LIEF/rust/asm/x86/operands/Register.hpp" +#include "LIEF/rust/asm/x86/operands/Immediate.hpp" +#include "LIEF/rust/asm/x86/operands/PCRelative.hpp" +#include "LIEF/rust/asm/x86/operands/Memory.hpp" diff --git a/api/rust/include/LIEF/rust/asm/x86/operands/Immediate.hpp b/api/rust/include/LIEF/rust/asm/x86/operands/Immediate.hpp new file mode 100644 index 0000000000..5d749c7fe7 --- /dev/null +++ b/api/rust/include/LIEF/rust/asm/x86/operands/Immediate.hpp @@ -0,0 +1,34 @@ +/* Copyright 2024 R. Thomas + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include + +#include "LIEF/rust/asm/x86/Operand.hpp" + +class asm_x86_operands_Immediate : public asm_x86_Operand { + public: + using lief_t = LIEF::assembly::x86::operands::Immediate; + + auto value() const { + return impl().value(); + } + + static bool classof(const asm_x86_Operand& inst) { + return lief_t::classof(&inst.get()); + } + + private: + const lief_t& impl() const { return as(this); } +}; diff --git a/api/rust/include/LIEF/rust/asm/x86/operands/Memory.hpp b/api/rust/include/LIEF/rust/asm/x86/operands/Memory.hpp new file mode 100644 index 0000000000..0567865bf4 --- /dev/null +++ b/api/rust/include/LIEF/rust/asm/x86/operands/Memory.hpp @@ -0,0 +1,38 @@ +/* Copyright 2024 R. Thomas + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once +#include + +#include "LIEF/rust/asm/x86/Operand.hpp" +#include "LIEF/rust/helpers.hpp" + +class asm_x86_operands_Memory : public asm_x86_Operand { + public: + using lief_t = LIEF::assembly::x86::operands::Memory; + + uint64_t base() const { return to_int(impl().base()); } + uint64_t scaled_register() const { return to_int(impl().scaled_register()); } + uint64_t segment_register() const { return to_int(impl().segment_register()); } + + auto scale() const { return impl().scale(); } + auto displacement() const { return impl().displacement(); } + + static bool classof(const asm_x86_Operand& inst) { + return lief_t::classof(&inst.get()); + } + + private: + const lief_t& impl() const { return as(this); } +}; diff --git a/api/rust/include/LIEF/rust/asm/x86/operands/PCRelative.hpp b/api/rust/include/LIEF/rust/asm/x86/operands/PCRelative.hpp new file mode 100644 index 0000000000..887f64dae0 --- /dev/null +++ b/api/rust/include/LIEF/rust/asm/x86/operands/PCRelative.hpp @@ -0,0 +1,34 @@ +/* Copyright 2024 R. Thomas + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once +#include + +#include "LIEF/rust/asm/x86/Operand.hpp" + +class asm_x86_operands_PCRelative : public asm_x86_Operand { + public: + using lief_t = LIEF::assembly::x86::operands::PCRelative; + + auto value() const { + return impl().value(); + } + + static bool classof(const asm_x86_Operand& inst) { + return lief_t::classof(&inst.get()); + } + + private: + const lief_t& impl() const { return as(this); } +}; diff --git a/api/rust/include/LIEF/rust/asm/x86/operands/Register.hpp b/api/rust/include/LIEF/rust/asm/x86/operands/Register.hpp new file mode 100644 index 0000000000..1c2ffe20de --- /dev/null +++ b/api/rust/include/LIEF/rust/asm/x86/operands/Register.hpp @@ -0,0 +1,35 @@ +/* Copyright 2024 R. Thomas + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once +#include + +#include "LIEF/rust/asm/x86/Operand.hpp" +#include "LIEF/rust/helpers.hpp" + +class asm_x86_operands_Register : public asm_x86_Operand { + public: + using lief_t = LIEF::assembly::x86::operands::Register; + + uint64_t value() const { + return to_int(impl().value()); + } + + static bool classof(const asm_x86_Operand& inst) { + return lief_t::classof(&inst.get()); + } + + private: + const lief_t& impl() const { return as(this); } +}; diff --git a/doc/sphinx/extended/disassembler/cpp/arch/x86.rst b/doc/sphinx/extended/disassembler/cpp/arch/x86.rst index e12ea391ad..5ab97fd5e2 100644 --- a/doc/sphinx/extended/disassembler/cpp/arch/x86.rst +++ b/doc/sphinx/extended/disassembler/cpp/arch/x86.rst @@ -10,3 +10,28 @@ Opcodes ******* See ``LIEF::assembly::x86::OPCODE`` in ``include/asm/x86/opcodes.hpp`` + +Operands +******** + +.. doxygenclass:: LIEF::assembly::x86::Operand + +Immediate +~~~~~~~~~ + +.. doxygenclass:: LIEF::assembly::x86::operands::Immediate + +Register +~~~~~~~~ + +.. doxygenclass:: LIEF::assembly::x86::operands::Register + +Memory +~~~~~~ + +.. doxygenclass:: LIEF::assembly::x86::operands::Memory + +PCRelative +~~~~~~~~~~ + +.. 
doxygenclass:: LIEF::assembly::x86::operands::PCRelative diff --git a/doc/sphinx/extended/disassembler/cpp/index.rst b/doc/sphinx/extended/disassembler/cpp/index.rst index 3801ae2b88..2feef63aae 100644 --- a/doc/sphinx/extended/disassembler/cpp/index.rst +++ b/doc/sphinx/extended/disassembler/cpp/index.rst @@ -9,10 +9,10 @@ Architectures :maxdepth: 1 arch/x86 - arch/arm arch/aarch64 - arch/ebpf arch/riscv + arch/arm + arch/ebpf arch/mips arch/powerpc diff --git a/doc/sphinx/extended/disassembler/index.rst b/doc/sphinx/extended/disassembler/index.rst index 9327e247ac..d672edb482 100644 --- a/doc/sphinx/extended/disassembler/index.rst +++ b/doc/sphinx/extended/disassembler/index.rst @@ -188,7 +188,7 @@ A disassembling API is also provided for the |lief-dsc-dyldsharedcache| object: dyld_cache: lief.dsc.DylibSharedCache = lief.dsc.load("macos-15.0.1/") - for inst in dyld_cache: + for inst in dyld_cache.disassemble(0x1886f4a44): print(inst) .. tab:: :fa:`regular fa-file-code` C++ diff --git a/doc/sphinx/extended/disassembler/python/arch/x86.rst b/doc/sphinx/extended/disassembler/python/arch/x86.rst new file mode 100644 index 0000000000..adb68883d4 --- /dev/null +++ b/doc/sphinx/extended/disassembler/python/arch/x86.rst @@ -0,0 +1,62 @@ +:fa:`solid fa-microchip` x86/x86-64 +----------------------------------- + +Instruction +************ + +.. lief-inheritance:: lief._lief.assembly.x86.Instruction + :top-classes: lief._lief.assembly.Instruction + :parts: 2 + +.. autoclass:: lief.assembly.x86.Instruction + +Opcodes +******* + +See: ``lief.assembly.x86.OPCODE`` + +Operands +******** + + +.. lief-inheritance:: lief._lief.assembly.x86.Operand + :top-classes: lief._lief.assembly.x86.Operand + :parts: 2 + +.. autoclass:: lief.assembly.x86.Operand + +Immediate +~~~~~~~~~ + +.. lief-inheritance:: lief._lief.assembly.x86.operands.Immediate + :top-classes: lief._lief.assembly.x86.Operand + :parts: 2 + +.. autoclass:: lief.assembly.x86.operands.Immediate + +Register +~~~~~~~~ + +.. 
lief-inheritance:: lief._lief.assembly.x86.operands.Register + :top-classes: lief._lief.assembly.x86.Operand + :parts: 2 + +.. autoclass:: lief.assembly.x86.operands.Register + +Memory +~~~~~~ + +.. lief-inheritance:: lief._lief.assembly.x86.operands.Memory + :top-classes: lief._lief.assembly.x86.Operand + :parts: 2 + +.. autoclass:: lief.assembly.x86.operands.Memory + +PCRelative +~~~~~~~~~~ + +.. lief-inheritance:: lief._lief.assembly.x86.operands.PCRelative + :top-classes: lief._lief.assembly.x86.Operand + :parts: 2 + +.. autoclass:: lief.assembly.x86.operands.PCRelative diff --git a/doc/sphinx/extended/disassembler/python/index.rst b/doc/sphinx/extended/disassembler/python/index.rst index eeaf6c5b04..2848d90259 100644 --- a/doc/sphinx/extended/disassembler/python/index.rst +++ b/doc/sphinx/extended/disassembler/python/index.rst @@ -8,8 +8,9 @@ Architectures :caption:  Architectures :maxdepth: 1 - arch/arm + arch/x86 arch/aarch64 + arch/arm arch/ebpf arch/riscv arch/mips diff --git a/include/LIEF/asm/Instruction.hpp b/include/LIEF/asm/Instruction.hpp index 1deca60211..14dbcb2ad2 100644 --- a/include/LIEF/asm/Instruction.hpp +++ b/include/LIEF/asm/Instruction.hpp @@ -16,6 +16,7 @@ #define LIEF_ASM_INST_H #include "LIEF/visibility.h" #include "LIEF/iterators.hpp" +#include "LIEF/errors.hpp" #include #include @@ -67,6 +68,14 @@ class LIEF_API Instruction { std::unique_ptr impl_; }; public: + /// Memory operation flags + enum class MemoryAccess : uint8_t { + NONE = 0, + READ = 1 << 0, + WRITE = 1 << 1, + READ_WRITE = READ | WRITE, + }; + /// Address of the instruction uint64_t address() const; @@ -94,6 +103,59 @@ class LIEF_API Instruction { /// True if the instruction is a syscall bool is_syscall() const; + /// True if the instruction performs a memory access + bool is_memory_access() const; + + /// True if the instruction is a register to register move. + bool is_move_reg() const; + + /// True if the instruction performs an arithmetic addition. 
+ bool is_add() const; + + /// True if the instruction is a trap. + /// + /// - On `x86/x86-64` this includes the `ud1/ud2` instructions + /// - On `AArch64` this includes the `brk/udf` instructions + bool is_trap() const; + + /// True if the instruction prevents executing the instruction + /// that immediately follows the current. This includes return + /// or unconditional branch instructions + bool is_barrier() const; + + /// True if the instruction is a return + bool is_return() const; + + /// True if the instruction is an indirect branch. + /// + /// This includes instructions that branch through a register (e.g. `jmp rax`, + /// `br x1`). + bool is_indirect_branch() const; + + /// True if the instruction is **conditionally** jumping to the next + /// instruction **or** an instruction into some other basic block. + bool is_conditional_branch() const; + + /// True if the instruction is jumping (**unconditionally**) to some other + /// basic block. + bool is_unconditional_branch() const; + + /// True if the instruction is a comparison + bool is_compare() const; + + /// True if the instruction is moving an immediate + bool is_move_immediate() const; + + /// True if the instruction is doing a bitcast + bool is_bitcast() const; + + /// Memory access flags + MemoryAccess memory_access() const; + + /// Given a is_branch() instruction, try to evaluate the address of the + /// destination. + result branch_target() const; + /// This function can be used to **down cast** an Instruction instance: /// /// ```cpp diff --git a/include/LIEF/asm/x86.hpp b/include/LIEF/asm/x86.hpp index e2e3d7a867..c2b2afc960 100644 --- a/include/LIEF/asm/x86.hpp +++ b/include/LIEF/asm/x86.hpp @@ -13,9 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef LIEF_ASM_AARCH64_H -#define LIEF_ASM_AARCH64_H -#include -#include -#include +#ifndef LIEF_ASM_X86_H +#define LIEF_ASM_X86_H +#include +#include +#include +#include #endif diff --git a/include/LIEF/asm/x86/Instruction.hpp b/include/LIEF/asm/x86/Instruction.hpp index fdee136be5..38338b4699 100644 --- a/include/LIEF/asm/x86/Instruction.hpp +++ b/include/LIEF/asm/x86/Instruction.hpp @@ -18,6 +18,7 @@ #include "LIEF/asm/Instruction.hpp" #include "LIEF/asm/x86/opcodes.hpp" +#include "LIEF/asm/x86/Operand.hpp" namespace LIEF { namespace assembly { @@ -30,9 +31,14 @@ class LIEF_API Instruction : public assembly::Instruction { public: using assembly::Instruction::Instruction; + using operands_it = iterator_range; + /// The instruction opcode as defined in LLVM OPCODE opcode() const; + /// Iterator over the operands of the current instruction + operands_it operands() const; + /// True if `inst` is an **effective** instance of x86::Instruction static bool classof(const assembly::Instruction* inst); diff --git a/include/LIEF/asm/x86/Operand.hpp b/include/LIEF/asm/x86/Operand.hpp new file mode 100644 index 0000000000..9feec7725b --- /dev/null +++ b/include/LIEF/asm/x86/Operand.hpp @@ -0,0 +1,125 @@ +/* Copyright 2022 - 2024 R. Thomas + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef LIEF_ASM_X86_OPERAND_H +#define LIEF_ASM_X86_OPERAND_H +#include "LIEF/visibility.h" +#include "LIEF/iterators.hpp" + +#include +#include +#include + +#include + +namespace LIEF { +namespace assembly { +namespace x86 { + +namespace details { +class Operand; +class OperandIt; +} + +/// This class represents an operand for an x86/x86-64 instruction +class LIEF_API Operand { + public: + + /// **Forward** iterator that outputs x86 Operand as `std::unique_ptr` + class Iterator final : + public iterator_facade_base, + std::ptrdiff_t, Operand*, std::unique_ptr> + { + public: + using implementation = details::OperandIt; + + LIEF_API Iterator(); + + LIEF_API Iterator(std::unique_ptr impl); + LIEF_API Iterator(const Iterator&); + LIEF_API Iterator& operator=(const Iterator&); + + LIEF_API Iterator(Iterator&&) noexcept; + LIEF_API Iterator& operator=(Iterator&&) noexcept; + + LIEF_API ~Iterator(); + + LIEF_API Iterator& operator++(); + + friend LIEF_API bool operator==(const Iterator& LHS, const Iterator& RHS); + + friend bool operator!=(const Iterator& LHS, const Iterator& RHS) { + return !(LHS == RHS); + } + + LIEF_API std::unique_ptr operator*() const; + + private: + std::unique_ptr impl_; + }; + + /// Pretty representation of the operand + std::string to_string() const; + + /// This function can be used to **down cast** an Operand instance: + /// + /// ```cpp + /// std::unique_ptr op = ...; + /// if (const auto* memory = inst->as()) { + /// const assembly::x86::REG base = memory->base(); + /// } + /// ``` + template + const T* as() const { + static_assert(std::is_base_of::value, + "Require Operand inheritance"); + if (T::classof(this)) { + return static_cast(this); + } + return nullptr; + } + + virtual ~Operand(); + + /// \private + static LIEF_LOCAL std::unique_ptr + create(std::unique_ptr impl); + + /// \private + LIEF_LOCAL const details::Operand& impl() const { + assert(impl_ != nullptr); + return *impl_; + } + + /// \private + LIEF_LOCAL 
details::Operand& impl() { + assert(impl_ != nullptr); + return *impl_; + } + + friend LIEF_API std::ostream& operator<<(std::ostream& os, const Operand& op) { + os << op.to_string(); + return os; + } + + protected: + LIEF_LOCAL Operand(std::unique_ptr impl); + std::unique_ptr impl_; +}; + +} +} +} + +#endif diff --git a/include/LIEF/asm/x86/operands.hpp b/include/LIEF/asm/x86/operands.hpp new file mode 100644 index 0000000000..b06d5da793 --- /dev/null +++ b/include/LIEF/asm/x86/operands.hpp @@ -0,0 +1,22 @@ +/* Copyright 2017 - 2024 R. Thomas + * Copyright 2017 - 2024 Quarkslab + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef LIEF_ASM_X86_OPERANDS_H +#define LIEF_ASM_X86_OPERANDS_H +#include +#include +#include +#include +#endif diff --git a/include/LIEF/asm/x86/operands/Immediate.hpp b/include/LIEF/asm/x86/operands/Immediate.hpp new file mode 100644 index 0000000000..60991c3a14 --- /dev/null +++ b/include/LIEF/asm/x86/operands/Immediate.hpp @@ -0,0 +1,49 @@ +/* Copyright 2022 - 2024 R. Thomas + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef LIEF_ASM_X86_OPERAND_IMM_H +#define LIEF_ASM_X86_OPERAND_IMM_H +#include "LIEF/asm/x86/Operand.hpp" + +namespace LIEF { +namespace assembly { +namespace x86 { +/// Namespace that wraps the different x86/x86-64 operands +namespace operands { + + +/// This class represents an immediate operand (i.e. a constant) +/// +/// For instance: +/// +/// ```text +/// mov edi, 1; +/// | +/// +---> Immediate(1) +/// ``` +class LIEF_API Immediate : public Operand { + public: + using Operand::Operand; + + /// The constant value wrapped by this operand + int64_t value() const; + + static bool classof(const Operand* op); + ~Immediate() override = default; +}; +} +} +} +} +#endif diff --git a/include/LIEF/asm/x86/operands/Memory.hpp b/include/LIEF/asm/x86/operands/Memory.hpp new file mode 100644 index 0000000000..15eac12540 --- /dev/null +++ b/include/LIEF/asm/x86/operands/Memory.hpp @@ -0,0 +1,77 @@ +/* Copyright 2022 - 2024 R. Thomas + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef LIEF_ASM_X86_OPERAND_MEMORY_H +#define LIEF_ASM_X86_OPERAND_MEMORY_H +#include "LIEF/asm/x86/Operand.hpp" +#include "LIEF/asm/x86/registers.hpp" + +namespace LIEF { +namespace assembly { +namespace x86 { +namespace operands { + +/// This class represents a memory operand. 
+/// +/// For instance: +/// +/// ```text +/// movq xmm3, qword ptr [rip + 823864]; +/// +/// | +/// | +/// Memory +/// | +/// +-----------+-----------+ +/// | | | +/// Base: rip Scale: 1 Displacement: 823864 +/// +/// ``` +class LIEF_API Memory : public Operand { + public: + using Operand::Operand; + + /// The base register. + /// + /// For `lea rdx, [rip + 244634]` it would return `rip` + REG base() const; + + /// The scaled register. + /// + /// For `mov rdi, qword ptr [r13 + 8*r14]` it would return `r14` + REG scaled_register() const; + + /// The segment register associated with the memory operation. + /// + /// For `mov eax, dword ptr gs:[0]` it would return `gs` + REG segment_register() const; + + /// The scale value associated with the scaled_register(): + /// + /// For `mov rdi, qword ptr [r13 + 8*r14]` it would return `8` + uint64_t scale() const; + + /// The displacement value + /// + /// For `call qword ptr [rip + 248779]` it would return `248779` + int64_t displacement() const; + + static bool classof(const Operand* op); + ~Memory() override = default; +}; +} +} +} +} +#endif diff --git a/include/LIEF/asm/x86/operands/PCRelative.hpp b/include/LIEF/asm/x86/operands/PCRelative.hpp new file mode 100644 index 0000000000..acc256a341 --- /dev/null +++ b/include/LIEF/asm/x86/operands/PCRelative.hpp @@ -0,0 +1,48 @@ +/* Copyright 2022 - 2024 R. Thomas + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +#ifndef LIEF_ASM_X86_OPERAND_PCREL_H +#define LIEF_ASM_X86_OPERAND_PCREL_H +#include "LIEF/asm/x86/Operand.hpp" + +namespace LIEF { +namespace assembly { +namespace x86 { +namespace operands { + +/// This class represents a RIP/EIP-relative operand. +/// +/// For instance: +/// +/// ```text +/// jmp 67633; +/// | +/// +----------> PCRelative(67633) +/// ``` +class LIEF_API PCRelative : public Operand { + public: + using Operand::Operand; + + /// The effective value that is relative to the current `rip/eip` register + int64_t value() const; + + static bool classof(const Operand* op); + + ~PCRelative() override = default; +}; +} +} +} +} +#endif diff --git a/include/LIEF/asm/x86/operands/Register.hpp b/include/LIEF/asm/x86/operands/Register.hpp new file mode 100644 index 0000000000..7b6ca35715 --- /dev/null +++ b/include/LIEF/asm/x86/operands/Register.hpp @@ -0,0 +1,51 @@ +/* Copyright 2022 - 2024 R. Thomas + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef LIEF_ASM_X86_OPERAND_REG_H +#define LIEF_ASM_X86_OPERAND_REG_H + +#include "LIEF/asm/x86/Operand.hpp" +#include "LIEF/asm/x86/registers.hpp" + +namespace LIEF { +namespace assembly { +namespace x86 { +namespace operands { + +/// This class represents a register operand. 
+/// +/// For instance: +/// +/// ```text +/// mov r15d, edi +/// | | +/// | +---------> Register(EDI) +/// | +/// +---------------> Register(R15D) +/// ``` +class LIEF_API Register : public Operand { + public: + using Operand::Operand; + + /// The effective REG wrapped by this operand + REG value() const; + + static bool classof(const Operand* op); + ~Register() override = default; +}; +} +} +} +} +#endif diff --git a/src/asm/asm.cpp b/src/asm/asm.cpp index db8724d16c..2d3bb476f8 100644 --- a/src/asm/asm.cpp +++ b/src/asm/asm.cpp @@ -19,8 +19,14 @@ #include "LIEF/asm/aarch64/registers.hpp" #include "LIEF/asm/x86/Instruction.hpp" +#include "LIEF/asm/x86/Operand.hpp" #include "LIEF/asm/x86/registers.hpp" +#include "LIEF/asm/x86/operands/Immediate.hpp" +#include "LIEF/asm/x86/operands/Register.hpp" +#include "LIEF/asm/x86/operands/PCRelative.hpp" +#include "LIEF/asm/x86/operands/Memory.hpp" + #include "LIEF/asm/arm/Instruction.hpp" #include "LIEF/asm/arm/registers.hpp" @@ -85,6 +91,11 @@ class InstructionIt {}; class Engine {}; } +namespace x86::details { +class Operand {}; +class OperandIt {}; +} + // ---------------------------------------------------------------------------- // asm/Instruction.hpp // ---------------------------------------------------------------------------- @@ -169,6 +180,24 @@ bool Instruction::is_syscall() const { return false; } +bool Instruction::is_memory_access() const { return false; } +bool Instruction::is_move_reg() const { return false; } +bool Instruction::is_add() const { return false; } +bool Instruction::is_trap() const { return false; } +bool Instruction::is_barrier() const { return false; } +bool Instruction::is_return() const { return false; } +bool Instruction::is_indirect_branch() const { return false; } +bool Instruction::is_conditional_branch() const { return false; } +bool Instruction::is_unconditional_branch() const { return false; } +bool Instruction::is_compare() const { return false; } +bool 
Instruction::is_move_immediate() const { return false; } +bool Instruction::is_bitcast() const { return false; } +Instruction::MemoryAccess Instruction::memory_access() const { return MemoryAccess::NONE; } + +result Instruction::branch_target() const { + return make_error_code(lief_errors::not_implemented); +} + // ---------------------------------------------------------------------------- // asm/Engine.hpp // ---------------------------------------------------------------------------- @@ -234,6 +263,10 @@ const char* x86::get_register_name(REG) { return ""; } +x86::Instruction::operands_it x86::Instruction::operands() const { + return make_empty_iterator(); +} + // ---------------------------------------------------------------------------- // asm/arm/Instruction.hpp // ---------------------------------------------------------------------------- @@ -309,5 +342,99 @@ const char* powerpc::get_register_name(REG) { return ""; } +// ---------------------------------------------------------------------------- +// asm/x86/Operand.hpp +// ---------------------------------------------------------------------------- +x86::Operand::Iterator::Iterator() : + impl_(nullptr) +{} + +x86::Operand::Iterator::Iterator(std::unique_ptr) : + impl_(nullptr) +{} + +x86::Operand::Iterator::Iterator(const Iterator&) : + impl_(nullptr) +{} + +x86::Operand::Iterator& x86::Operand::Iterator::operator=(const Iterator&) { + return *this; +} + +x86::Operand::Iterator::Iterator(Iterator&&) noexcept = default; +x86::Operand::Iterator& x86::Operand::Iterator::operator=(Iterator&&) noexcept = default; + +x86::Operand::Iterator& x86::Operand::Iterator::operator++() { + return *this; +} + +std::unique_ptr x86::Operand::Iterator::operator*() const { + return nullptr; +} + +bool x86::operator==(const x86::Operand::Iterator&, const x86::Operand::Iterator&) { + return true; +} + +x86::Operand::Iterator::~Iterator() = default; + +x86::Operand::~Operand() = default; 
+x86::Operand::Operand(std::unique_ptr/*impl*/) : + impl_(nullptr) +{} + +std::string x86::Operand::to_string() const { + return ""; +} + +std::unique_ptr x86::Operand::create(std::unique_ptr /*impl*/) { + return nullptr; +} + +// ---------------------------------------------------------------------------- +// asm/x86/operands/Memory.hpp +// ---------------------------------------------------------------------------- +bool x86::operands::Memory::classof(const Operand*) { return false; } + +x86::REG x86::operands::Memory::base() const { return x86::REG::NoRegister; } + +x86::REG x86::operands::Memory::scaled_register() const { + return x86::REG::NoRegister; +} + +x86::REG x86::operands::Memory::segment_register() const { + return x86::REG::NoRegister; +} + +uint64_t x86::operands::Memory::scale() const { return 0; } + +int64_t x86::operands::Memory::displacement() const { return 0; } + +// ---------------------------------------------------------------------------- +// asm/x86/operands/PCRelative.hpp +// ---------------------------------------------------------------------------- +bool x86::operands::PCRelative::classof(const Operand*) { return false; } + +int64_t x86::operands::PCRelative::value() const { + return 0; +} + +// ---------------------------------------------------------------------------- +// asm/x86/operands/Register.hpp +// ---------------------------------------------------------------------------- +bool x86::operands::Register::classof(const Operand*) { return false; } + +x86::REG x86::operands::Register::value() const { + return REG::NoRegister; +} + +// ---------------------------------------------------------------------------- +// asm/x86/operands/Immediate.hpp +// ---------------------------------------------------------------------------- +bool x86::operands::Immediate::classof(const Operand*) { return false; } + +int64_t x86::operands::Immediate::value() const { + return 0; +} } } diff --git a/tests/assembly/test_x86.py b/tests/assembly/test_x86.py index 972462a41a..ec36d0f399
100644 --- a/tests/assembly/test_x86.py +++ b/tests/assembly/test_x86.py @@ -20,6 +20,8 @@ def test_pe_x86(): assert instructions[0].opcode == lief.assembly.x86.OPCODE.PUSHA32 assert instructions[5].to_string() == "0x4ad4ee7f: nop" + assert instructions[5].raw.hex(":") == "90" + assert instructions[5].size == 1 def test_pe_x86_64(): pe = lief.PE.parse(get_sample("PE/ntoskrnl.exe")) @@ -71,3 +73,91 @@ def test_macho_x86_64(): assert len(instructions) == 4903 assert instructions[0].to_string() == "0x001108: push rbp" assert instructions[23].to_string() == "0x001154: dec dword ptr [rip + 21174]" + +def test_x86_operands(): + pe = lief.PE.parse(get_sample("PE/ntoskrnl.exe")) + instructions = list(pe.disassemble(0x140200000)) + + # ------------------------------------------------------------------------- + + operands = list(instructions[8].operands) + + assert len(operands) == 2 + assert isinstance(operands[0], lief.assembly.x86.operands.Register) + assert isinstance(operands[1], lief.assembly.x86.operands.Register) + assert operands[0].value == lief.assembly.x86.REG.RAX + assert operands[1].value == lief.assembly.x86.REG.RSP + + # ------------------------------------------------------------------------- + + operands = list(instructions[9].operands) + + assert len(operands) == 2 + assert isinstance(operands[0], lief.assembly.x86.operands.Memory) + assert operands[0].base == lief.assembly.x86.REG.RAX + assert operands[0].scaled_register == lief.assembly.x86.REG.NoRegister + assert operands[0].scale == 1 + assert operands[0].displacement == 8 + + # ------------------------------------------------------------------------- + + operands = list(instructions[21].operands) + + assert len(operands) == 1 + assert isinstance(operands[0], lief.assembly.x86.operands.PCRelative) + assert operands[0].value == 0x26889d + + # ------------------------------------------------------------------------- + + operands = list(instructions[53].operands) + + assert len(operands) == 2 + assert 
isinstance(operands[1], lief.assembly.x86.operands.Immediate) + assert operands[1].value == -33 + +def test_x86_semnatic_info(): + pe = lief.PE.parse(get_sample("PE/ntoskrnl.exe")) + inst = next(pe.disassemble_from_bytes(pe.assemble(0x140200000, "syscall"))) + assert inst.is_syscall + + inst = next(pe.disassemble_from_bytes(pe.assemble(0x140200000, "call _foo"))) + assert inst.is_call + assert not inst.is_terminator + assert str(next(inst.operands)) == "PCRel=0x0" + + inst = next(pe.disassemble_from_bytes(pe.assemble(0x140200000, "jmp rax"))) + assert inst.is_branch + assert inst.is_barrier + assert inst.is_indirect_branch + + inst = next(pe.disassemble_from_bytes(pe.assemble(0x140200000, "vmovdqu xmm1, xmmword ptr [rdi]"))) + assert inst.is_memory_access + assert inst.memory_access == lief.assembly.Instruction.MemoryAccess.READ + + inst = next(pe.disassemble_from_bytes(pe.assemble(0x140200000, "mov rax, rbx"))) + assert inst.is_move_reg + + inst = next(pe.disassemble_from_bytes(pe.assemble(0x140200000, "add rax, rbx"))) + assert inst.is_add + + inst = next(pe.disassemble_from_bytes(pe.assemble(0x140200000, "ud1 rax, rax"))) + assert inst.is_trap + + inst = next(pe.disassemble_from_bytes(pe.assemble(0x140200000, "ret"))) + assert inst.is_return + + inst = next(pe.disassemble_from_bytes(b"\x75\x07")) + assert inst.is_conditional_branch + + inst = next(pe.disassemble_from_bytes(b"\xeb\x10")) + assert inst.is_unconditional_branch + assert inst.branch_target == 18 + + inst = next(pe.disassemble_from_bytes(pe.assemble(0x140200000, "cmp rax, rbx"))) + assert inst.is_compare + + inst = next(pe.disassemble_from_bytes(pe.assemble(0x140200000, "mov rax, 123"))) + assert inst.is_move_immediate + + inst = next(pe.disassemble_from_bytes(pe.assemble(0x140200000, "movq mm1, mm2"))) + assert inst.is_bitcast