Add more examples to README

2020-12-12 07:15:45 +01:00 · 2020-12-12 07:15:45 +01:00 · 0d63fd1fe7
parent 4612b66610
commit 0d63fd1fe7
2 changed files with 263 additions and 7 deletions
--- a/src/rust/iced-x86-py/README.md
+++ b/src/rust/iced-x86-py/README.md
@ -171,19 +171,273 @@ print(f"{instr:gG_xSs}")

 ## Create and encode instructions

-TODO:
+This example uses a `BlockEncoder` to encode created `Instruction`s.
+
+```python
+from iced_x86 import *
+
+bitness = 64
+
+# All created instructions get an IP of 0. The label id is just an IP.
+# The branch instruction's *target* IP should be equal to the IP of the
+# target instruction.
+label_id: int = 1
+
+def create_label() -> int:
+    global label_id
+    idd = label_id
+    label_id += 1
+    return idd
+
+def add_label(id: int, instruction: Instruction) -> Instruction:
+    instruction.ip = id
+    return instruction
+
+label1 = create_label()
+
+instructions = []
+instructions.append(Instruction.create_reg(Code.PUSH_R64, Register.RBP))
+instructions.append(Instruction.create_reg(Code.PUSH_R64, Register.RDI))
+instructions.append(Instruction.create_reg(Code.PUSH_R64, Register.RSI))
+instructions.append(Instruction.create_reg_u32(
+    Code.SUB_RM64_IMM32, Register.RSP, 0x50))
+instructions.append(Instruction.create(Code.VEX_VZEROUPPER))
+instructions.append(Instruction.create_reg_mem(
+    Code.LEA_R64_M, Register.RBP, MemoryOperand(Register.RSP, displ=0x60)))
+instructions.append(Instruction.create_reg_reg(
+    Code.MOV_R64_RM64, Register.RSI, Register.RCX))
+instructions.append(Instruction.create_reg_mem(
+    Code.LEA_R64_M, Register.RDI, MemoryOperand(Register.RBP, displ=-0x38)))
+instructions.append(Instruction.create_reg_i32(
+    Code.MOV_R32_IMM32, Register.ECX, 0x0A))
+instructions.append(Instruction.create_reg_reg(
+    Code.XOR_R32_RM32, Register.EAX, Register.EAX))
+instructions.append(Instruction.create_rep_stosd(bitness))
+instructions.append(Instruction.create_reg_u64(
+    Code.CMP_RM64_IMM32, Register.RSI, 0x1234_5678))
+# Create a branch instruction that references label1
+instructions.append(Instruction.create_branch(Code.JNE_REL32_64, label1))
+instructions.append(Instruction.create(Code.NOPD))
+# Add the instruction that is the target of the branch
+instructions.append(add_label(label1, Instruction.create_reg_reg(
+    Code.XOR_R32_RM32, Register.R15D, Register.R15D)))
+
+# Create an instruction that accesses some data using an RIP relative memory operand
+data1 = create_label()
+instructions.append(Instruction.create_reg_mem(
+    Code.LEA_R64_M, Register.R14, MemoryOperand(Register.RIP, displ=data1)))
+instructions.append(Instruction.create(Code.NOPD))
+raw_data = b"\x12\x34\x56\x78"
+instructions.append(
+    add_label(data1, Instruction.create_declare_byte(raw_data)))
+
+# Use BlockEncoder to encode a block of instructions. This block can contain any
+# number of branches and any number of instructions.
+# It uses Encoder to encode all instructions.
+# If the target of a branch is too far away, it can fix it to use a longer branch.
+# This can be disabled by passing in `False` to the ctor.
+target_rip = 0x0000_1248_FC84_0000
+encoder = BlockEncoder(bitness)
+encoder.add_many(instructions)
+encoded_bytes = encoder.encode(target_rip)
+
+# Now disassemble the encoded instructions. Note that the 'jmp near'
+# instruction was turned into a 'jmp short' instruction because we
+# didn't disable branch optimizations.
+bytes_code = encoded_bytes[0:len(encoded_bytes) - len(raw_data)]
+bytes_data = encoded_bytes[len(encoded_bytes) - len(raw_data):]
+decoder = Decoder(bitness, bytes_code)
+decoder.ip = target_rip
+formatter = Formatter(FormatterSyntax.GAS)
+formatter.first_operand_char_index = 8
+for instruction in decoder:
+    disasm = formatter.format(instruction)
+    print(f"{instruction.ip:016X} {disasm}")
+
+db = Instruction.create_declare_byte(bytes_data)
+print(f"{decoder.ip:016X} {formatter.format(db)}")
+
+# Output:
+# 00001248FC840000 push    %rbp
+# 00001248FC840001 push    %rdi
+# 00001248FC840002 push    %rsi
+# 00001248FC840003 sub     $0x50,%rsp
+# 00001248FC84000A vzeroupper
+# 00001248FC84000D lea     0x60(%rsp),%rbp
+# 00001248FC840012 mov     %rcx,%rsi
+# 00001248FC840015 lea     -0x38(%rbp),%rdi
+# 00001248FC840019 mov     $0xA,%ecx
+# 00001248FC84001E xor     %eax,%eax
+# 00001248FC840020 rep stos %eax,(%rdi)
+# 00001248FC840022 cmp     $0x12345678,%rsi
+# 00001248FC840029 jne     0x00001248FC84002C
+# 00001248FC84002B nop
+# 00001248FC84002C xor     %r15d,%r15d
+# 00001248FC84002F lea     0x1248FC840037,%r14
+# 00001248FC840036 nop
+# 00001248FC840037 .byte   0x12,0x34,0x56,0x78
+```

 ## Move code in memory (eg. hook a function)

-TODO:
+Uses instruction info API and the encoder to patch a function to jump to the programmer's function.
+
+```python
+from iced_x86 import *
+
+# Decodes instructions from some address, then encodes them starting at some
+# other address. This can be used to hook a function. You decode enough instructions
+# until you have enough bytes to add a JMP instruction that jumps to your code.
+# Your code will then conditionally jump to the original code that you re-encoded.
+#
+# This code uses the BlockEncoder which will help with some things, eg. converting
+# short branches to longer branches if the target is too far away.
+#
+# 64-bit mode also supports RIP relative addressing, but the encoder can't rewrite
+# those to use a longer displacement. If any of the moved instructions have RIP
+# relative addressing and it tries to access data too far away, the encoder will fail.
+# The easiest solution is to use OS alloc functions that allocate memory close to the
+# original code (+/-2GB).
+
+
+# This example produces the following output:
+# Original code:
+# 00007FFAC46ACDA4 mov [rsp+10h],rbx
+# 00007FFAC46ACDA9 mov [rsp+18h],rsi
+# 00007FFAC46ACDAE push rbp
+# 00007FFAC46ACDAF push rdi
+# 00007FFAC46ACDB0 push r14
+# 00007FFAC46ACDB2 lea rbp,[rsp-100h]
+# 00007FFAC46ACDBA sub rsp,200h
+# 00007FFAC46ACDC1 mov rax,[rel 7FFAC47524E0h]
+# 00007FFAC46ACDC8 xor rax,rsp
+# 00007FFAC46ACDCB mov [rbp+0F0h],rax
+# 00007FFAC46ACDD2 mov r8,[rel 7FFAC474F208h]
+# 00007FFAC46ACDD9 lea rax,[rel 7FFAC46F4A58h]
+# 00007FFAC46ACDE0 xor edi,edi
+#
+# Original + patched code:
+# 00007FFAC46ACDA4 mov rax,123456789ABCDEF0h
+# 00007FFAC46ACDAE jmp rax
+# 00007FFAC46ACDB0 push r14
+# 00007FFAC46ACDB2 lea rbp,[rsp-100h]
+# 00007FFAC46ACDBA sub rsp,200h
+# 00007FFAC46ACDC1 mov rax,[rel 7FFAC47524E0h]
+# 00007FFAC46ACDC8 xor rax,rsp
+# 00007FFAC46ACDCB mov [rbp+0F0h],rax
+# 00007FFAC46ACDD2 mov r8,[rel 7FFAC474F208h]
+# 00007FFAC46ACDD9 lea rax,[rel 7FFAC46F4A58h]
+# 00007FFAC46ACDE0 xor edi,edi
+#
+# Moved code:
+# 00007FFAC48ACDA4 mov [rsp+10h],rbx
+# 00007FFAC48ACDA9 mov [rsp+18h],rsi
+# 00007FFAC48ACDAE push rbp
+# 00007FFAC48ACDAF push rdi
+# 00007FFAC48ACDB0 jmp 00007FFAC46ACDB0h
+
+def disassemble(data: bytes, ip: int) -> None:
+    formatter = Formatter(FormatterSyntax.NASM)
+    decoder = Decoder(EXAMPLE_CODE_BITNESS, data)
+    decoder.ip = ip
+    for instruction in decoder:
+        disasm = formatter.format(instruction)
+        print(f"{instruction.ip:016X} {disasm}")
+    print()
+
+def how_to_move_code() -> None:
+    print("Original code:")
+    disassemble(EXAMPLE_CODE, EXAMPLE_CODE_RIP)
+
+    decoder = Decoder(EXAMPLE_CODE_BITNESS, EXAMPLE_CODE)
+    decoder.ip = EXAMPLE_CODE_RIP
+
+    # In 64-bit mode, we need 12 bytes to jump to any address:
+    #      mov rax,imm64   # 10
+    #      jmp rax         # 2
+    # We overwrite rax because it's probably not used by the called function.
+    # In 32-bit mode, a normal JMP is just 5 bytes
+    required_bytes = 10 + 2
+    total_bytes = 0
+    orig_instructions = []
+    for instr in decoder:
+        orig_instructions.append(instr)
+        total_bytes += instr.len
+        if not instr:
+            raise ValueError("Found garbage")
+        if total_bytes >= required_bytes:
+            break
+
+        cflow = instr.flow_control
+        if cflow == FlowControl.NEXT:
+            pass  # nothing
+        elif cflow == FlowControl.UNCONDITIONAL_BRANCH:
+            if instr.op0_kind == OpKind.NEAR_BRANCH64:
+                _target = instr.near_branch_target
+                # You could check if it's just jumping forward a few bytes and follow it
+                # but this is a simple example so we'll fail.
+            raise ValueError("Not supported by this simple example")
+        else:
+            raise ValueError("Not supported by this simple example")
+    if total_bytes < required_bytes:
+        raise ValueError("Not enough bytes!")
+    if len(orig_instructions) == 0:
+        raise ValueError("Should not be empty here")
+    # Create a JMP instruction that branches to the original code, except those instructions
+    # that we'll re-encode. We don't need to do it if it already ends in 'ret'
+    last_instr = orig_instructions[-1]
+    if last_instr.flow_control != FlowControl.RETURN:
+        orig_instructions.append(Instruction.create_branch(Code.JMP_REL32_64, last_instr.next_ip))
+
+    # Relocate the code to some new location. It can fix short/near branches and
+    # convert them to short/near/long forms if needed. This also works even if it's a
+    # jrcxz/loop/loopcc instruction which only have short forms.
+    #
+    # It can currently only fix RIP relative operands if the new location is within 2GB
+    # of the target data location.
+    relocated_base_address = EXAMPLE_CODE_RIP + 0x20_0000
+    encoder = BlockEncoder(decoder.bitness)
+    encoder.add_many(orig_instructions)
+    new_code = encoder.encode(relocated_base_address)
+
+    # Patch the original code. Pretend that we use some OS API to write to memory...
+    # We could use the BlockEncoder/Encoder for this but it's easy to do yourself too.
+    # This is 'mov rax,imm64; jmp rax'
+    YOUR_FUNC: int = 0x1234_5678_9ABC_DEF0  # Address of your code
+    example_code = bytearray(EXAMPLE_CODE)
+    example_code[0] = 0x48  # \ 'MOV RAX,imm64'
+    example_code[1] = 0xB8  # /
+    v = YOUR_FUNC
+    for i in range(2, 10):
+        example_code[i] = v & 0xFF
+        v >>= 8
+    example_code[10] = 0xFF  # \ JMP RAX
+    example_code[11] = 0xE0  # /
+
+    # Disassemble it
+    print("Original + patched code:")
+    disassemble(example_code, EXAMPLE_CODE_RIP)
+
+    # Disassemble the moved code
+    print("Moved code:")
+    disassemble(new_code, relocated_base_address)
+
+
+EXAMPLE_CODE_BITNESS: int = 64
+EXAMPLE_CODE_RIP: int = 0x0000_7FFA_C46A_CDA4
+EXAMPLE_CODE: bytes = \
+    b"\x48\x89\x5C\x24\x10\x48\x89\x74\x24\x18\x55\x57\x41\x56\x48\x8D" \
+    b"\xAC\x24\x00\xFF\xFF\xFF\x48\x81\xEC\x00\x02\x00\x00\x48\x8B\x05" \
+    b"\x18\x57\x0A\x00\x48\x33\xC4\x48\x89\x85\xF0\x00\x00\x00\x4C\x8B" \
+    b"\x05\x2F\x24\x0A\x00\x48\x8D\x05\x78\x7C\x04\x00\x33\xFF"
+
+how_to_move_code()
+```

 ## Get instruction info, eg. read/written regs/mem, control flow info, etc

-Shows how to get used registers/memory and other info. It uses [`Instruction`] methods
-and an [`InstructionInfoFactory`] to get this info.
-
-[`Instruction`]: https://docs.rs/iced-x86/1.9.1/iced_x86/struct.Instruction.html
-[`InstructionInfoFactory`]: https://docs.rs/iced-x86/1.9.1/iced_x86/struct.InstructionInfoFactory.html
+Shows how to get used registers/memory and other info. It uses `Instruction` methods
+and an `InstructionInfoFactory` to get this info.

 ```python
 from iced_x86 import *
--- a/src/rust/iced-x86-py/tests/instr_create_test.py
+++ b/src/rust/iced-x86-py/tests/instr_create_test.py
@ -527,6 +527,8 @@ def test_dq(instr: Instruction, data: bytes):
@pytest.mark.parametrize("data", [
 	b"",
 	b"A" * 17,
+	bytearray(b""),
+	bytearray(b"A" * 17),
 ])
 def test_invalid_db_slice_len(data):
 	with pytest.raises(ValueError):