-
Notifications
You must be signed in to change notification settings - Fork 0
/
image_utils.h
174 lines (141 loc) · 6.05 KB
/
image_utils.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COURGETTE_IMAGE_UTILS_H_
#define COURGETTE_IMAGE_UTILS_H_
#include <stddef.h>
#include <stdint.h>
#include <iterator>
#include <vector>
// COURGETTE_HISTOGRAM_TARGETS prints out a histogram of how frequently
// different target addresses are referenced. Purely for debugging.
#define COURGETTE_HISTOGRAM_TARGETS 0
namespace courgette {
// There are several ways to reason about addresses in an image:
// - File Offset: Position relative to start of image.
// - VA (Virtual Address): Virtual memory address of a loaded image. This is
// subject to relocation by the OS.
// - RVA (Relative Virtual Address): VA relative to some base address. This is
// the preferred way to specify pointers in an image.
//
// In Courgette we consider two types of addresses:
// - abs32: In an image these are directly stored as VA whose locations are
// stored in the relocation table.
// - rel32: In an image these appear in branch/call opcodes, and are represented
// as offsets from an instruction address.
using RVA = uint32_t;
const RVA kUnassignedRVA = 0xFFFFFFFFU;
const RVA kNoRVA = 0xFFFFFFFFU;
using FileOffset = size_t;
const FileOffset kNoFileOffset = UINTPTR_MAX;
// An interface translate and read addresses. The main conversion path is:
// (1) Location RVA.
// (2) Location FileOffset.
// (3) Pointer in image.
// (4) Target VA (32-bit or 64-bit).
// (5) Target RVA (32-bit).
// For abs32, we get (1) from relocation table, and convert to (5).
// For rel32, we get (2) from scanning opcode, and convert to (1).
class AddressTranslator {
public:
// (2) -> (1): Returns the RVA corresponding to |file_offset|, or kNoRVA if
// nonexistent.
virtual RVA FileOffsetToRVA(FileOffset file_offset) const = 0;
// (1) -> (2): Returns the file offset corresponding to |rva|, or
// kNoFileOffset if nonexistent.
virtual FileOffset RVAToFileOffset(RVA rva) const = 0;
// (2) -> (3): Returns image data pointer correspnoding to |file_offset|.
// Assumes 0 <= |file_offset| <= image size.
// If |file_offset| == image size, then the resulting pointer is an end bound
// for iteration, and should not be dereferenced.
virtual const uint8_t* FileOffsetToPointer(FileOffset file_offset) const = 0;
// (1) -> (3): Returns the pointer to the image data for |rva|, or null if
// |rva| is invalid.
virtual const uint8_t* RVAToPointer(RVA rva) const = 0;
// (3) -> (5): Returns the target RVA located at |p|, where |p| is a pointer
// to image data.
virtual RVA PointerToTargetRVA(const uint8_t* p) const = 0;
};
// A Label is a symbolic reference to an address. Unlike a conventional
// assembly language, we always know the address. The address will later be
// stored in a table and the Label will be replaced with the index into the
// table.
// TODO(huangs): Make this a struct, and remove "_" from member names.
class Label {
public:
enum : int { kNoIndex = -1 };
explicit Label(RVA rva) : rva_(rva) {}
Label(RVA rva, int index) : rva_(rva), index_(index) {}
Label(RVA rva, int index, int32_t count)
: rva_(rva), index_(index), count_(count) {}
bool operator==(const Label& other) const {
return rva_ == other.rva_ && index_ == other.index_ &&
count_ == other.count_;
}
RVA rva_ = kUnassignedRVA; // Address referred to by the label.
int index_ = kNoIndex; // Index of address in address table.
int32_t count_ = 0;
};
// An interface for sequential visit of RVAs.
// Use case: Translating from RVA locations to RVA targets is platform-specific,
// and works differently for abs32 vs. rel32. A function that sequentually
// visits RVA targets only requires an RvaVisitor. The caller can provide an
// implementation that stores a fixed list of RVA locations, and translates each
// to the matching RVA target on demand without extra storage.
class RvaVisitor {
public:
virtual ~RvaVisitor() { }
// Returns the number of remaining RVAs to visit.
virtual size_t Remaining() const = 0;
// Returns the current RVA.
virtual RVA Get() const = 0;
// Advances to the next RVA.
virtual void Next() = 0;
};
// RvaVisitor whose data are backed by std::vector<T>. Translating from T to RVA
// is should be implemented in Get().
template <typename T>
class VectorRvaVisitor : public RvaVisitor {
public:
// Assumes |v| does not change for the lifetime of this instance.
explicit VectorRvaVisitor(const std::vector<T>& v)
: it_(v.begin()), end_(v.end()) { }
~VectorRvaVisitor() override { }
// RvaVisitor interfaces.
size_t Remaining() const override { return std::distance(it_, end_); }
virtual RVA Get() const override = 0;
void Next() override { ++it_; }
protected:
typename std::vector<T>::const_iterator it_;
typename std::vector<T>::const_iterator end_;
};
// RvaVisitor that simply stores a list of RVAs for traversal. For testing.
class TrivialRvaVisitor : public VectorRvaVisitor<RVA> {
public:
explicit TrivialRvaVisitor(const std::vector<RVA>& rvas)
: VectorRvaVisitor<RVA>(rvas) { }
~TrivialRvaVisitor() override { }
// VectorRvaVisitor<RVA> interfaces.
RVA Get() const override { return *it_; }
};
// These helper functions avoid the need for casts in the main code.
inline uint16_t ReadU16(const uint8_t* address, size_t offset) {
return *reinterpret_cast<const uint16_t*>(address + offset);
}
inline uint32_t ReadU32(const uint8_t* address, size_t offset) {
return *reinterpret_cast<const uint32_t*>(address + offset);
}
inline uint64_t ReadU64(const uint8_t* address, size_t offset) {
return *reinterpret_cast<const uint64_t*>(address + offset);
}
inline uint16_t Read16LittleEndian(const void* address) {
return *reinterpret_cast<const uint16_t*>(address);
}
inline uint32_t Read32LittleEndian(const void* address) {
return *reinterpret_cast<const uint32_t*>(address);
}
inline uint64_t Read64LittleEndian(const void* address) {
return *reinterpret_cast<const uint64_t*>(address);
}
} // namespace courgette
#endif // COURGETTE_IMAGE_UTILS_H_