My favorites | Sign in
Project Home Downloads Wiki Issues Source
Checkout   Browse   Changes  
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#!/usr/bin/env python

"""Fuzz tester for the bytecode -> LLVM IR compiler.

The fuzzer implements two strategies for attacking the compiler:
- Generate a random code object.
- Take a known-good code object and change one byte to a random replacement.

Neither of these is a terribly sophisticated, but combined were sufficient to
expose multiple fatal errors in the compiler. Strategies that were tried, but
failed to find further problems:
- Very large code objects with thousands of opcodes.
- Take a known-good code object and shuffle the opcodes without fixing jump
targets.
- Take a known-good code object and shuffle the opcodes, fixing jump targets.
- Take a known-good code object and replace opcodes with other valid opcodes
(jump opcodes replace other jump opcodes, nullary opcodes replace other
nullary opcodes, etc).

The code objects produced by these strategies would either be caught by the
JIT's bytecode validator or would be compiled successfully. The experience was
that the compiler has no trouble with syntactically-correct bytecode, even if
the semantics are invalid.

The fuzzer has yet to generate bytecode that causes problems for LLVM; all
errors so far have been in the bytecode -> LLVM IR frontend.

Example:
/unladen/swallow/python jitfuzz.py --random_seed=12345678
"""

# Python imports
import opcode
import optparse
import random
import sys
import traceback
import types


def find_code_objects(*modules):
"""Find most code objects in the given modules."""
for module in modules:
for val in module.__dict__.itervalues():
if isinstance(val, types.FunctionType):
yield val.__code__
if isinstance(val, type):
for x in val.__dict__.values():
if isinstance(x, types.MethodType):
yield x.__code__


# These are known-good code objects for us to screw with.
CODE_OBJS = list(find_code_objects(traceback, optparse, random))

# The order of this list must match the order of parameters to types.CodeType().
CODE_ATTRS = ["argcount", "nlocals", "stacksize", "flags", "code",
"consts", "names", "varnames", "filename", "name",
"firstlineno", "lnotab", "freevars", "cellvars"]


def stderr(message, *args):
print >>sys.stderr, message % args


def init_random_seed(random_seed):
if random_seed == -1:
random_seed = int(random.random() * 1e9)
random.seed(random_seed)
return random_seed


def clone_code_object(code_obj, **changes):
"""Copy a given code object, possibly changing some attributes.

Example:
clone_code_object(code, code=new_bytecode, flags=new_flags)

Args:
code_obj: baseline code object to clone.
**changes: keys should be names in CODE_ATTRS, values should be the
new value for that attribute name.

Returns:
A new code object.
"""
members = []
for attr in CODE_ATTRS:
if attr in changes:
members.append(changes[attr])
else:
full_attr = "co_" + attr
members.append(getattr(code_obj, full_attr))
return types.CodeType(*members)


def random_int(lower=0, upper=10):
return random.randint(lower, upper)


def random_char(lower=1, upper=255):
return chr(random.randint(lower, upper))


def random_string(length=None):
if length is None:
length = random_int(upper=5000)
# Not random, but nothing looks at the contents of the strings.
return "a" * length


def random_list(func, length=None):
if length is None:
length = random_int(upper=500)
return [func() for _ in xrange(length)]


def random_object():
return random.choice([None, True, 3e8, random_list,
"foo", u"bar", (9,), []])


def random_code_object():
correct = (random.random() < 0.5)

argcount = random_int()
nlocals = random_int(upper=100)
stacksize = random_int(upper=10000)
flags = random_int(upper=1024)
codestring = random_string()
constants = tuple(random_list(random_object))
names = tuple(random_list(random_string))
filename = "attack-jit.py"
name = random_string()
firstlineno = random_int(lower=-1000, upper=1000)
lnotab = ""
freevars = tuple(random_list(random_string))
cellvars = tuple(random_list(random_string))
if correct:
varnames = tuple(random_list(random_string, nlocals))
else:
varnames = tuple(random_list(random_string))

code = types.CodeType(argcount, nlocals, stacksize, flags, codestring,
constants, names, varnames, filename, name,
firstlineno, lnotab, freevars, cellvars)
return code


def permute_code_object(baseline):
"""Take a code object and change one byte of the bytecode."""
bytecode = list(baseline.co_code)
bytecode[random.randint(0, len(bytecode) - 1)] = random_char()
return clone_code_object(baseline, code="".join(bytecode))


def generate_code():
"""Yield new code objects forever."""
while True:
if random.random() < 0.5:
yield random_code_object()
else:
yield permute_code_object(random.choice(CODE_OBJS))


def attack_jit():
# Track how many code objects are approved by the validator. If too many
# are being rejected by the validator, we're not stressing LLVM enough.
valid = 0
rejected = 0
for i, code in enumerate(generate_code()):
code.co_use_jit = True
try:
code.co_optimization = 2
valid += 1
except:
traceback.print_exc()
rejected += 1
if i % 100 == 0:
print
print "### %d attacks successfully repulsed" % i
print "### Validated: %d; rejected: %d" % (valid, rejected)
print


def main(argv):
parser = optparse.OptionParser()
parser.add_option("-r", "--random_seed",
help="Random seed", type="int", default=-1)
options, _ = parser.parse_args(argv)

rand_seed = init_random_seed(options.random_seed)
stderr("Using random seed: %s", rand_seed)
attack_jit()


if __name__ == "__main__":
main(sys.argv)

Change log

r1159 by collinw on Jun 15, 2010   Diff
Add a fuzzing tool for the JIT; add a
bytecode validation layer that prevents
several crashes/asserts/aborts uncovered
by the tool.
Go to: 
Project members, sign in to write a code review

Older revisions

All revisions of this file

File info

Size: 6228 bytes, 198 lines
Powered by Google Project Hosting