My favorites | Sign in
Project Home Downloads Wiki Issues Source
Checkout   Browse   Changes    
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/local/bin/python2.5

"""Demonstrate mr_simple in two ways.
When called with no arguments we show the word frequency
in two strings.

When called with an argument, we assume it is a directory: we scan it
for Python files and save the word frequencies to a shelve, to
demonstrate a 'consumer' that persists the data. The shelve is then
reopened and its contents are printed out.
"""

import mr_simple
import re
import os
import sys
import shelve

# Matches each maximal run of ASCII letters; word_count_mapper uses it to
# split text into words. Compiled once when the module is loaded.
_words = re.compile( '[a-zA-Z]+')

def dummy_producer():
    """Return the fixed sample input used by the demo.

    The result is a list of two (name, text) tuples.
    """
    samples = [
        ("f1", "now is the time"),
        ("f2", "is the time of day def?"),
    ]
    return samples

def word_count_mapper(name, value):
    """Map one record to a stream of (word, 1) tuples.

    'value' is lower-cased and split into words with the module-level
    '_words' pattern; one tuple is yielded per word occurrence, in the
    order the words appear. 'name' is accepted but not used.
    """
    lowered = value.lower()
    for match in _words.finditer(lowered):
        yield match.group(), 1

def word_count_reducer(name, values):
    """Reduce the values gathered for one name to their sum.

    'values' may be any iterable accepted by sum(); 'name' is accepted
    but not used.
    """
    total = sum(values)
    return total

def print_consumer(n, v):
    """Consumer that prints out every name,value pair it is passed.

    Writes one line of the form 'consume:  <n> <v>' to standard output,
    with n and v converted via str formatting. sys.stdout.write is used
    instead of the print statement so the function is valid on both
    Python 2.5 and Python 3.
    """
    # Two spaces follow the colon on purpose: one comes from the original
    # literal "consume: " and one from the separator the print statement
    # inserted between items, so the emitted text is unchanged.
    sys.stdout.write("consume:  %s %s\n" % (n, v))


class FileProducer:
    """Producer that traverses a directory tree and returns
    tuples of filename,content pairs.

    Only files whose name ends in '.py' are produced. Calling an
    instance returns a generator, so files are read one at a time as
    the consumer of the generator advances.
    """

    def __init__(self, dn):
        # dn: root directory passed to os.walk() each time the producer
        # is called.
        self.dn = dn

    def __call__(self):
        """Yield a (full_path, content) tuple for each '.py' file."""
        for dirpath, dirnames, filenames in os.walk(self.dn):
            for fn in filenames:
                if not fn.endswith('.py'):
                    continue
                full = os.path.join(dirpath, fn)
                # Close every handle explicitly instead of leaving it to
                # the garbage collector; a large tree could otherwise
                # exhaust file descriptors. try/finally (rather than
                # 'with') keeps the code valid on Python 2.5.
                source = open(full)
                try:
                    content = source.read()
                finally:
                    source.close()
                yield full, content

class ShelveConsumer:
    """Consumer that writes to a shelve.

    Each call stores one name/value pair. Call close() when finished so
    the data is flushed to disk before the shelve is opened elsewhere.
    """

    def __init__(self, fn):
        # Flag 'c' opens the shelve for reading and writing, creating it
        # if it does not exist yet. The open shelve is kept in self.s.
        self.s = shelve.open(fn, 'c')

    def __call__(self, n, v):
        """Store value v under key n, replacing any previous value."""
        self.s[n] = v

    def close(self):
        """Close the underlying shelve, writing pending data to disk."""
        self.s.close()

if __name__ == '__main__':
    # Two modes, chosen by the command line:
    #   no argument  -> count words in the built-in sample strings and
    #                   print every (word, count) pair;
    #   one argument -> treat it as a directory, count words in its .py
    #                   files, persist the counts to a shelve, then
    #                   reopen the shelve and print its contents.
    # Output goes through sys.stdout.write rather than the print
    # statement so the script parses on Python 2.5 and Python 3 alike;
    # the emitted text is the same as before.
    shelve_fn = 'mr.shelve'
    demo = len(sys.argv) == 1
    if demo:
        sys.stdout.write("Demo\n")
        producer = dummy_producer
        consumer = print_consumer
    else:
        sys.stdout.write("Will scan  %s\n" % sys.argv[1])
        producer = FileProducer(sys.argv[1])
        # Start from an empty shelve so counts from an earlier run do not
        # linger. The file may not exist yet (first run), and depending
        # on the dbm backend shelve may have added an extension or
        # created several files, so probe the known variants instead of
        # unconditionally unlinking the bare name.
        for suffix in ('', '.db', '.dat', '.dir', '.bak', '.pag'):
            stale = shelve_fn + suffix
            if os.path.isfile(stale):
                os.unlink(stale)
        consumer = ShelveConsumer(shelve_fn)

    # NOTE(review): presumably constructing MrSimple runs the whole
    # producer -> mapper -> reducer -> consumer pipeline; confirm against
    # mr_simple, which is not visible here.
    mr_simple.MrSimple(producer,
                       word_count_mapper,
                       word_count_reducer,
                       consumer)

    if not demo:
        # Close the writer explicitly so everything is flushed before the
        # shelve is reopened, rather than relying on garbage collection.
        consumer.s.close()
        consumer = None
        s = shelve.open(shelve_fn, 'r')
        try:
            for n in s.keys():
                sys.stdout.write("%s %s\n" % (n, s[n]))
        finally:
            s.close()

Change log

r48 by david.spencerian on Jul 11, 2008   Diff
nite
Go to: 
Project members, sign in to write a code review

Older revisions

r42 by david.spencerian on Jul 11, 2008   Diff
simplify and cleanup
r37 by david.spencerian on Jul 9, 2008   Diff
annoying
r36 by david.spencerian on Jul 9, 2008   Diff
touchup
All revisions of this file

File info

Size: 2146 bytes, 88 lines
Powered by Google Project Hosting