My favorites | Sign in
Project Logo
                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
package org.karticks.mapreduce;

import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

/**
* This class implements the Reduce phase of the Map-Reduce design pattern.
* The class has only one method (<code>doReduce</code>) which takes the results (as a <code>List</code>
* of <code>Maps</code>) from the Map phase and produces the final result (also as a <code>Map</code>).
*
* @author Kartick Suriamoorthy
*
*/
public class Reducer
{
/**
* Executes the Reduce phase of the Map-Reduce pattern by iterating over
* all the input <code>Maps</code> to find common keys and aggregating their results.
* Stores and returns the final results in the output <code>Map</code>.
*
* @param inputMaps A list of results from Map phase
* @return A <code>Map</code> that contains the final result
*/
public Map<String, Integer> doReduce(List<Map<String, Integer>> inputMaps)
{
// decided to use a tree-map as it easier to output the final results
// as all the keys are sorted. this can be easily replaced with a
// hashtable or hashmap with no adverse effects.
Map<String, Integer> outputMap = new TreeMap<String, Integer>();

int mapIndex = 0;

// outer loop - iterate over all maps
for (Map<String, Integer> map : inputMaps)
{
mapIndex++;

Iterator<String> it = map.keySet().iterator();

while (it.hasNext())
{
String key = it.next();

// Get the value from the current map
Integer value = map.get(key);

// Now iterate over the rest of maps. The mapIndex variable starts
// at 1 and keeps increasing because once we are done with all the
// keys in the first map, we don't need to inspect it any more, and
// the same holds for the second map, third map, and so on.
for (int j = mapIndex; j < inputMaps.size(); j++)
{
Integer v = inputMaps.get(j).get(key);

// if you find a value for the key, add it to the current value
// and then remove that key from that map.
if (v != null)
{
value += v;
inputMaps.get(j).remove(key);
}
}

// finished aggregating all the values for this key, now store it
// in the output map
outputMap.put(key, value);
}
}

return outputMap;
}
}
Show details Hide details

Change log

r68 by kartick.suriamoorthy on Jul 31, 2009   Diff
documented the choice of a TreeMap as
compared to a Hashtable or Hashmap.
Go to: 
Project members, sign in to write a code review

Older revisions

r65 by kartick.suriamoorthy on Jul 29, 2009   Diff
changed the Map to be a TreeMap
(previously was a Hashtable). Did this
so that keys are sorted, and I can
compare results from different runs
easily.
r59 by kartick.suriamoorthy on Jul 29, 2009   Diff
initial checkin of map-reduce sources
All revisions of this file

File info

Size: 2360 bytes, 74 lines
Hosted by Google Code