My favorites
|
Sign in
dalalstreet
Open source software for managing your "Indian Stock Market Portfolio"
Project Home
Downloads
Wiki
Issues
Source
Checkout
|
Browse
|
Changes
|
r72
Source path:
svn
/
trunk
/
MapReduce
/
src
/
org
/
karticks
/
mapreduce
/
WordCountFromFiles.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
package org.karticks.mapreduce;
import java.io.InputStream;
import java.util.Iterator;
import java.util.Map;
/**
 * Counts the occurrences of words from different files using Map/Reduce.
 * This class assumes that files just contain words separated by spaces, and do not
 * contain any punctuation or line-endings. The files are loaded as classpath
 * resources (from the classpath root), so they must be located in the classpath
 * of the program.
 *
 * <p>File names may be passed as command-line arguments. When no arguments are
 * given, the word count example files are used. To run the sorting example, pass
 * {@code integers_to_be_sorted1.txt} through {@code integers_to_be_sorted5.txt}
 * as arguments instead.
 *
 * @author Kartick Suriamoorthy
 *
 */
public class WordCountFromFiles
{
    /** Resources processed when no file names are supplied on the command line (word count example). */
    private static final String[] DEFAULT_FILES = {
        "frankenstein-shelley-excerpt.txt", "iliad-homer-excerpt.txt", "metamorphosis-kafka-excerpt.txt",
        "prince-machiavelli-excerpt.txt", "ulysses-joyce-excerpt.txt"
    };

    /** Minimum count a word needs to be listed in the high frequency section. */
    private static final int HIGH_FREQUENCY_THRESHOLD = 50;

    /** Horizontal rule printed between the sections of the report. */
    private static final String SEPARATOR_LINE = "=====================================================";

    /** Text placed between two consecutive entries of a report line. */
    private static final String ENTRY_SEPARATOR = ", ";

    /**
     * Runs one mapper per input file, reduces the results and prints a report with
     * every word and its count, the high frequency words, and the total word count.
     *
     * @param args names of the classpath resources to process; when empty (or
     *             {@code null}) the default word count example files are used
     */
    public static void main(String[] args)
    {
        try
        {
            // The file list used to be hard-coded to null, which made the loop
            // below fail immediately; fall back to the word count example instead.
            String[] files = (args != null && args.length > 0) ? args : DEFAULT_FILES;

            MapReduceWorker worker = new MapReduceWorker();

            for (String file : files)
            {
                InputStream is = worker.getClass().getResourceAsStream("/" + file);

                // getResourceAsStream signals a missing resource with null rather
                // than an exception, so fail here with the offending name.
                if (is == null)
                {
                    throw new IllegalArgumentException("Could not find '" + file + "' in the classpath.");
                }

                // NOTE(review): the streams are never closed in this class; presumably
                // the worker consumes and closes them during doWork() - confirm.
                Mapper mapper = new WordCountMapper();
                worker.addMapper(mapper, is);
            }

            Map<String, Integer> result = worker.doWork();

            StringBuilder allWords = new StringBuilder();
            StringBuilder highFrequencyWords = new StringBuilder();
            int totalCount = 0;

            for (Map.Entry<String, Integer> entry : result.entrySet())
            {
                String key = entry.getKey();
                int value = entry.getValue();

                appendEntry(allWords, key, value);

                if (value >= HIGH_FREQUENCY_THRESHOLD)
                {
                    appendEntry(highFrequencyWords, key, value);
                }

                totalCount += value;
            }

            System.out.println(SEPARATOR_LINE);
            System.out.println("Final result : " + allWords + ".");
            System.out.println(SEPARATOR_LINE);
            System.out.println("High frequency (greater than or equal to " + HIGH_FREQUENCY_THRESHOLD
                    + ") words : " + highFrequencyWords + ".");
            System.out.println(SEPARATOR_LINE);
            System.out.println("Total count of words : " + totalCount + ".");
            System.out.println(SEPARATOR_LINE);
        }
        catch (Exception e)
        {
            // Top-level boundary of an example program: report the failure and exit normally.
            System.err.println("Caught an exception. Error message : " + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Appends {@code key (value)} to the target, preceded by a separator when the
     * target already holds an entry. Writing the separator before each entry
     * (instead of trimming a trailing one afterwards) keeps an empty report safe.
     *
     * @param target the report line being built
     * @param key    the word
     * @param value  the number of times the word was counted
     */
    private static void appendEntry(StringBuilder target, String key, int value)
    {
        if (target.length() > 0)
        {
            target.append(ENTRY_SEPARATOR);
        }

        target.append(key).append(" (").append(value).append(")");
    }
}
Show details
Hide details
Change log
r72
by kartick.suriamoorthy on Aug 04, 2009
Diff
map-reduce example.
Go to:
...apreduce/WordCountFromFiles.java
Project members,
sign in
to write a code review
Older revisions
All revisions of this file
File info
Size: 2821 bytes, 89 lines
View raw file
Hosted by