My favorites | Sign in
Project Home Downloads Wiki Issues Source
Checkout   Browse   Changes    
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
package main

import (
"fmt"
"io/ioutil"
"regexp"
"strings"
"time"
)

func train(training_data string) map[string]int {
NWORDS := make(map[string]int)
pattern := regexp.MustCompile("[a-z]+")
if content, err := ioutil.ReadFile(training_data); err == nil {
for _, w := range pattern.FindAllString(strings.ToLower(string(content)), -1) {
NWORDS[w]++;
}
} else {
panic("Failed loading training data. Get it from http://norvig.com/big.txt.")
}
return NWORDS
}

func edits1(word string, ch chan string) {
const alphabet = "abcdefghijklmnopqrstuvwxyz"
type Pair struct{a, b string}
var splits []Pair
for i := 0; i < len(word) + 1; i++ {
splits = append(splits, Pair{word[:i], word[i:]}) }

for _, s := range splits {
if len(s.b) > 0 { ch <- s.a + s.b[1:] }
if len(s.b) > 1 { ch <- s.a + string(s.b[1]) + string(s.b[0]) + s.b[2:] }
for _, c := range alphabet { if len(s.b) > 0 { ch <- s.a + string(c) + s.b[1:] }}
for _, c := range alphabet { ch <- s.a + string(c) + s.b }
}
}

func edits2(word string, ch chan string) {
ch1 := make(chan string, 1024*1024)
go func() { edits1(word, ch1); ch1 <- "" }()
for e1 := range ch1 {
if e1 == "" { break }
edits1(e1, ch)
}
}

func best(word string, edits func(string, chan string), model map[string]int) string {
ch := make(chan string, 1024*1024)
go func() { edits(word, ch); ch <- "" }()
maxFreq := 0
correction := ""
for word := range ch {
if word == "" { break }
if freq, present := model[word]; present && freq > maxFreq {
maxFreq, correction = freq, word
}
}
return correction
}

func correct(word string, model map[string]int) string {
if _, present := model[word]; present { return word }
if correction := best(word, edits1, model); correction != "" { return correction }
if correction := best(word, edits2, model); correction != "" { return correction }
return word
}

func main() {
model := train("big.txt")
startTime := time.Nanoseconds()
for i := 0; i < 1; i++ { fmt.Println(correct("korrecter", model)) }
fmt.Printf("Time : %v\n", float64(time.Nanoseconds() - startTime) / float64(1e9))
}

Change log

r9 by Yi.Wang.2005 on Feb 17, 2012   Diff
Fix bug: Consumer in edits2 cannot go
edits1 for lack of synchronization.
Go to: 
Project members, sign in to write a code review

Older revisions

r8 by Yi.Wang.2005 on Feb 17, 2012   Diff
Fix bug: correction with smaller edit
distance is infinitely better than
that with larger distance
r6 by Yi.Wang.2005 on Feb 17, 2012   Diff
edits1and2 invokes edits1
simultaneously, and add time cost
measurement.
r5 by Yi.Wang.2005 on Feb 17, 2012   Diff
Remove inocation of edits1 in correct,
instead reuse result of edits1s
invoked by edits1and2.
All revisions of this file

File info

Size: 2104 bytes, 74 lines
Powered by Google Project Hosting