My favorites | Sign in
Project Logo
Project hosting will be READ-ONLY Wednesday at 8am PST due to brief network maintenance.
                
Repository:
Checkout | Browse | Changes | Clones |
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/python

##
# Author: bont
# Date: September 8, 2009
#
# Takes a list of Spanish words or files with words in them and gets the definitions of those words.
##

import formatter
import htmllib
import re
import urllib2

class SpanishDict(htmllib.HTMLParser):
def __init__(self):
htmllib.HTMLParser.__init__(self, formatter.NullFormatter())
self.word = ''
self.div = 0

def translate(self, word):
self.word = word
self.data = ''

url = 'http://www.spanishdict.com/translate/' + urllib2.quote(self.word)

try:
f = urllib2.urlopen(url)
self.feed(f.read())
self.close()
f.close()
except urllib2.URLError, err:
if err.code == 8:
print "Unable to connect to the internet. Check your connection and try again."
else:
print err

data = re.sub(r'('+w+' \[.*?\])', r'\n\n\1', self.data) + "\n"
data = re.sub(r'([0-9]+\.)', r"\n\1", data)
data = re.sub(r'(adjective|adverb|verb)', r"\n\n\1", data)
data = re.sub(r'([^or](masculine|feminine)*[^\]]noun)', r"\n\n\1", data)
data = re.sub(r'(masculine or\n\n feminine noun)', r"\n\nmasculine or feminine noun", data)
data = re.sub(r'(]noun)', r"]\n\nnoun", data)

return data

def start_div(self, attributes):
if self.div >= 1:
self.div += 1
return

for name, value in attributes:
if name == 'id' and value.startswith('word_'):
self.div = 1
self.save_bgn()
return

def end_div(self):
if self.div < 1:
return
elif self.div >= 1:
self.div -= 1

if self.div <= 0:
self.data += self.save_end()

if __name__ == '__main__':
from optparse import OptionParser
import re

parser = OptionParser("usage: %prog [options] file [file2, file3, ...]")
parser.add_option('-o', help="where to send the output, file name or '-' to indicate stdout [default: %default]", default='{first file}.defs', dest='output')
parser.add_option('--words', help='interpret the input as words, not file names [default: %default]', default=False, action='store_true')
parser.add_option('--delimiter', help='delimiter between words in the files [default: %default]', default='\\n')

(options, args) = parser.parse_args()

if options.words == True:
words = args
else:
delim = options.delimiter
if delim == '\\n':
delim = "\n"
words = []
for filename in args:
f = open(filename)
words.extend(f.read().split(delim))
f.close()

translator = SpanishDict()
data = ''

for w in words:
if w is not '':
text = translator.translate(w)
data += "\n==== "+w+" ====\n\n"+text

if options.output is '-':
print data
else:
if options.output is '{first file}.defs':
outfile = args[0] + '.defs'
else:
outfile = options.output
f = open(outfile, 'w')
f.write(data)
f.close()
Show details Hide details

Change log

faa8e978d1 by bont <jamesbont009> on Sep 13, 2009   Diff
Initial import
Go to: 
Project members, sign in to write a code review

Older revisions

All revisions of this file

File info

Size: 2714 bytes, 104 lines
Hosted by Google Code