My favorites | Sign in
Project Home Downloads Wiki Issues Source
Checkout   Browse   Changes    
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
# Natural Language Toolkit: Miscellaneous container classes
#
# Copyright (C) 2001-2011 NLTK Project
# Author: Steven Bird <sb@csse.unimelb.edu.au>
# URL: <http://www.nltk.org/>
# For license information, see LICENSE.TXT

class SortedDict(dict):
    """
    A very rudimentary sorted dictionary, whose main purpose is to
    allow dictionaries to be displayed in a consistent order in
    regression tests.  keys(), items(), values(), iter*(), and
    __repr__ all sort their return values before returning them.
    (Note that the sort order for values() does *not* correspond to
    the sort order for keys().  I.e., zip(d.keys(), d.values()) is not
    necessarily equal to d.items().)
    """

    def keys(self):
        """Return a sorted list of the keys."""
        return sorted(dict.keys(self))

    def items(self):
        """Return a sorted list of the (key, value) pairs."""
        return sorted(dict.items(self))

    def values(self):
        """Return a sorted list of the values (sorted independently of keys)."""
        return sorted(dict.values(self))

    def iterkeys(self):
        """Return an iterator over the sorted keys."""
        return iter(sorted(dict.keys(self)))

    def iteritems(self):
        """Return an iterator over the sorted (key, value) pairs."""
        return iter(sorted(dict.items(self)))

    def itervalues(self):
        """Return an iterator over the sorted values."""
        return iter(sorted(dict.values(self)))

    def __iter__(self):
        """Iterate over the keys in sorted order."""
        return iter(sorted(dict.keys(self)))

    def __repr__(self):
        """Return ``{k1=v1, k2=v2, ...}`` with the entries in sorted order.

        This used to be reachable only through the ``repr()`` method, so
        ``repr(d)`` and ``print`` fell back to the unsorted ``dict``
        representation, contrary to the class documentation.
        """
        items = ['%s=%s' % t for t in sorted(dict.items(self))]
        return '{%s}' % ', '.join(items)

    def repr(self):
        """Return the same string as ``__repr__`` (kept for existing callers)."""
        return self.__repr__()

# OrderedDict: Written by Doug Winter
# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/438823

class OrderedDict(dict):
    """
    This implementation of a dictionary keeps track of the order
    in which keys were inserted.

    The order is held in the list ``self._keys``.  ``keys()``,
    ``items()``, ``values()`` and iteration all follow that list, and
    ``move()`` can be used to rearrange it.
    """

    def __init__(self, d=None):
        """Create the dictionary, optionally copying the mapping *d*.

        The initial key order is the order in which ``d.keys()`` yields
        the keys.
        """
        if d is None:
            d = {}
        # Take a private copy: d.keys() is not guaranteed to be a fresh
        # list that we own (it may be a view, or another object's list),
        # and we mutate self._keys in place.
        self._keys = list(d.keys())
        dict.__init__(self, d)

    def __delitem__(self, key):
        """Delete *key*; raise KeyError if it is not present."""
        dict.__delitem__(self, key)
        self._keys.remove(key)

    def __setitem__(self, key, item):
        """Store *item* under *key*, appending *key* to the order if new."""
        dict.__setitem__(self, key, item)
        # a peculiar sharp edge from copy.deepcopy
        # we'll have our set item called without __init__
        if not hasattr(self, '_keys'):
            self._keys = []
        if key not in self._keys:
            self._keys.append(key)

    def __iter__(self):
        """Iterate over the keys in their recorded order."""
        return iter(list(self._keys))

    def clear(self):
        """Remove every entry."""
        dict.clear(self)
        self._keys = []

    def items(self):
        """Generate the (key, value) pairs in key order."""
        for key in self._keys:
            yield key, self[key]

    def keys(self):
        """Return a new list of the keys in their recorded order.

        A copy is returned so that callers cannot disturb the internal
        ordering by mutating the result.
        """
        return list(self._keys)

    def pop(self, key, *default):
        """Remove *key* and return its value.

        If *key* is missing, return the optional default when one is
        given, otherwise raise KeyError (as ``dict.pop`` does).
        """
        if key in self:
            # Keep the order list in step with the underlying dict.
            self._keys.remove(key)
        return dict.pop(self, key, *default)

    def popitem(self):
        """Remove and return the most recently ordered (key, value) pair.

        Raises KeyError if the dictionary is empty.
        """
        if not self._keys:
            raise KeyError('dictionary is empty')
        key = self._keys[-1]
        val = self[key]
        del self[key]
        return key, val

    def setdefault(self, key, failobj=None):
        """Return the value for *key*, first storing *failobj* if absent."""
        value = dict.setdefault(self, key, failobj)
        if key not in self._keys:
            self._keys.append(key)
        return value

    def update(self, d):
        """Copy every entry of the mapping *d*, appending unseen keys."""
        for key in d.keys():
            if key not in self:
                self._keys.append(key)
        dict.update(self, d)

    def values(self):
        """Generate the values in key order."""
        for key in self._keys:
            yield self[key]

    def move(self, key, index):
        """Move the specified key to *before* the specified index.

        *index* refers to a position in the current key order and is
        interpreted as ``list.insert`` would: negative values count from
        the end and out-of-range values are clamped.  Raises KeyError if
        *key* is not present.
        """
        try:
            cur = self._keys.index(key)
        except ValueError:
            raise KeyError(key)
        # Normalise the index first; comparing a raw negative index with
        # cur below would delete the wrong entry.
        size = len(self._keys)
        if index < 0:
            index = max(index + size, 0)
        elif index > size:
            index = size
        self._keys.insert(index, key)
        # this may have shifted the position of cur, if it is after index
        if cur >= index:
            cur = cur + 1
        del self._keys[cur]

    def index(self, key):
        """Return the position of *key* in the order; KeyError if absent."""
        if key not in self:
            raise KeyError(key)
        return self._keys.index(key)


##########################################################################
# TRIES
##########################################################################

# Trie structure, by James Tauber and Leonardo Maffi (V. 1.2, July 18 2006)
# Extended by Steven Bird

class Trie:
    """A Trie is like a dictionary in that it maps keys to
    values. However, because of the way keys are stored, it allows
    look up based on the longest prefix that matches. Keys must be
    strings.

    Each node is a three-element list ``[value, children, count]``:
    ``value`` is the value stored for the key ending at that node (None
    when there is none, so None cannot be stored as a real value),
    ``children`` maps the next character to a child node, and ``count``
    is the number of valued keys at or below the node.
    """

    def __init__(self, trie=None):
        """Create an empty trie, or wrap the existing root node *trie*."""
        if trie is None:
            self._root = [None, {}, 0]
        else:
            self._root = trie

    def clear(self):
        """Discard all keys by replacing the root with an empty node."""
        self._root = [None, {}, 0]

    def isleaf(self, key):
        """Return True if the key is present and has a value stored
        for it, False otherwise.  (The node may still have children.)"""

        curr_node = self._root
        for char in key:
            children = curr_node[1]
            if char in children:
                curr_node = children[char]
            else:
                return False
        return curr_node[0] is not None

    def find_prefix(self, key):
        """Find as much of the key as one can, by using the longest
        prefix that has a value. Return (value, remainder) where
        remainder is the rest of the given string.

        If no prefix of the key has a value, return ``(None, key)``."""

        curr_node = self._root
        # Best match so far: the empty prefix, which carries the root's
        # value (None unless the empty string was stored as a key).
        best_value, best_len = curr_node[0], 0
        for depth, char in enumerate(key, 1):
            children = curr_node[1]
            if char not in children:
                break
            curr_node = children[char]
            # Only nodes that actually hold a value count as a match;
            # intermediate nodes on the way to a longer key do not.
            if curr_node[0] is not None:
                best_value, best_len = curr_node[0], depth
        return best_value, key[best_len:]

    def subtrie(self, key):
        """Return a Trie rooted at the node reached by following *key*.

        The returned trie wraps the same node objects as this one rather
        than a copy.  Raises KeyError if *key* is not a path in the trie.
        """
        curr_node = self._root
        for char in key:
            curr_node = curr_node[1][char]
        return Trie(trie=curr_node)

    def __len__(self):
        """Return the number of keys that have a value."""
        return self._root[2]

    def __eq__(self, other):
        """Two tries are equal when their node structures are equal."""
        if not isinstance(other, Trie):
            return NotImplemented
        return self._root == other._root

    def __ne__(self, other):
        return not (self == other)

    def __setitem__(self, key, value):
        """Store *value* under *key*, creating nodes as needed."""
        curr_node = self._root
        path = [curr_node]
        for char in key:
            curr_node = curr_node[1].setdefault(char, [None, {}, 0])
            path.append(curr_node)
        # Adjust the counts only when the number of valued keys really
        # changes, so that overwriting an existing key leaves len() alone.
        delta = int(value is not None) - int(curr_node[0] is not None)
        curr_node[0] = value
        if delta:
            for node in path:
                node[2] += delta

    def __getitem__(self, key):
        """Return the value for the given key if it is present, raises
        a KeyError if key not found, and return None if it is present
        a key2 that starts with key."""

        curr_node = self._root
        for char in key:
            curr_node = curr_node[1][char]
        return curr_node[0]

    def __contains__(self, key):
        """Return True if the key is present or if it is present a
        key2 string that starts with key."""

        curr_node = self._root
        for char in key:
            children = curr_node[1]
            if char in children:
                curr_node = children[char]
            else:
                return False
        return True

    def _iter_values(self, curr_node=None):
        """Generate every stored value at or below *curr_node* (default: root)."""
        if curr_node is None:
            curr_node = self._root

        if curr_node[0] is not None:
            yield curr_node[0]

        for node in curr_node[1].values():
            for subchild in self._iter_values(node):
                yield subchild

    def _iter_items(self, curr_node=None, curr_key=''):
        """Generate (key, value) pairs at or below *curr_node*.

        *curr_key* is the string of characters leading to *curr_node*.
        """
        if curr_node is None:
            curr_node = self._root

        if curr_node[0] is not None:
            yield curr_key, curr_node[0]

        for key, node in curr_node[1].items():
            for subchild in self._iter_items(node, curr_key + key):
                yield subchild

    def _iter_keys(self, curr_node=None, curr_key=''):
        """Generate every valued key at or below *curr_node*.

        *curr_key* is the string of characters leading to *curr_node*.
        """
        if curr_node is None:
            curr_node = self._root

        if curr_node[0] is not None:
            yield curr_key

        for key, node in curr_node[1].items():
            for subchild in self._iter_keys(node, curr_key + key):
                yield subchild

    def __iter__(self):
        return self._iter_keys()

    def keys(self):
        """Return a list of the keys that have a value."""
        return list(self._iter_keys())

    def items(self):
        """Return a list of (key, value) pairs."""
        return list(self._iter_items())

    def values(self):
        """Return a list of the stored values."""
        return list(self._iter_values())

    def iterkeys(self):
        return self._iter_keys()

    def iteritems(self):
        return self._iter_items()

    def itervalues(self):
        return self._iter_values()

    def __str__(self):
        return str(self._root)

    def __repr__(self):
        return "Trie(%r)" % self._root

Change log

r8813 by stevenbird1 on Aug 8, 2011   Diff
added support for iterating over trie
objects, contributed by xim@8d.no;
resolves  issue 685 
Go to: 
Sign in to write a code review

Older revisions

r8730 by StevenBird1 on Mar 7, 2011   Diff
Updated NLTK copyright year range from
2001-2010 to 2001-2011
r8479 by StevenBird1 on Jan 12, 2010   Diff
Updated copyright period to 2001-2010
r7460 by StevenBird1 on Jan 28, 2009   Diff
changed copyright period to 2001-2009
All revisions of this file

File info

Size: 7844 bytes, 265 lines
Powered by Google Project Hosting