My favorites | Sign in
Project Home Downloads Wiki Issues Source
Checkout   Browse   Changes    
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# Natural Language Toolkit (NLTK) Help
#
# Copyright (C) 2001-2011 NLTK Project
# Authors: Steven Bird <sb@csse.unimelb.edu.au>
# URL: <http://www.nltk.org/>
# For license information, see LICENSE.TXT

"""
Provide structured access to documentation.
"""

import re
from textwrap import wrap

from nltk.data import load

def brown_tagset(tagpattern=None):
    """
    Print the help entries stored in the ``brown_tagset`` tagset resource.

    ``tagpattern`` is forwarded unchanged to ``_format_tagset``: when it is
    omitted (or otherwise falsy) every tag is printed; otherwise it is tried
    first as an exact tag name and then as a regular expression.
    """
    _format_tagset("brown_tagset", tagpattern=tagpattern)

def claws5_tagset(tagpattern=None):
    """
    Print the help entries stored in the ``claws5_tagset`` tagset resource.

    ``tagpattern`` is forwarded unchanged to ``_format_tagset``: when it is
    omitted (or otherwise falsy) every tag is printed; otherwise it is tried
    first as an exact tag name and then as a regular expression.
    """
    _format_tagset("claws5_tagset", tagpattern=tagpattern)

def upenn_tagset(tagpattern=None):
    """
    Print the help entries stored in the ``upenn_tagset`` tagset resource.

    ``tagpattern`` is forwarded unchanged to ``_format_tagset``: when it is
    omitted (or otherwise falsy) every tag is printed; otherwise it is tried
    first as an exact tag name and then as a regular expression.
    """
    _format_tagset("upenn_tagset", tagpattern=tagpattern)

#####################################################################
# UTILITIES
#####################################################################

def _print_entries(tags, tagdict):
    """
    Print one formatted help entry for each tag in ``tags``.

    Each entry is a heading line ``"<tag>: <entry[0]>"`` followed by
    ``entry[1]`` re-wrapped to at most 75 columns, with every wrapped line
    prefixed by the indent string.

    :param tags: iterable of keys to look up in ``tagdict``; entries are
        printed in iteration order.
    :param tagdict: mapping from tag to an indexable entry whose item 0 is
        the heading text and whose item 1 is the text to wrap.
    :raise KeyError: if a tag is not present in ``tagdict``.
    """
    indent = ' '
    for tag in tags:
        entry = tagdict[tag]
        heading = [tag + ": " + entry[0]]
        # wrap() returns an empty list for empty text, in which case only
        # the heading line is printed.
        body = wrap(entry[1], width=75,
                    initial_indent=indent, subsequent_indent=indent)
        # Call form of print with a single argument: valid on Python 3 and
        # produces the same output as the print statement on Python 2.
        print("\n".join(heading + body))

def _format_tagset(tagset, tagpattern=None):
    """
    Load a tagset help dictionary and print the entries selected by
    ``tagpattern``.

    The dictionary is loaded from the resource
    ``"help/tagsets/<tagset>.pickle"``.  Selection rules, in order:

    1. ``tagpattern`` is falsy (``None`` or ``""``): print every tag, sorted.
    2. ``tagpattern`` is itself a key of the dictionary: print that entry
       only (so a literal tag name is never interpreted as a regex).
    3. Otherwise ``tagpattern`` is compiled as a regular expression and
       every tag that it matches *from the start of the tag* is printed,
       sorted; if nothing matches, a short notice is printed instead.

    :param tagset: base name of the tagset resource, without directory or
        ``.pickle`` extension.
    :param tagpattern: exact tag name or regular expression, or ``None``.
    :raise re.error: if ``tagpattern`` is not a key and is not a valid
        regular expression.
    """
    tagdict = load("help/tagsets/" + tagset + ".pickle")
    if not tagpattern:
        _print_entries(sorted(tagdict), tagdict)
    elif tagpattern in tagdict:
        _print_entries([tagpattern], tagdict)
    else:
        # Keep the compiled regex in its own name rather than rebinding the
        # parameter, so ``tagpattern`` always refers to the caller's string.
        tag_regex = re.compile(tagpattern)
        tags = [tag for tag in sorted(tagdict) if tag_regex.match(tag)]
        if tags:
            _print_entries(tags, tagdict)
        else:
            # Call form of print: valid on Python 3, same output on Python 2.
            print("No matching tags found.")

if __name__ == '__main__':
    # Demo run: each pair is (tagset printer, pattern), executed in order.
    for show_tagset, pattern in (
        (brown_tagset, r'NN.*'),
        (upenn_tagset, r'.*\$'),
        (claws5_tagset, 'UNDEFINED'),
        (brown_tagset, r'NN'),
    ):
        show_tagset(pattern)

Change log

r8730 by StevenBird1 on Mar 7, 2011   Diff
Updated NLTK copyright year range from
2001-2010 to 2001-2011
Go to: 
Sign in to write a code review

Older revisions

r8479 by StevenBird1 on Jan 12, 2010   Diff
Updated copyright period to 2001-2010
r7680 by StevenBird1 on Feb 19, 2009   Diff
nltk_data/collections/*.xml
* updated to include new tagsets
collection

nltk/nltk_data/tagsets.py
...
All revisions of this file

File info

Size: 1615 bytes, 55 lines
Powered by Google Project Hosting