My favorites | Sign in
Project Home Downloads Wiki Issues Source
Repository:
Checkout   Browse   Changes   Clones  
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#LICENCE

'''
Logic related to string (Unicode) input.
'''

from string import digits
from sys import maxunicode
from unicodedata import category

from lepl.rxpy.alphabet.base import BaseAlphabet
from lepl.rxpy.parser.support import ParserState
from lepl.support.lib import basestring, str, chr
from lepl.rxpy.alphabet.bytes import ASCII_WORD

# Short aliases for the text and character constructors imported above, so
# that literals below can be wrapped uniformly.
u = str
c = chr


# Two-letter Unicode general categories (as returned by
# unicodedata.category) that String.word() accepts as "word" characters
# when the UNICODE flag is set: letters, marks, numbers and connector
# punctuation.
UNICODE_WORD = set(map(u, (
    'Ll', 'Lo', 'Lt', 'Lu',
    'Mc', 'Me', 'Mn',
    'Nd', 'Nl', 'No',
    'Pc',
)))


class String(BaseAlphabet):
    '''
    Define character sets etc for (Unicode) strings.  This expects the
    regular expression to also be a Unicode string.

    Letters are single characters and codes are their code points (see
    `code_to_letter` / `letter_to_code`).

    See base class for full documentation.
    '''

    def __init__(self):
        # Codes run from 0 to maxunicode.  The third argument is presumably
        # the escape character - confirm against BaseAlphabet.
        super(String, self).__init__(0, maxunicode, u('\\'))

    def code_to_letter(self, code):
        '''
        Convert a code - an integer value between min and max, that maps the
        alphabet to a contiguous set of integers - to a character in the
        alphabet.
        '''
        return c(code)

    def letter_to_code(self, letter):
        '''
        Convert a character in the alphabet to a code - an integer value
        between min and max, that maps the alphabet to a contiguous set of
        integers.
        '''
        return ord(letter)

    def validate_expression(self, expression, flags):
        '''
        Raise TypeError unless the expression is a string (`basestring`).
        `flags` is accepted but not used here.
        '''
        if not isinstance(expression, basestring):
            raise TypeError('Expression for string (Unicode) alphabet must be a string')

    def validate_input(self, input, flags):
        '''
        Raise TypeError unless the input is a (Unicode) `str`.  `flags` is
        accepted but not used here.
        '''
        if not isinstance(input, str):
            raise TypeError('Input for string (Unicode) alphabet must be a string')

    def expression_to_letter(self, char):
        '''
        Convert a character from the expression to a letter; for this
        alphabet the two are the same, so `char` is returned unchanged.
        '''
        return char

    def expression_to_str(self, char):
        '''
        Convert a character from the expression to text for display.
        Returns `char` itself, or None if it is empty or None.
        '''
        return char if char else None

    def letter_to_str(self, letter):
        '''
        Convert a letter to a printable, escaped form (the `repr` of the
        letter without the surrounding quotes).  None is passed through.
        '''
        if letter is None:
            return None
        text = repr(letter)
        # Python 2 prefixes the repr of unicode strings with 'u'.
        if text[0] == 'u':
            text = text[1:]
        return text[1:-1]  # drop quotes

    def expression_to_charset(self, char, flags):
        '''
        Expand a character for case-insensitive matching.

        Returns `(True, (lower, upper))` when IGNORECASE is set, the
        character is in range (any character with UNICODE, otherwise only
        code points below 128) and its lower and upper case forms differ;
        otherwise returns `(False, char)`.
        '''
        if flags & ParserState.IGNORECASE and \
                (flags & ParserState.UNICODE or ord(char) < 128):
            lo = char.lower()
            hi = char.upper()
            if lo != hi:
                return True, (lo, hi)
        return False, char

    def join(self, *strings):
        '''
        Construct a word in the alphabet, given a list of words and/or
        characters.
        '''
        return ''.join(strings)

    def digit(self, char, flags):
        '''
        Test whether the character is a digit or not.

        With the ASCII flag only the characters in `string.digits` match;
        otherwise any character in Unicode category 'Nd' matches.  The
        result is truthy/falsy rather than a strict bool; an empty or None
        `char` is returned unchanged (falsy).
        '''
        # http://bugs.python.org/issue1693050
        if flags & ParserState.ASCII:
            # Guard against empty/None input, as the other predicates do:
            # '' in digits is True and None in digits raises TypeError.
            return char and char in digits
        else:
            return char and category(char) == 'Nd'

    def space(self, char, flags):
        '''
        Test whether the character is a whitespace or not.

        ASCII whitespace always matches; with the UNICODE flag any
        character in a Unicode separator category (Zs, Zl, Zp) matches
        too.  The result is truthy/falsy rather than a strict bool.
        '''
        # http://bugs.python.org/issue1693050
        # category() returns two-letter codes, so test the major class 'Z'
        # by prefix; comparing the whole code with 'Z' can never succeed.
        return char and (char in u(' \t\n\r\f\v') or
                         (flags & ParserState.UNICODE and
                          category(char).startswith(u('Z'))))

    def word(self, char, flags):
        '''
        Test whether the character is a word character or not.

        Characters in ASCII_WORD always match; with the UNICODE flag any
        character whose category is in UNICODE_WORD matches too.  The
        result is truthy/falsy rather than a strict bool.
        '''
        # http://bugs.python.org/issue1693050
        return char and (char in ASCII_WORD or
                         (flags & ParserState.UNICODE and
                          category(char) in UNICODE_WORD))

    def unescape(self, code):
        '''
        Convert a numeric code to a letter, wrapping the code modulo 256
        first.  (Original note: no idea why, but needed for some tests.)
        '''
        return self.code_to_letter(code % 256)

Change log

7bccdac7cd5e by and...@acooke.org on Jan 15, 2012   Diff
various 2.7 fixes for rxpy
Go to: 
Project members, sign in to write a code review

Older revisions

5b14542ff43b by and...@acooke.org on Jan 10, 2012   Diff
various small fixes for bytes
3c562f5ef9e0 by and...@acooke.org on Nov 17, 2011   Diff
cleanup small
09a59c3a482e by and...@acooke.org on Jun 30, 2011   Diff
slowly fixing backtrack
All revisions of this file

File info

Size: 3493 bytes, 109 lines
Powered by Google Project Hosting