My favorites | Sign in
Project Home Downloads Wiki Issues Source
Repository:
Checkout   Browse   Changes   Clones  
Changes to /src/lepl/rxpy/engine/_test/test_re.py
7c88be681227 vs. f200c7d42c97 Compare: vs.  Format:
Revision f200c7d42c97
Go to: 
Project members, sign in to write a code review
/src/lepl/rxpy/engine/_test/test_re.py   7c88be681227 /src/lepl/rxpy/engine/_test/test_re.py   f200c7d42c97
1 1
2 # THIS FILE FROM PYTHON SOURCE - SEPARATE LICENCE 2 # THIS FILE FROM PYTHON SOURCE - SEPARATE LICENCE
3 3
4 import sys, traceback 4 import sys, traceback
5 from weakref import proxy 5 from weakref import proxy
6 6
7 from lepl.rxpy.engine._test.base import BaseTest 7 from lepl.rxpy.engine._test.base import BaseTest
8 8
9 # Misc tests from Tim Peters' re.doc 9 # Misc tests from Tim Peters' re.doc
10 10
11 # WARNING: Don't change details in these tests if you don't know 11 # WARNING: Don't change details in these tests if you don't know
12 # what you're doing. Some of these tests were carefully modeled to 12 # what you're doing. Some of these tests were carefully modeled to
13 # cover most of the code. 13 # cover most of the code.
14 14
15 15
16 #noinspection PyUnresolvedReferences 16 #noinspection PyUnresolvedReferences
17 class ReTests(BaseTest): 17 class ReTests(BaseTest):
18 18
19 def test_weakref(self): 19 def test_weakref(self):
20 s = 'QabbbcR' 20 s = 'QabbbcR'
21 x = self._re.compile('ab+c') 21 x = self._re.compile('ab+c')
22 y = proxy(x) 22 y = proxy(x)
23 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR')) 23 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
24 24
25 def test_search_star_plus(self): 25 def test_search_star_plus(self):
26 self.assertEqual(self._re.search('x*', 'axx').span(0), (0, 0)) 26 self.assertEqual(self._re.search('x*', 'axx').span(0), (0, 0))
27 self.assertEqual(self._re.search('x*', 'axx').span(), (0, 0)) 27 self.assertEqual(self._re.search('x*', 'axx').span(), (0, 0))
28 self.assertEqual(self._re.search('x+', 'axx').span(0), (1, 3)) 28 self.assertEqual(self._re.search('x+', 'axx').span(0), (1, 3))
29 self.assertEqual(self._re.search('x+', 'axx').span(), (1, 3)) 29 self.assertEqual(self._re.search('x+', 'axx').span(), (1, 3))
30 self.assertEqual(self._re.search('x', 'aaa'), None) 30 self.assertEqual(self._re.search('x', 'aaa'), None)
31 self.assertEqual(self._re.match('a*', 'xxx').span(0), (0, 0)) 31 self.assertEqual(self._re.match('a*', 'xxx').span(0), (0, 0))
32 self.assertEqual(self._re.match('a*', 'xxx').span(), (0, 0)) 32 self.assertEqual(self._re.match('a*', 'xxx').span(), (0, 0))
33 self.assertEqual(self._re.match('x*', 'xxxa').span(0), (0, 3)) 33 self.assertEqual(self._re.match('x*', 'xxxa').span(0), (0, 3))
34 self.assertEqual(self._re.match('x*', 'xxxa').span(), (0, 3)) 34 self.assertEqual(self._re.match('x*', 'xxxa').span(), (0, 3))
35 self.assertEqual(self._re.match('a+', 'xxx'), None) 35 self.assertEqual(self._re.match('a+', 'xxx'), None)
36 36
37 def bump_num(self, matchobj): 37 def bump_num(self, matchobj):
38 int_value = int(matchobj.group(0)) 38 int_value = int(matchobj.group(0))
39 return str(int_value + 1) 39 return str(int_value + 1)
40 40
41 def test_basic_re_sub(self): 41 def test_basic_re_sub(self):
42 self.assertEqual(self._re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x') 42 self.assertEqual(self._re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
43 self.assertEqual(self._re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'), 43 self.assertEqual(self._re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
44 '9.3 -3 24x100y') 44 '9.3 -3 24x100y')
45 self.assertEqual(self._re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3), 45 self.assertEqual(self._re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
46 '9.3 -3 23x99y') 46 '9.3 -3 23x99y')
47 47
48 self.assertEqual(self._re.sub('.', lambda m: r"\n", 'x'), '\\n') 48 self.assertEqual(self._re.sub('.', lambda m: r"\n", 'x'), '\\n')
49 self.assertEqual(self._re.sub('.', r"\n", 'x'), '\n') 49 self.assertEqual(self._re.sub('.', r"\n", 'x'), '\n')
50 50
51 s = r"\1\1" 51 s = r"\1\1"
52 self.assertEqual(self._re.sub('(.)', s, 'x'), 'xx') 52 self.assertEqual(self._re.sub('(.)', s, 'x'), 'xx')
53 self.assertEqual(self._re.sub('(.)', self._re.escape(s), 'x'), s) 53 self.assertEqual(self._re.sub('(.)', self._re.escape(s), 'x'), s)
54 self.assertEqual(self._re.sub('(.)', lambda m: s, 'x'), s) 54 self.assertEqual(self._re.sub('(.)', lambda m: s, 'x'), s)
55 55
56 self.assertEqual(self._re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx') 56 self.assertEqual(self._re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
57 self.assertEqual(self._re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx') 57 self.assertEqual(self._re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
58 self.assertEqual(self._re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx') 58 self.assertEqual(self._re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
59 self.assertEqual(self._re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx') 59 self.assertEqual(self._re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
60 60
61 self.assertEqual(self._re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'), 61 self.assertEqual(self._re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
62 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D') 62 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
63 self.assertEqual(self._re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a') 63 self.assertEqual(self._re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
64 self.assertEqual(self._re.sub('a', '\t\n\v\r\f\a', 'a'), 64 self.assertEqual(self._re.sub('a', '\t\n\v\r\f\a', 'a'),
65 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7))) 65 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
66 66
67 self.assertEqual(self._re.sub('^\s*', 'X', 'test'), 'Xtest') 67 self.assertEqual(self._re.sub('^\s*', 'X', 'test'), 'Xtest')
68 68
69 def test_bug_449964(self): 69 def test_bug_449964(self):
70 # fails for group followed by other escape 70 # fails for group followed by other escape
71 self.assertEqual(self._re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'), 71 self.assertEqual(self._re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
72 'xx\bxx\b') 72 'xx\bxx\b')
73 73
74 def test_bug_449000(self): 74 def test_bug_449000(self):
75 # Test for sub() on escaped characters 75 # Test for sub() on escaped characters
76 self.assertEqual(self._re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'), 76 self.assertEqual(self._re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
77 'abc\ndef\n') 77 'abc\ndef\n')
78 self.assertEqual(self._re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'), 78 self.assertEqual(self._re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
79 'abc\ndef\n') 79 'abc\ndef\n')
80 self.assertEqual(self._re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'), 80 self.assertEqual(self._re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
81 'abc\ndef\n') 81 'abc\ndef\n')
82 self.assertEqual(self._re.sub('\r\n', '\n', 'abc\r\ndef\r\n'), 82 self.assertEqual(self._re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
83 'abc\ndef\n') 83 'abc\ndef\n')
84 84
85 def test_bug_1661(self): 85 def test_bug_1661(self):
86 # Verify that flags do not get silently ignored with compiled patterns 86 # Verify that flags do not get silently ignored with compiled patterns
87 pattern = self._re.compile('.') 87 pattern = self._re.compile('.')
88 self.assertRaises(ValueError, self._re.match, pattern, 'A', self._re.I) 88 self.assertRaises(ValueError, self._re.match, pattern, 'A', self._re.I)
89 self.assertRaises(ValueError, self._re.search, pattern, 'A', self._re.I) 89 self.assertRaises(ValueError, self._re.search, pattern, 'A', self._re.I)
90 self.assertRaises(ValueError, self._re.findall, pattern, 'A', self._re.I) 90 self.assertRaises(ValueError, self._re.findall, pattern, 'A', self._re.I)
91 self.assertRaises(ValueError, self._re.compile, pattern, self._re.I) 91 self.assertRaises(ValueError, self._re.compile, pattern, self._re.I)
92 92
93 def test_bug_3629(self): 93 def test_bug_3629(self):
94 # A regex that triggered a bug in the sre-code validator 94 # A regex that triggered a bug in the sre-code validator
95 self._re.compile("(?P<quote>)(?(quote))") 95 self._re.compile("(?P<quote>)(?(quote))")
96 96
97 def test_sub_template_numeric_escape(self): 97 def test_sub_template_numeric_escape(self):
98 # bug 776311 and friends 98 # bug 776311 and friends
99 self.assertEqual(self._re.sub('x', r'\0', 'x'), '\0') 99 self.assertEqual(self._re.sub('x', r'\0', 'x'), '\0')
100 self.assertEqual(self._re.sub('x', r'\000', 'x'), '\000') 100 self.assertEqual(self._re.sub('x', r'\000', 'x'), '\000')
101 self.assertEqual(self._re.sub('x', r'\001', 'x'), '\001') 101 self.assertEqual(self._re.sub('x', r'\001', 'x'), '\001')
102 self.assertEqual(self._re.sub('x', r'\008', 'x'), '\0' + '8') 102 self.assertEqual(self._re.sub('x', r'\008', 'x'), '\0' + '8')
103 self.assertEqual(self._re.sub('x', r'\009', 'x'), '\0' + '9') 103 self.assertEqual(self._re.sub('x', r'\009', 'x'), '\0' + '9')
104 self.assertEqual(self._re.sub('x', r'\111', 'x'), '\111') 104 self.assertEqual(self._re.sub('x', r'\111', 'x'), '\111')
105 self.assertEqual(self._re.sub('x', r'\117', 'x'), '\117') 105 self.assertEqual(self._re.sub('x', r'\117', 'x'), '\117')
106 106
107 self.assertEqual(self._re.sub('x', r'\1111', 'x'), '\1111') 107 self.assertEqual(self._re.sub('x', r'\1111', 'x'), '\1111')
108 self.assertEqual(self._re.sub('x', r'\1111', 'x'), '\111' + '1') 108 self.assertEqual(self._re.sub('x', r'\1111', 'x'), '\111' + '1')
109 109
110 self.assertEqual(self._re.sub('x', r'\00', 'x'), '\x00') 110 self.assertEqual(self._re.sub('x', r'\00', 'x'), '\x00')
111 self.assertEqual(self._re.sub('x', r'\07', 'x'), '\x07') 111 self.assertEqual(self._re.sub('x', r'\07', 'x'), '\x07')
112 self.assertEqual(self._re.sub('x', r'\08', 'x'), '\0' + '8') 112 self.assertEqual(self._re.sub('x', r'\08', 'x'), '\0' + '8')
113 self.assertEqual(self._re.sub('x', r'\09', 'x'), '\0' + '9') 113 self.assertEqual(self._re.sub('x', r'\09', 'x'), '\0' + '9')
114 self.assertEqual(self._re.sub('x', r'\0a', 'x'), '\0' + 'a') 114 self.assertEqual(self._re.sub('x', r'\0a', 'x'), '\0' + 'a')
115 115
116 self.assertEqual(self._re.sub('x', r'\400', 'x'), '\0') 116 self.assertEqual(self._re.sub('x', r'\400', 'x'), '\0')
117 self.assertEqual(self._re.sub('x', r'\777', 'x'), '\377') 117 self.assertEqual(self._re.sub('x', r'\777', 'x'), '\377')
118 118
119 self.assertRaises(self._re.error, self._re.sub, 'x', r'\1', 'x') 119 self.assertRaises(self._re.error, self._re.sub, 'x', r'\1', 'x')
120 self.assertRaises(self._re.error, self._re.sub, 'x', r'\8', 'x') 120 self.assertRaises(self._re.error, self._re.sub, 'x', r'\8', 'x')
121 self.assertRaises(self._re.error, self._re.sub, 'x', r'\9', 'x') 121 self.assertRaises(self._re.error, self._re.sub, 'x', r'\9', 'x')
122 self.assertRaises(self._re.error, self._re.sub, 'x', r'\11', 'x') 122 self.assertRaises(self._re.error, self._re.sub, 'x', r'\11', 'x')
123 self.assertRaises(self._re.error, self._re.sub, 'x', r'\18', 'x') 123 self.assertRaises(self._re.error, self._re.sub, 'x', r'\18', 'x')
124 self.assertRaises(self._re.error, self._re.sub, 'x', r'\1a', 'x') 124 self.assertRaises(self._re.error, self._re.sub, 'x', r'\1a', 'x')
125 self.assertRaises(self._re.error, self._re.sub, 'x', r'\90', 'x') 125 self.assertRaises(self._re.error, self._re.sub, 'x', r'\90', 'x')
126 self.assertRaises(self._re.error, self._re.sub, 'x', r'\99', 'x') 126 self.assertRaises(self._re.error, self._re.sub, 'x', r'\99', 'x')
127 self.assertRaises(self._re.error, self._re.sub, 'x', r'\118', 'x') # r'\11' + '8' 127 self.assertRaises(self._re.error, self._re.sub, 'x', r'\118', 'x') # r'\11' + '8'
128 self.assertRaises(self._re.error, self._re.sub, 'x', r'\11a', 'x') 128 self.assertRaises(self._re.error, self._re.sub, 'x', r'\11a', 'x')
129 self.assertRaises(self._re.error, self._re.sub, 'x', r'\181', 'x') # r'\18' + '1' 129 self.assertRaises(self._re.error, self._re.sub, 'x', r'\181', 'x') # r'\18' + '1'
130 self.assertRaises(self._re.error, self._re.sub, 'x', r'\800', 'x') # r'\80' + '0' 130 self.assertRaises(self._re.error, self._re.sub, 'x', r'\800', 'x') # r'\80' + '0'
131 131
132 # in python2.3 (etc), these loop endlessly in sre_parser.py 132 # in python2.3 (etc), these loop endlessly in sre_parser.py
133 self.assertEqual(self._re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x') 133 self.assertEqual(self._re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
134 self.assertEqual(self._re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'), 134 self.assertEqual(self._re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
135 'xz8') 135 'xz8')
136 self.assertEqual(self._re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'), 136 self.assertEqual(self._re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
137 'xza') 137 'xza')
138 138
139 def test_qualified_re_sub(self): 139 def test_qualified_re_sub(self):
140 self.assertEqual(self._re.sub('a', 'b', 'aaaaa'), 'bbbbb') 140 self.assertEqual(self._re.sub('a', 'b', 'aaaaa'), 'bbbbb')
141 self.assertEqual(self._re.sub('a', 'b', 'aaaaa', 1), 'baaaa') 141 self.assertEqual(self._re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
142 142
143 def test_bug_114660(self): 143 def test_bug_114660(self):
144 self.assertEqual(self._re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'), 144 self.assertEqual(self._re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
145 'hello there') 145 'hello there')
146 146
147 def test_bug_462270(self): 147 def test_bug_462270(self):
148 # Test for empty sub() behaviour, see SF bug #462270 148 # Test for empty sub() behaviour, see SF bug #462270
149 self.assertEqual(self._re.sub('x*', '-', 'abxd'), '-a-b-d-') 149 self.assertEqual(self._re.sub('x*', '-', 'abxd'), '-a-b-d-')
150 self.assertEqual(self._re.sub('x+', '-', 'abxd'), 'ab-d') 150 self.assertEqual(self._re.sub('x+', '-', 'abxd'), 'ab-d')
151 151
152 def test_symbolic_refs(self): 152 def test_symbolic_refs(self):
153 self.assertRaises(self._re.error, self._re.sub, '(?P<a>x)', '\g<a', 'xx') 153 self.assertRaises(self._re.error, self._re.sub, '(?P<a>x)', '\g<a', 'xx')
154 self.assertRaises(self._re.error, self._re.sub, '(?P<a>x)', '\g<', 'xx') 154 self.assertRaises(self._re.error, self._re.sub, '(?P<a>x)', '\g<', 'xx')
155 self.assertRaises(self._re.error, self._re.sub, '(?P<a>x)', '\g', 'xx') 155 self.assertRaises(self._re.error, self._re.sub, '(?P<a>x)', '\g', 'xx')
156 self.assertRaises(self._re.error, self._re.sub, '(?P<a>x)', '\g<a a>', 'xx') 156 self.assertRaises(self._re.error, self._re.sub, '(?P<a>x)', '\g<a a>', 'xx')
157 self.assertRaises(self._re.error, self._re.sub, '(?P<a>x)', '\g<1a1>', 'xx') 157 self.assertRaises(self._re.error, self._re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
158 self.assertRaises(IndexError, self._re.sub, '(?P<a>x)', '\g<ab>', 'xx') 158 self.assertRaises(IndexError, self._re.sub, '(?P<a>x)', '\g<ab>', 'xx')
159 self.assertRaises(self._re.error, self._re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx') 159 self.assertRaises(self._re.error, self._re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
160 self.assertRaises(self._re.error, self._re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx') 160 self.assertRaises(self._re.error, self._re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
161 self.assertRaises(self._re.error, self._re.sub, '(?P<a>x)', '\g<-1>', 'xx') 161 self.assertRaises(self._re.error, self._re.sub, '(?P<a>x)', '\g<-1>', 'xx')
162 162
163 def test_re_subn(self): 163 def test_re_subn(self):
164 self.assertEqual(self._re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2)) 164 self.assertEqual(self._re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
165 self.assertEqual(self._re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1)) 165 self.assertEqual(self._re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
166 self.assertEqual(self._re.subn("b+", "x", "xyz"), ('xyz', 0)) 166 self.assertEqual(self._re.subn("b+", "x", "xyz"), ('xyz', 0))
167 self.assertEqual(self._re.subn("b*", "x", "xyz"), ('xxxyxzx', 4)) 167 self.assertEqual(self._re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
168 self.assertEqual(self._re.subn("b*", "x", "xyz", 2), ('xxxyz', 2)) 168 self.assertEqual(self._re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
169 169
170 def test_re_split(self): 170 def test_re_split(self):
171 self.assertEqual(self._re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c']) 171 self.assertEqual(self._re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
172 self.assertEqual(self._re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c']) 172 self.assertEqual(self._re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
173 self.assertEqual(self._re.split("(:*)", ":a:b::c"), 173 self.assertEqual(self._re.split("(:*)", ":a:b::c"),
174 ['', ':', 'a', ':', 'b', '::', 'c']) 174 ['', ':', 'a', ':', 'b', '::', 'c'])
175 self.assertEqual(self._re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c']) 175 self.assertEqual(self._re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
176 self.assertEqual(self._re.split("(:)*", ":a:b::c"), 176 self.assertEqual(self._re.split("(:)*", ":a:b::c"),
177 ['', ':', 'a', ':', 'b', ':', 'c']) 177 ['', ':', 'a', ':', 'b', ':', 'c'])
178 self.assertEqual(self._re.split("([b:]+)", ":a:b::c"), 178 self.assertEqual(self._re.split("([b:]+)", ":a:b::c"),
179 ['', ':', 'a', ':b::', 'c']) 179 ['', ':', 'a', ':b::', 'c'])
180 self.assertEqual(self._re.split("(b)|(:+)", ":a:b::c"), 180 self.assertEqual(self._re.split("(b)|(:+)", ":a:b::c"),
181 ['', None, ':', 'a', None, ':', '', 'b', None, '', 181 ['', None, ':', 'a', None, ':', '', 'b', None, '',
182 None, '::', 'c']) 182 None, '::', 'c'])
183 self.assertEqual(self._re.split("(?:b)|(?::+)", ":a:b::c"), 183 self.assertEqual(self._re.split("(?:b)|(?::+)", ":a:b::c"),
184 ['', 'a', '', '', 'c']) 184 ['', 'a', '', '', 'c'])
185 185
186 def test_qualified_re_split(self): 186 def test_qualified_re_split(self):
187 self.assertEqual(self._re.split(":", ":a:b::c", 2), ['', 'a', 'b::c']) 187 self.assertEqual(self._re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
188 self.assertEqual(self._re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d']) 188 self.assertEqual(self._re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
189 self.assertEqual(self._re.split("(:)", ":a:b::c", 2), 189 self.assertEqual(self._re.split("(:)", ":a:b::c", 2),
190 ['', ':', 'a', ':', 'b::c']) 190 ['', ':', 'a', ':', 'b::c'])
191 self.assertEqual(self._re.split("(:*)", ":a:b::c", 2), 191 self.assertEqual(self._re.split("(:*)", ":a:b::c", 2),
192 ['', ':', 'a', ':', 'b::c']) 192 ['', ':', 'a', ':', 'b::c'])
193 193
194 def test_re_findall(self): 194 def test_re_findall(self):
195 self.assertEqual(self._re.findall(":+", "abc"), []) 195 self.assertEqual(self._re.findall(":+", "abc"), [])
196 self.assertEqual(self._re.findall(":+", "a:b::c:::d"), [":", "::", ":::"]) 196 self.assertEqual(self._re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
197 self.assertEqual(self._re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"]) 197 self.assertEqual(self._re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
198 self.assertEqual(self._re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""), 198 self.assertEqual(self._re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
199 (":", ":"), 199 (":", ":"),
200 (":", "::")]) 200 (":", "::")])
201 201
202 def test_bug_117612(self): 202 def test_bug_117612(self):
203 self.assertEqual(self._re.findall(r"(a|(b))", "aba"), 203 self.assertEqual(self._re.findall(r"(a|(b))", "aba"),
204 [("a", ""),("b", "b"),("a", "")]) 204 [("a", ""),("b", "b"),("a", "")])
205 205
206 def test_re_match(self): 206 def test_re_match(self):
207 self.assertEqual(self._re.match('a', 'a').groups(), ()) 207 self.assertEqual(self._re.match('a', 'a').groups(), ())
208 self.assertEqual(self._re.match('(a)', 'a').groups(), ('a',)) 208 self.assertEqual(self._re.match('(a)', 'a').groups(), ('a',))
209 self.assertEqual(self._re.match(r'(a)', 'a').group(0), 'a') 209 self.assertEqual(self._re.match(r'(a)', 'a').group(0), 'a')
210 self.assertEqual(self._re.match(r'(a)', 'a').group(1), 'a') 210 self.assertEqual(self._re.match(r'(a)', 'a').group(1), 'a')
211 self.assertEqual(self._re.match(r'(a)', 'a').group(1, 1), ('a', 'a')) 211 self.assertEqual(self._re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
212 212
213 pat = self._re.compile('((a)|(b))(c)?') 213 pat = self._re.compile('((a)|(b))(c)?')
214 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None)) 214 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
215 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None)) 215 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
216 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c')) 216 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
217 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c')) 217 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
218 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c')) 218 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
219 219
220 # A single group 220 # A single group
221 m = self._re.match('(a)', 'a') 221 m = self._re.match('(a)', 'a')
222 self.assertEqual(m.group(0), 'a') 222 self.assertEqual(m.group(0), 'a')
223 self.assertEqual(m.group(0), 'a') 223 self.assertEqual(m.group(0), 'a')
224 self.assertEqual(m.group(1), 'a') 224 self.assertEqual(m.group(1), 'a')
225 self.assertEqual(m.group(1, 1), ('a', 'a')) 225 self.assertEqual(m.group(1, 1), ('a', 'a'))
226 226
227 pat = self._re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?') 227 pat = self._re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
228 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None)) 228 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
229 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'), 229 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
230 (None, 'b', None)) 230 (None, 'b', None))
231 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c')) 231 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
232 232
233 def test_re_groupref_exists(self): 233 def test_re_groupref_exists(self):
234 self.assertEqual(self._re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(), 234 self.assertEqual(self._re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
235 ('(', 'a')) 235 ('(', 'a'))
236 self.assertEqual(self._re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(), 236 self.assertEqual(self._re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
237 (None, 'a')) 237 (None, 'a'))
238 self.assertEqual(self._re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None) 238 self.assertEqual(self._re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
239 self.assertEqual(self._re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None) 239 self.assertEqual(self._re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
240 self.assertEqual(self._re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(), 240 self.assertEqual(self._re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
241 ('a', 'b')) 241 ('a', 'b'))
242 self.assertEqual(self._re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(), 242 self.assertEqual(self._re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
243 (None, 'd')) 243 (None, 'd'))
244 self.assertEqual(self._re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(), 244 self.assertEqual(self._re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
245 (None, 'd')) 245 (None, 'd'))
246 self.assertEqual(self._re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(), 246 self.assertEqual(self._re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
247 ('a', '')) 247 ('a', ''))
248 248
249 # Tests for bug #1177831: exercise groups other than the first group 249 # Tests for bug #1177831: exercise groups other than the first group
250 p = self._re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))') 250 p = self._re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
251 self.assertEqual(p.match('abc').groups(), 251 self.assertEqual(p.match('abc').groups(),
252 ('a', 'b', 'c')) 252 ('a', 'b', 'c'))
253 self.assertEqual(p.match('ad').groups(), 253 self.assertEqual(p.match('ad').groups(),
254 ('a', None, 'd')) 254 ('a', None, 'd'))
255 self.assertEqual(p.match('abd'), None) 255 self.assertEqual(p.match('abd'), None)
256 self.assertEqual(p.match('ac'), None) 256 self.assertEqual(p.match('ac'), None)
257 257
258 def test_re_groupref(self): 258 def test_re_groupref(self):
259 self.assertEqual(self._re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(), 259 self.assertEqual(self._re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
260 ('|', 'a')) 260 ('|', 'a'))
261 self.assertEqual(self._re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(), 261 self.assertEqual(self._re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
262 (None, 'a')) 262 (None, 'a'))
263 self.assertEqual(self._re.match(r'^(\|)?([^()]+)\1$', 'a|'), None) 263 self.assertEqual(self._re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
264 self.assertEqual(self._re.match(r'^(\|)?([^()]+)\1$', '|a'), None) 264 self.assertEqual(self._re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
265 self.assertEqual(self._re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(), 265 self.assertEqual(self._re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
266 ('a', 'a')) 266 ('a', 'a'))
267 self.assertEqual(self._re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(), 267 self.assertEqual(self._re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
268 (None, None)) 268 (None, None))
269 269
270 def test_groupdict(self): 270 def test_groupdict(self):
271 self.assertEqual(self._re.match('(?P<first>first) (?P<second>second)', 271 self.assertEqual(self._re.match('(?P<first>first) (?P<second>second)',
272 'first second').groupdict(), 272 'first second').groupdict(),
273 {'first':'first', 'second':'second'}) 273 {'first':'first', 'second':'second'})
274 274
275 def test_expand(self): 275 def test_expand(self):
276 self.assertEqual(self._re.match("(?P<first>first) (?P<second>second)", 276 self.assertEqual(self._re.match("(?P<first>first) (?P<second>second)",
277 "first second") 277 "first second")
278 .expand(r"\2 \1 \g<second> \g<first>"), 278 .expand(r"\2 \1 \g<second> \g<first>"),
279 "second first second first") 279 "second first second first")
280 280
281 def test_repeat_minmax(self): 281 def test_repeat_minmax(self):
282 self.assertEqual(self._re.match("^(\w){1}$", "abc"), None) 282 self.assertEqual(self._re.match("^(\w){1}$", "abc"), None)
283 self.assertEqual(self._re.match("^(\w){1}?$", "abc"), None) 283 self.assertEqual(self._re.match("^(\w){1}?$", "abc"), None)
284 self.assertEqual(self._re.match("^(\w){1,2}$", "abc"), None) 284 self.assertEqual(self._re.match("^(\w){1,2}$", "abc"), None)
285 self.assertEqual(self._re.match("^(\w){1,2}?$", "abc"), None) 285 self.assertEqual(self._re.match("^(\w){1,2}?$", "abc"), None)
286 286
287 self.assertEqual(self._re.match("^(\w){3}$", "abc").group(1), "c") 287 self.assertEqual(self._re.match("^(\w){3}$", "abc").group(1), "c")
288 self.assertEqual(self._re.match("^(\w){1,3}$", "abc").group(1), "c") 288 self.assertEqual(self._re.match("^(\w){1,3}$", "abc").group(1), "c")
289 self.assertEqual(self._re.match("^(\w){1,4}$", "abc").group(1), "c") 289 self.assertEqual(self._re.match("^(\w){1,4}$", "abc").group(1), "c")
290 self.assertEqual(self._re.match("^(\w){3,4}?$", "abc").group(1), "c") 290 self.assertEqual(self._re.match("^(\w){3,4}?$", "abc").group(1), "c")
291 self.assertEqual(self._re.match("^(\w){3}?$", "abc").group(1), "c") 291 self.assertEqual(self._re.match("^(\w){3}?$", "abc").group(1), "c")
292 self.assertEqual(self._re.match("^(\w){1,3}?$", "abc").group(1), "c") 292 self.assertEqual(self._re.match("^(\w){1,3}?$", "abc").group(1), "c")
293 self.assertEqual(self._re.match("^(\w){1,4}?$", "abc").group(1), "c") 293 self.assertEqual(self._re.match("^(\w){1,4}?$", "abc").group(1), "c")
294 self.assertEqual(self._re.match("^(\w){3,4}?$", "abc").group(1), "c") 294 self.assertEqual(self._re.match("^(\w){3,4}?$", "abc").group(1), "c")
295 295
296 self.assertEqual(self._re.match("^x{1}$", "xxx"), None) 296 self.assertEqual(self._re.match("^x{1}$", "xxx"), None)
297 self.assertEqual(self._re.match("^x{1}?$", "xxx"), None) 297 self.assertEqual(self._re.match("^x{1}?$", "xxx"), None)
298 self.assertEqual(self._re.match("^x{1,2}$", "xxx"), None) 298 self.assertEqual(self._re.match("^x{1,2}$", "xxx"), None)
299 self.assertEqual(self._re.match("^x{1,2}?$", "xxx"), None) 299 self.assertEqual(self._re.match("^x{1,2}?$", "xxx"), None)
300 300
301 self.assertNotEqual(self._re.match("^x{3}$", "xxx"), None) 301 self.assertNotEqual(self._re.match("^x{3}$", "xxx"), None)
302 self.assertNotEqual(self._re.match("^x{1,3}$", "xxx"), None) 302 self.assertNotEqual(self._re.match("^x{1,3}$", "xxx"), None)
303 self.assertNotEqual(self._re.match("^x{1,4}$", "xxx"), None) 303 self.assertNotEqual(self._re.match("^x{1,4}$", "xxx"), None)
304 self.assertNotEqual(self._re.match("^x{3,4}?$", "xxx"), None) 304 self.assertNotEqual(self._re.match("^x{3,4}?$", "xxx"), None)
305 self.assertNotEqual(self._re.match("^x{3}?$", "xxx"), None) 305 self.assertNotEqual(self._re.match("^x{3}?$", "xxx"), None)
306 self.assertNotEqual(self._re.match("^x{1,3}?$", "xxx"), None) 306 self.assertNotEqual(self._re.match("^x{1,3}?$", "xxx"), None)
307 self.assertNotEqual(self._re.match("^x{1,4}?$", "xxx"), None) 307 self.assertNotEqual(self._re.match("^x{1,4}?$", "xxx"), None)
308 self.assertNotEqual(self._re.match("^x{3,4}?$", "xxx"), None) 308 self.assertNotEqual(self._re.match("^x{3,4}?$", "xxx"), None)
309 309
310 self.assertEqual(self._re.match("^x{}$", "xxx"), None) 310 self.assertEqual(self._re.match("^x{}$", "xxx"), None)
311 self.assertNotEqual(self._re.match("^x{}$", "x{}"), None) 311 self.assertNotEqual(self._re.match("^x{}$", "x{}"), None)
312 312
313 def test_getattr(self): 313 def test_getattr(self):
314 self.assertEqual(self._re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)") 314 self.assertEqual(self._re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
315 self.assertEqual(self._re.compile("(?i)(a)(b)").flags, self._re.I | self._re.U) 315 # the 63 below masks the extended flags used by RXPY
316 self.assertEqual(self._re.compile("(?i)(a)(b)").flags & 63, self._re.I | self._re.U)
316 self.assertEqual(self._re.compile("(?i)(a)(b)").groups, 2) 317 self.assertEqual(self._re.compile("(?i)(a)(b)").groups, 2)
317 self.assertEqual(self._re.compile("(?i)(a)(b)").groupindex, {}) 318 self.assertEqual(self._re.compile("(?i)(a)(b)").groupindex, {})
318 self.assertEqual(self._re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex, 319 self.assertEqual(self._re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,
319 {'first': 1, 'other': 2}) 320 {'first': 1, 'other': 2})
320 321
321 self.assertEqual(self._re.match("(a)", "a").pos, 0) 322 self.assertEqual(self._re.match("(a)", "a").pos, 0)
322 self.assertEqual(self._re.match("(a)", "a").endpos, 1) 323 self.assertEqual(self._re.match("(a)", "a").endpos, 1)
323 self.assertEqual(self._re.match("(a)", "a").string, "a") 324 self.assertEqual(self._re.match("(a)", "a").string, "a")
324 self.assertEqual(self._re.match("(a)", "a").regs, ((0, 1), (0, 1))) 325 self.assertEqual(self._re.match("(a)", "a").regs, ((0, 1), (0, 1)))
325 self.assertNotEqual(self._re.match("(a)", "a").re, None) 326 self.assertNotEqual(self._re.match("(a)", "a").re, None)
326 327
327 def test_special_escapes(self): 328 def test_special_escapes(self):
328 self.assertEqual(self._re.search(r"\b(b.)\b", 329 self.assertEqual(self._re.search(r"\b(b.)\b",
329 "abcd abc bcd bx").group(1), "bx") 330 "abcd abc bcd bx").group(1), "bx")
330 self.assertEqual(self._re.search(r"\B(b.)\B", 331 self.assertEqual(self._re.search(r"\B(b.)\B",
331 "abc bcd bc abxd").group(1), "bx") 332 "abc bcd bc abxd").group(1), "bx")
332 # self.assertEqual(self._re.search(r"\b(b.)\b", 333 # self.assertEqual(self._re.search(r"\b(b.)\b",
333 # "abcd abc bcd bx", self._re.LOCALE).group(1), "bx") 334 # "abcd abc bcd bx", self._re.LOCALE).group(1), "bx")
334 # self.assertEqual(self._re.search(r"\B(b.)\B", 335 # self.assertEqual(self._re.search(r"\B(b.)\B",
335 # "abc bcd bc abxd", self._re.LOCALE).group(1), "bx") 336 # "abc bcd bc abxd", self._re.LOCALE).group(1), "bx")
336 self.assertEqual(self._re.search(r"\b(b.)\b", 337 self.assertEqual(self._re.search(r"\b(b.)\b",
337 "abcd abc bcd bx", self._re.UNICODE).group(1), "bx" 338 "abcd abc bcd bx", self._re.UNICODE).group(1), "bx"
338 ) 339 )
339 self.assertEqual(self._re.search(r"\B(b.)\B", 340 self.assertEqual(self._re.search(r"\B(b.)\B",
340 "abc bcd bc abxd", self._re.UNICODE).group(1), "bx" 341 "abc bcd bc abxd", self._re.UNICODE).group(1), "bx"
341 ) 342 )
342 self.assertEqual(self._re.search(r"^abc$", "\nabc\n", self._re.M).group(0), "abc") 343 self.assertEqual(self._re.search(r"^abc$", "\nabc\n", self._re.M).group(0), "abc")
343 self.assertEqual(self._re.search(r"^\Aabc\Z$", "abc", self._re.M).group(0), "abc") 344 self.assertEqual(self._re.search(r"^\Aabc\Z$", "abc", self._re.M).group(0), "abc")
344 self.assertEqual(self._re.search(r"^\Aabc\Z$", "\nabc\n", self._re.M), None) 345 self.assertEqual(self._re.search(r"^\Aabc\Z$", "\nabc\n", self._re.M), None)
345 self.assertEqual(self._re.search(r"\b(b.)\b", 346 self.assertEqual(self._re.search(r"\b(b.)\b",
346 "abcd abc bcd bx").group(1), "bx") 347 "abcd abc bcd bx").group(1), "bx")
347 self.assertEqual(self._re.search(r"\B(b.)\B", 348 self.assertEqual(self._re.search(r"\B(b.)\B",
348 "abc bcd bc abxd").group(1), "bx") 349 "abc bcd bc abxd").group(1), "bx")
349 self.assertEqual(self._re.search(r"^abc$", "\nabc\n", self._re.M).group(0), "abc") 350 self.assertEqual(self._re.search(r"^abc$", "\nabc\n", self._re.M).group(0), "abc")
350 self.assertEqual(self._re.search(r"^\Aabc\Z$", "abc", self._re.M).group(0), "abc") 351 self.assertEqual(self._re.search(r"^\Aabc\Z$", "abc", self._re.M).group(0), "abc")
351 self.assertEqual(self._re.search(r"^\Aabc\Z$", "\nabc\n", self._re.M), None) 352 self.assertEqual(self._re.search(r"^\Aabc\Z$", "\nabc\n", self._re.M), None)
352 self.assertEqual(self._re.search(r"\d\D\w\W\s\S", 353 self.assertEqual(self._re.search(r"\d\D\w\W\s\S",
353 "1aa! a").group(0), "1aa! a") 354 "1aa! a").group(0), "1aa! a")
354 # self.assertEqual(self._re.search(r"\d\D\w\W\s\S", 355 # self.assertEqual(self._re.search(r"\d\D\w\W\s\S",
355 # "1aa! a", self._re.LOCALE).group(0), "1aa! a") 356 # "1aa! a", self._re.LOCALE).group(0), "1aa! a")
356 self.assertEqual(self._re.search(r"\d\D\w\W\s\S", 357 self.assertEqual(self._re.search(r"\d\D\w\W\s\S",
357 "1aa! a", self._re.UNICODE).group(0), "1aa! a") 358 "1aa! a", self._re.UNICODE).group(0), "1aa! a")
358 359
359 def test_bigcharset(self): 360 def test_bigcharset(self):
360 self.assertEqual(self._re.match("([\u2222\u2223])", 361 self.assertEqual(self._re.match("([\u2222\u2223])",
361 "\u2222").group(1), "\u2222") 362 "\u2222").group(1), "\u2222")
362 self.assertEqual(self._re.match("([\u2222\u2223])", 363 self.assertEqual(self._re.match("([\u2222\u2223])",
363 "\u2222", self._re.UNICODE).group(1), "\u2222") 364 "\u2222", self._re.UNICODE).group(1), "\u2222")
364 365
365 def test_anyall(self): 366 def test_anyall(self):
366 self.assertEqual(self._re.match("a.b", "a\nb", self._re.DOT_ALL).group(0), 367 self.assertEqual(self._re.match("a.b", "a\nb", self._re.DOT_ALL).group(0),
367 "a\nb") 368 "a\nb")
368 self.assertEqual(self._re.match("a.*b", "a\n\nb", self._re.DOT_ALL).group(0), 369 self.assertEqual(self._re.match("a.*b", "a\n\nb", self._re.DOT_ALL).group(0),
369 "a\n\nb") 370 "a\n\nb")
370 371
371 def test_non_consuming(self): 372 def test_non_consuming(self):
372 self.assertEqual(self._re.match("(a(?=\s[^a]))", "a b").group(1), "a") 373 self.assertEqual(self._re.match("(a(?=\s[^a]))", "a b").group(1), "a")
373 self.assertEqual(self._re.match("(a(?=\s[^a]*))", "a b").group(1), "a") 374 self.assertEqual(self._re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
374 self.assertEqual(self._re.match("(a(?=\s[abc]))", "a b").group(1), "a") 375 self.assertEqual(self._re.match("(a(?=\s[abc]))", "a b").group(1), "a")
375 self.assertEqual(self._re.match("(a(?=\s[abc]*))", "a bc").group(1), "a") 376 self.assertEqual(self._re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
376 self.assertEqual(self._re.match(r"(a)(?=\s\1)", "a a").group(1), "a") 377 self.assertEqual(self._re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
377 self.assertEqual(self._re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a") 378 self.assertEqual(self._re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
378 self.assertEqual(self._re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a") 379 self.assertEqual(self._re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
379 380
380 self.assertEqual(self._re.match(r"(a(?!\s[^a]))", "a a").group(1), "a") 381 self.assertEqual(self._re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
381 self.assertEqual(self._re.match(r"(a(?!\s[abc]))", "a d").group(1), "a") 382 self.assertEqual(self._re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
382 self.assertEqual(self._re.match(r"(a)(?!\s\1)", "a b").group(1), "a") 383 self.assertEqual(self._re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
383 self.assertEqual(self._re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a") 384 self.assertEqual(self._re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
384 385
385 def test_ignore_case(self): 386 def test_ignore_case(self):
386 self.assertEqual(self._re.match("abc", "ABC", self._re.I).group(0), "ABC") 387 self.assertEqual(self._re.match("abc", "ABC", self._re.I).group(0), "ABC")
387 self.assertEqual(self._re.match("abc", "ABC", self._re.I).group(0), "ABC") 388 self.assertEqual(self._re.match("abc", "ABC", self._re.I).group(0), "ABC")
388 self.assertEqual(self._re.match(r"(a\s[^a])", "a b", self._re.I).group(1), "a b") 389 self.assertEqual(self._re.match(r"(a\s[^a])", "a b", self._re.I).group(1), "a b")
389 self.assertEqual(self._re.match(r"(a\s[^a]*)", "a bb", self._re.I).group(1), "a bb") 390 self.assertEqual(self._re.match(r"(a\s[^a]*)", "a bb", self._re.I).group(1), "a bb")
390 self.assertEqual(self._re.match(r"(a\s[abc])", "a b", self._re.I).group(1), "a b") 391 self.assertEqual(self._re.match(r"(a\s[abc])", "a b", self._re.I).group(1), "a b")
391 self.assertEqual(self._re.match(r"(a\s[abc]*)", "a bb", self._re.I).group(1), "a bb" 392 self.assertEqual(self._re.match(r"(a\s[abc]*)", "a bb", self._re.I).group(1), "a bb"
392 ) 393 )
393 self.assertEqual(self._re.match(r"((a)\s\2)", "a a", self._re.I).group(1), "a a") 394 self.assertEqual(self._re.match(r"((a)\s\2)", "a a", self._re.I).group(1), "a a")
394 self.assertEqual(self._re.match(r"((a)\s\2*)", "a aa", self._re.I).group(1), "a aa") 395 self.assertEqual(self._re.match(r"((a)\s\2*)", "a aa", self._re.I).group(1), "a aa")
395 self.assertEqual(self._re.match(r"((a)\s(abc|a))", "a a", self._re.I).group(1), "a a") 396 self.assertEqual(self._re.match(r"((a)\s(abc|a))", "a a", self._re.I).group(1), "a a")
396 self.assertEqual(self._re.match(r"((a)\s(abc|a)*)", "a aa", self._re.I).group(1), "a aa") 397 self.assertEqual(self._re.match(r"((a)\s(abc|a)*)", "a aa", self._re.I).group(1), "a aa")
397 398
398 def test_category(self): 399 def test_category(self):
399 self.assertEqual(self._re.match(r"(\s)", " ").group(1), " ") 400 self.assertEqual(self._re.match(r"(\s)", " ").group(1), " ")
400 401
401 def test_getlower(self): 402 def test_getlower(self):
402 import _sre 403 import _sre
403 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a')) 404 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
404 # self.assertEqual(_sre.getlower(ord('A'), self._re.LOCALE), ord('a')) 405 # self.assertEqual(_sre.getlower(ord('A'), self._re.LOCALE), ord('a'))
405 self.assertEqual(_sre.getlower(ord('A'), self._re.UNICODE), ord('a')) 406 self.assertEqual(_sre.getlower(ord('A'), self._re.UNICODE), ord('a'))
406 407
407 self.assertEqual(self._re.match("abc", "ABC", self._re.I).group(0), "ABC") 408 self.assertEqual(self._re.match("abc", "ABC", self._re.I).group(0), "ABC")
408 self.assertEqual(self._re.match("abc", "ABC", self._re.I).group(0), "ABC") 409 self.assertEqual(self._re.match("abc", "ABC", self._re.I).group(0), "ABC")
409 410
410 def test_not_literal(self): 411 def test_not_literal(self):
411 self.assertEqual(self._re.search("\s([^a])", " b").group(1), "b") 412 self.assertEqual(self._re.search("\s([^a])", " b").group(1), "b")
412 self.assertEqual(self._re.search("\s([^a]*)", " bb").group(1), "bb") 413 self.assertEqual(self._re.search("\s([^a]*)", " bb").group(1), "bb")
413 414
414 def test_search_coverage(self): 415 def test_search_coverage(self):
415 self.assertEqual(self._re.search("\s(b)", " b").group(1), "b") 416 self.assertEqual(self._re.search("\s(b)", " b").group(1), "b")
416 self.assertEqual(self._re.search("a\s", "a ").group(0), "a ") 417 self.assertEqual(self._re.search("a\s", "a ").group(0), "a ")
417 418
418 def test_re_escape(self): 419 def test_re_escape(self):
419 p="" 420 p=""
420 self.assertEqual(self._re.escape(p), p) 421 self.assertEqual(self._re.escape(p), p)
421 for i in range(0, 256): 422 for i in range(0, 256):
422 p = p + chr(i) 423 p = p + chr(i)
423 self.assertEqual(self._re.match(self._re.escape(chr(i)), chr(i)) is not None, 424 self.assertEqual(self._re.match(self._re.escape(chr(i)), chr(i)) is not None,
424 True) 425 True)
425 self.assertEqual(self._re.match(self._re.escape(chr(i)), chr(i)).span(), (0,1)) 426 self.assertEqual(self._re.match(self._re.escape(chr(i)), chr(i)).span(), (0,1))
426 427
427 pat=self._re.compile(self._re.escape(p)) 428 pat=self._re.compile(self._re.escape(p))
428 self.assertEqual(pat.match(p) is not None, True) 429 self.assertEqual(pat.match(p) is not None, True)
429 self.assertEqual(pat.match(p).span(), (0,256)) 430 self.assertEqual(pat.match(p).span(), (0,256))
430 431
431 def test_re_escape_byte(self): 432 def test_re_escape_byte(self):
432 p=b"" 433 p=b""
433 self.assertEqual(self._re.escape(p), p) 434 self.assertEqual(self._re.escape(p), p)
434 for i in range(0, 256): 435 for i in range(0, 256):
435 b = bytes([i]) 436 b = bytes([i])
436 p += b 437 p += b
437 self.assertEqual(self._re.match(self._re.escape(b), b) is not None, True) 438 self.assertEqual(self._re.match(self._re.escape(b), b) is not None, True)
438 self.assertEqual(self._re.match(self._re.escape(b), b).span(), (0,1)) 439 self.assertEqual(self._re.match(self._re.escape(b), b).span(), (0,1))
439 440
440 pat=self._re.compile(self._re.escape(p)) 441 pat=self._re.compile(self._re.escape(p))
441 self.assertEqual(pat.match(p) is not None, True) 442 self.assertEqual(pat.match(p) is not None, True)
442 self.assertEqual(pat.match(p).span(), (0,256)) 443 self.assertEqual(pat.match(p).span(), (0,256))
443 444
444 def pickle_test(self, pickle): 445 def pickle_test(self, pickle):
445 oldpat = self._re.compile('a(?:b|(c|e){1,2}?|d)+?(.)') 446 oldpat = self._re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
446 s = pickle.dumps(oldpat) 447 s = pickle.dumps(oldpat)
447 newpat = pickle.loads(s) 448 newpat = pickle.loads(s)
448 self.assertEqual(oldpat, newpat) 449 self.assertEqual(oldpat, newpat)
449 450
450 def test_constants(self): 451 def test_constants(self):
451 self.assertEqual(self._re.I, self._re.IGNORE_CASE) 452 self.assertEqual(self._re.I, self._re.IGNORE_CASE)
452 # self.assertEqual(self._re.L, self._re.LOCALE) 453 # self.assertEqual(self._re.L, self._re.LOCALE)
453 self.assertEqual(self._re.M, self._re.MULTILINE) 454 self.assertEqual(self._re.M, self._re.MULTILINE)
454 self.assertEqual(self._re.S, self._re.DOT_ALL) 455 self.assertEqual(self._re.S, self._re.DOT_ALL)
455 self.assertEqual(self._re.X, self._re.VERBOSE) 456 self.assertEqual(self._re.X, self._re.VERBOSE)
456 457
457 def test_flags(self): 458 def test_flags(self):
458 # for flag in [self._re.I, self._re.M, self._re.X, self._re.S, self._re.L]: 459 # for flag in [self._re.I, self._re.M, self._re.X, self._re.S, self._re.L]:
459 for flag in [self._re.I, self._re.M, self._re.X, self._re.S]: 460 for flag in [self._re.I, self._re.M, self._re.X, self._re.S]:
460 self.assertNotEqual(self._re.compile('^pattern$', flag), None) 461 self.assertNotEqual(self._re.compile('^pattern$', flag), None)
461 462
462 def test_sre_character_literals(self): 463 def test_sre_character_literals(self):
463 for i in [0, 8, 16, 32, 64, 127, 128, 255]: 464 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
464 self.assertNotEqual(self._re.match(r"\%03o" % i, chr(i)), None) 465 self.assertNotEqual(self._re.match(r"\%03o" % i, chr(i)), None)
465 self.assertNotEqual(self._re.match(r"\%03o0" % i, chr(i)+"0"), None) 466 self.assertNotEqual(self._re.match(r"\%03o0" % i, chr(i)+"0"), None)
466 self.assertNotEqual(self._re.match(r"\%03o8" % i, chr(i)+"8"), None) 467 self.assertNotEqual(self._re.match(r"\%03o8" % i, chr(i)+"8"), None)
467 self.assertNotEqual(self._re.match(r"\x%02x" % i, chr(i)), None) 468 self.assertNotEqual(self._re.match(r"\x%02x" % i, chr(i)), None)
468 self.assertNotEqual(self._re.match(r"\x%02x0" % i, chr(i)+"0"), None) 469 self.assertNotEqual(self._re.match(r"\x%02x0" % i, chr(i)+"0"), None)
469 self.assertNotEqual(self._re.match(r"\x%02xz" % i, chr(i)+"z"), None) 470 self.assertNotEqual(self._re.match(r"\x%02xz" % i, chr(i)+"z"), None)
470 self.assertRaises(self._re.error, self._re.match, "\911", "") 471 self.assertRaises(self._re.error, self._re.match, "\911", "")
471 472
472 def test_sre_character_class_literals(self): 473 def test_sre_character_class_literals(self):
473 for i in [0, 8, 16, 32, 64, 127, 128, 255]: 474 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
474 self.assertNotEqual(self._re.match(r"[\%03o]" % i, chr(i)), None) 475 self.assertNotEqual(self._re.match(r"[\%03o]" % i, chr(i)), None)
475 self.assertNotEqual(self._re.match(r"[\%03o0]" % i, chr(i)), None) 476 self.assertNotEqual(self._re.match(r"[\%03o0]" % i, chr(i)), None)
476 self.assertNotEqual(self._re.match(r"[\%03o8]" % i, chr(i)), None) 477 self.assertNotEqual(self._re.match(r"[\%03o8]" % i, chr(i)), None)
477 self.assertNotEqual(self._re.match(r"[\x%02x]" % i, chr(i)), None) 478 self.assertNotEqual(self._re.match(r"[\x%02x]" % i, chr(i)), None)
478 self.assertNotEqual(self._re.match(r"[\x%02x0]" % i, chr(i)), None) 479 self.assertNotEqual(self._re.match(r"[\x%02x0]" % i, chr(i)), None)
479 self.assertNotEqual(self._re.match(r"[\x%02xz]" % i, chr(i)), None) 480 self.assertNotEqual(self._re.match(r"[\x%02xz]" % i, chr(i)), None)
480 self.assertRaises(self._re.error, self._re.match, "[\911]", "") 481 self.assertRaises(self._re.error, self._re.match, "[\911]", "")
481 482
482 def test_bug_113254(self): 483 def test_bug_113254(self):
483 self.assertEqual(self._re.match(r'(a)|(b)', 'b').start(1), -1) 484 self.assertEqual(self._re.match(r'(a)|(b)', 'b').start(1), -1)
484 self.assertEqual(self._re.match(r'(a)|(b)', 'b').end(1), -1) 485 self.assertEqual(self._re.match(r'(a)|(b)', 'b').end(1), -1)
485 self.assertEqual(self._re.match(r'(a)|(b)', 'b').span(1), (-1, -1)) 486 self.assertEqual(self._re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
486 487
487 def test_bug_527371(self): 488 def test_bug_527371(self):
488 # bug described in patches 527371/672491 489 # bug described in patches 527371/672491
489 self.assertEqual(self._re.match(r'(a)?a','a').lastindex, None) 490 self.assertEqual(self._re.match(r'(a)?a','a').lastindex, None)
490 self.assertEqual(self._re.match(r'(a)(b)?b','ab').lastindex, 1) 491 self.assertEqual(self._re.match(r'(a)(b)?b','ab').lastindex, 1)
491 self.assertEqual(self._re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a') 492 self.assertEqual(self._re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
492 self.assertEqual(self._re.match("(?P<a>a(b))", "ab").lastgroup, 'a') 493 self.assertEqual(self._re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
493 self.assertEqual(self._re.match("((a))", "a").lastindex, 1) 494 self.assertEqual(self._re.match("((a))", "a").lastindex, 1)
494 495
495 def test_bug_545855(self): 496 def test_bug_545855(self):
496 # bug 545855 -- This pattern failed to cause a compile error as it 497 # bug 545855 -- This pattern failed to cause a compile error as it
497 # should, instead provoking a TypeError. 498 # should, instead provoking a TypeError.
498 self.assertRaises(self._re.error, self._re.compile, 'foo[a-') 499 self.assertRaises(self._re.error, self._re.compile, 'foo[a-')
499 500
500 def test_bug_418626(self): 501 def test_bug_418626(self):
501 # bugs 418626 et al. -- Testing Greg Chapman's addition of op code 502 # bugs 418626 et al. -- Testing Greg Chapman's addition of op code
502 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of 503 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
503 # pattern '*?' on a long string. 504 # pattern '*?' on a long string.
504 self.assertEqual(self._re.match('.*?c', 10000*'ab'+'cd').end(0), 20001) 505 self.assertEqual(self._re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
505 self.assertEqual(self._re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0), 506 self.assertEqual(self._re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
506 20003) 507 20003)
507 self.assertEqual(self._re.match('.*?cd', 20000*'abc'+'de').end(0), 60001) 508 self.assertEqual(self._re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
508 # non-simple '*?' still used to hit the recursion limit, before the 509 # non-simple '*?' still used to hit the recursion limit, before the
509 # non-recursive scheme was implemented. 510 # non-recursive scheme was implemented.
510 self.assertEqual(self._re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001) 511 self.assertEqual(self._re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
511 512
512 def test_bug_612074(self): 513 def test_bug_612074(self):
513 pat="["+self._re.escape("\u2039")+"]" 514 pat="["+self._re.escape("\u2039")+"]"
514 self.assertEqual(self._re.compile(pat) and 1, 1) 515 self.assertEqual(self._re.compile(pat) and 1, 1)
515 516
516 def test_scanner(self): 517 def test_scanner(self):
517 def s_ident(scanner, token): return token 518 def s_ident(scanner, token): return token
518 def s_operator(scanner, token): return "op%s" % token 519 def s_operator(scanner, token): return "op%s" % token
519 def s_float(scanner, token): return float(token) 520 def s_float(scanner, token): return float(token)
520 def s_int(scanner, token): return int(token) 521 def s_int(scanner, token): return int(token)
521 522
522 scanner = self._re.Scanner([ 523 scanner = self._re.Scanner([
523 (r"[a-zA-Z_]\w*", s_ident), 524 (r"[a-zA-Z_]\w*", s_ident),
524 (r"\d+\.\d*", s_float), 525 (r"\d+\.\d*", s_float),
525 (r"\d+", s_int), 526 (r"\d+", s_int),
526 (r"=|\+|-|\*|/", s_operator), 527 (r"=|\+|-|\*|/", s_operator),
527 (r"\s+", None), 528 (r"\s+", None),
528 ]) 529 ])
529 530
530 self.assertNotEqual(scanner.scanner.scanner("").pattern, None) 531 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
531 532
532 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"), 533 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
533 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5, 534 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
534 'op+', 'bar'], '')) 535 'op+', 'bar'], ''))
535 536
536 def test_bug_448951(self): 537 def test_bug_448951(self):
537 # bug 448951 (similar to 429357, but with single char match) 538 # bug 448951 (similar to 429357, but with single char match)
538 # (Also test greedy matches.) 539 # (Also test greedy matches.)
539 for op in '','?','*': 540 for op in '','?','*':
540 self.assertEqual(self._re.match(r'((.%s):)?z'%op, 'z').groups(), 541 self.assertEqual(self._re.match(r'((.%s):)?z'%op, 'z').groups(),
541 (None, None)) 542 (None, None))
542 self.assertEqual(self._re.match(r'((.%s):)?z'%op, 'a:z').groups(), 543 self.assertEqual(self._re.match(r'((.%s):)?z'%op, 'a:z').groups(),
543 ('a:', 'a')) 544 ('a:', 'a'))
544 545
545 def test_bug_725106(self): 546 def test_bug_725106(self):
546 # capturing groups in alternatives in repeats 547 # capturing groups in alternatives in repeats
547 self.assertEqual(self._re.match('^((a)|b)*', 'abc').groups(), 548 self.assertEqual(self._re.match('^((a)|b)*', 'abc').groups(),
548 ('b', 'a')) 549 ('b', 'a'))
549 self.assertEqual(self._re.match('^(([ab])|c)*', 'abc').groups(), 550 self.assertEqual(self._re.match('^(([ab])|c)*', 'abc').groups(),
550 ('c', 'b')) 551 ('c', 'b'))
551 self.assertEqual(self._re.match('^((d)|[ab])*', 'abc').groups(), 552 self.assertEqual(self._re.match('^((d)|[ab])*', 'abc').groups(),
552 ('b', None)) 553 ('b', None))
553 self.assertEqual(self._re.match('^((a)c|[ab])*', 'abc').groups(), 554 self.assertEqual(self._re.match('^((a)c|[ab])*', 'abc').groups(),
554 ('b', None)) 555 ('b', None))
555 self.assertEqual(self._re.match('^((a)|b)*?c', 'abc').groups(), 556 self.assertEqual(self._re.match('^((a)|b)*?c', 'abc').groups(),
556 ('b', 'a')) 557 ('b', 'a'))
557 self.assertEqual(self._re.match('^(([ab])|c)*?d', 'abcd').groups(), 558 self.assertEqual(self._re.match('^(([ab])|c)*?d', 'abcd').groups(),
558 ('c', 'b')) 559 ('c', 'b'))
559 self.assertEqual(self._re.match('^((d)|[ab])*?c', 'abc').groups(), 560 self.assertEqual(self._re.match('^((d)|[ab])*?c', 'abc').groups(),
560 ('b', None)) 561 ('b', None))
561 self.assertEqual(self._re.match('^((a)c|[ab])*?c', 'abc').groups(), 562 self.assertEqual(self._re.match('^((a)c|[ab])*?c', 'abc').groups(),
562 ('b', None)) 563 ('b', None))
563 564
564 def test_bug_725149(self): 565 def test_bug_725149(self):
565 # mark_stack_base restoring before restoring marks 566 # mark_stack_base restoring before restoring marks
566 self.assertEqual(self._re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(), 567 self.assertEqual(self._re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
567 ('a', None)) 568 ('a', None))
568 self.assertEqual(self._re.match('(?_e)(a)((?!(b)*))*', 'abb').groups(), 569 self.assertEqual(self._re.match('(?_e)(a)((?!(b)*))*', 'abb').groups(),
569 ('a', None, None)) 570 ('a', None, None))
570 571
571 def test_bug_764548(self): 572 def test_bug_764548(self):
572 # bug 764548, self._re.compile() barfs on str/unicode subclasses 573 # bug 764548, self._re.compile() barfs on str/unicode subclasses
573 class my_unicode(str): pass 574 class my_unicode(str): pass
574 pat = self._re.compile(my_unicode("abc")) 575 pat = self._re.compile(my_unicode("abc"))
575 self.assertEqual(pat.match("xyz"), None) 576 self.assertEqual(pat.match("xyz"), None)
576 577
577 def test_finditer(self): 578 def test_finditer(self):
578 iter = self._re.finditer(r":+", "a:b::c:::d") 579 iter = self._re.finditer(r":+", "a:b::c:::d")
579 self.assertEqual([item.group(0) for item in iter], 580 self.assertEqual([item.group(0) for item in iter],
580 [":", "::", ":::"]) 581 [":", "::", ":::"])
581 582
582 def test_bug_926075(self): 583 def test_bug_926075(self):
583 self.assertTrue(self._re.compile('bug_926075') is not 584 self.assertTrue(self._re.compile('bug_926075') is not
584 self._re.compile(b'bug_926075')) 585 self._re.compile(b'bug_926075'))
585 586
586 def test_bug_931848(self): 587 def test_bug_931848(self):
587 pattern = eval('"[\u002E\u3002\uFF0E\uFF61]"') 588 pattern = eval('"[\u002E\u3002\uFF0E\uFF61]"')
588 self.assertEqual(self._re.compile(pattern).split("a.b.c"), 589 self.assertEqual(self._re.compile(pattern).split("a.b.c"),
589 ['a','b','c']) 590 ['a','b','c'])
590 591
591 def test_bug_581080(self): 592 def test_bug_581080(self):
592 iter = self._re.finditer(r"\s", "a b") 593 iter = self._re.finditer(r"\s", "a b")
593 self.assertEqual(next(iter).span(), (1,2)) 594 self.assertEqual(next(iter).span(), (1,2))
594 self.assertRaises(StopIteration, next, iter) 595 self.assertRaises(StopIteration, next, iter)
595 596
596 scanner = self._re.compile(r"\s").scanner("a b") 597 scanner = self._re.compile(r"\s").scanner("a b")
597 self.assertEqual(scanner.search().span(), (1, 2)) 598 self.assertEqual(scanner.search().span(), (1, 2))
598 self.assertEqual(scanner.search(), None) 599 self.assertEqual(scanner.search(), None)
599 600
600 def test_bug_817234(self): 601 def test_bug_817234(self):
601 iter = self._re.finditer(r".*", "asdf") 602 iter = self._re.finditer(r".*", "asdf")
602 self.assertEqual(next(iter).span(), (0, 4)) 603 self.assertEqual(next(iter).span(), (0, 4))
603 self.assertEqual(next(iter).span(), (4, 4)) 604 self.assertEqual(next(iter).span(), (4, 4))
604 self.assertRaises(StopIteration, next, iter) 605 self.assertRaises(StopIteration, next, iter)
605 606
606 def test_bug_6561(self): 607 def test_bug_6561(self):
607 # '\d' should match characters in Unicode category 'Nd' 608 # '\d' should match characters in Unicode category 'Nd'
608 # (Number, Decimal Digit), but not those in 'Nl' (Number, 609 # (Number, Decimal Digit), but not those in 'Nl' (Number,
609 # Letter) or 'No' (Number, Other). 610 # Letter) or 'No' (Number, Other).
610 decimal_digits = [ 611 decimal_digits = [
611 '\u0037', # '\N{DIGIT SEVEN}', category 'Nd' 612 '\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
612 '\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd' 613 '\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
613 '\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd' 614 '\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
614 ] 615 ]
615 for x in decimal_digits: 616 for x in decimal_digits:
616 self.assertEqual(self._re.match('^\d$', x).group(0), x) 617 self.assertEqual(self._re.match('^\d$', x).group(0), x)
617 618
618 not_decimal_digits = [ 619 not_decimal_digits = [
619 '\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl' 620 '\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
620 '\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl' 621 '\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
621 '\u2082', # '\N{SUBSCRIPT TWO}', category 'No' 622 '\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
622 '\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No' 623 '\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
623 ] 624 ]
624 for x in not_decimal_digits: 625 for x in not_decimal_digits:
625 self.assertIsNone(self._re.match('^\d$', x)) 626 self.assertIsNone(self._re.match('^\d$', x))
626 627
627 def test_empty_array(self): 628 def test_empty_array(self):
628 # SF bug 1647541 629 # SF bug 1647541
629 import array 630 import array
630 for typecode in 'bBuhHiIlLfd': 631 for typecode in 'bBuhHiIlLfd':
631 a = array.array(typecode) 632 a = array.array(typecode)
632 self.assertEqual(self._re.compile(b"bla").match(a), None) 633 self.assertEqual(self._re.compile(b"bla").match(a), None)
633 self.assertEqual(self._re.compile(b"").match(a).groups(), ()) 634 self.assertEqual(self._re.compile(b"").match(a).groups(), ())
634 635
635 def test_inline_flags(self): 636 def test_inline_flags(self):
636 # Bug #1700 637 # Bug #1700
637 upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Below 638 upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Below
638 lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Below 639 lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Below
639 640
640 p = self._re.compile(upper_char, self._re.I | self._re.U) 641 p = self._re.compile(upper_char, self._re.I | self._re.U)
641 q = p.match(lower_char) 642 q = p.match(lower_char)
642 self.assertNotEqual(q, None) 643 self.assertNotEqual(q, None)
643 644
644 p = self._re.compile(lower_char, self._re.I | self._re.U) 645 p = self._re.compile(lower_char, self._re.I | self._re.U)
645 q = p.match(upper_char) 646 q = p.match(upper_char)
646 self.assertNotEqual(q, None) 647 self.assertNotEqual(q, None)
647 648
648 p = self._re.compile('(?i)' + upper_char, self._re.U) 649 p = self._re.compile('(?i)' + upper_char, self._re.U)
649 q = p.match(lower_char) 650 q = p.match(lower_char)
650 self.assertNotEqual(q, None) 651 self.assertNotEqual(q, None)
651 652
652 p = self._re.compile('(?i)' + lower_char, self._re.U) 653 p = self._re.compile('(?i)' + lower_char, self._re.U)
653 q = p.match(upper_char) 654 q = p.match(upper_char)
654 self.assertNotEqual(q, None) 655 self.assertNotEqual(q, None)
655 656
656 p = self._re.compile('(?iu)' + upper_char) 657 p = self._re.compile('(?iu)' + upper_char)
657 q = p.match(lower_char) 658 q = p.match(lower_char)
658 self.assertNotEqual(q, None) 659 self.assertNotEqual(q, None)
659 660
660 p = self._re.compile('(?iu)' + lower_char) 661 p = self._re.compile('(?iu)' + lower_char)
661 q = p.match(upper_char) 662 q = p.match(upper_char)
662 self.assertNotEqual(q, None) 663 self.assertNotEqual(q, None)
663 664
664 def test_dollar_matches_twice(self): 665 def test_dollar_matches_twice(self):
665 "$ matches the end of string, and just before the terminating \n" 666 "$ matches the end of string, and just before the terminating \n"
666 pattern = self._re.compile('$') 667 pattern = self._re.compile('$')
667 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#') 668 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
668 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#') 669 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
669 self.assertEqual(pattern.sub('#', '\n'), '#\n#') 670 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
670 671
671 pattern = self._re.compile('$', self._re.MULTILINE) 672 pattern = self._re.compile('$', self._re.MULTILINE)
672 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' ) 673 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
673 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#') 674 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
674 self.assertEqual(pattern.sub('#', '\n'), '#\n#') 675 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
675 676
676 def test_bytes_str_mixing(self): 677 def test_bytes_str_mixing(self):
677 # Mixing str and bytes is disallowed 678 # Mixing str and bytes is disallowed
678 pat = self._re.compile('.') 679 pat = self._re.compile('.')
679 bpat = self._re.compile(b'.') 680 bpat = self._re.compile(b'.')
680 self.assertRaises(TypeError, pat.match, b'b') 681 self.assertRaises(TypeError, pat.match, b'b')
681 self.assertRaises(TypeError, bpat.match, 'b') 682 self.assertRaises(TypeError, bpat.match, 'b')
682 self.assertRaises(TypeError, pat.sub, b'b', 'c') 683 self.assertRaises(TypeError, pat.sub, b'b', 'c')
683 self.assertRaises(TypeError, pat.sub, 'b', b'c') 684 self.assertRaises(TypeError, pat.sub, 'b', b'c')
684 self.assertRaises(TypeError, pat.sub, b'b', b'c') 685 self.assertRaises(TypeError, pat.sub, b'b', b'c')
685 self.assertRaises(TypeError, bpat.sub, b'b', 'c') 686 self.assertRaises(TypeError, bpat.sub, b'b', 'c')
686 self.assertRaises(TypeError, bpat.sub, 'b', b'c') 687 self.assertRaises(TypeError, bpat.sub, 'b', b'c')
687 self.assertRaises(TypeError, bpat.sub, 'b', 'c') 688 self.assertRaises(TypeError, bpat.sub, 'b', 'c')
688 689
689 def test_ascii_and_unicode_flag(self): 690 def test_ascii_and_unicode_flag(self):
690 # String patterns 691 # String patterns
691 for flags in (0, self._re.UNICODE): 692 for flags in (0, self._re.UNICODE):
692 pat = self._re.compile('\xc0', flags | self._re.IGNORE_CASE) 693 pat = self._re.compile('\xc0', flags | self._re.IGNORE_CASE)
693 self.assertNotEqual(pat.match('\xe0'), None) 694 self.assertNotEqual(pat.match('\xe0'), None)
694 pat = self._re.compile('\w', flags) 695 pat = self._re.compile('\w', flags)
695 self.assertNotEqual(pat.match('\xe0'), None) 696 self.assertNotEqual(pat.match('\xe0'), None)
696 pat = self._re.compile('\xc0', self._re.ASCII | self._re.IGNORE_CASE) 697 pat = self._re.compile('\xc0', self._re.ASCII | self._re.IGNORE_CASE)
697 self.assertEqual(pat.match('\xe0'), None) 698 self.assertEqual(pat.match('\xe0'), None)
698 pat = self._re.compile('(?a)\xc0', self._re.IGNORE_CASE) 699 pat = self._re.compile('(?a)\xc0', self._re.IGNORE_CASE)
699 self.assertEqual(pat.match('\xe0'), None) 700 self.assertEqual(pat.match('\xe0'), None)
700 pat = self._re.compile('\w', self._re.ASCII) 701 pat = self._re.compile('\w', self._re.ASCII)
701 self.assertEqual(pat.match('\xe0'), None) 702 self.assertEqual(pat.match('\xe0'), None)
702 pat = self._re.compile('(?a)\w') 703 pat = self._re.compile('(?a)\w')
703 self.assertEqual(pat.match('\xe0'), None) 704 self.assertEqual(pat.match('\xe0'), None)
704 # Bytes patterns 705 # Bytes patterns
705 for flags in (0, self._re.ASCII): 706 for flags in (0, self._re.ASCII):
706 pat = self._re.compile(b'\xc0', self._re.IGNORE_CASE) 707 pat = self._re.compile(b'\xc0', self._re.IGNORE_CASE)
707 self.assertEqual(pat.match(b'\xe0'), None) 708 self.assertEqual(pat.match(b'\xe0'), None)
708 pat = self._re.compile(b'\w') 709 pat = self._re.compile(b'\w')
709 self.assertEqual(pat.match(b'\xe0'), None) 710 self.assertEqual(pat.match(b'\xe0'), None)
710 # Incompatibilities 711 # Incompatibilities
711 self.assertRaises(ValueError, self._re.compile, b'\w', self._re.UNICODE) 712 self.assertRaises(ValueError, self._re.compile, b'\w', self._re.UNICODE)
712 self.assertRaises(ValueError, self._re.compile, b'(?u)\w') 713 self.assertRaises(ValueError, self._re.compile, b'(?u)\w')
713 self.assertRaises(ValueError, self._re.compile, '\w', self._re.UNICODE | self._re.ASCII) 714 self.assertRaises(ValueError, self._re.compile, '\w', self._re.UNICODE | self._re.ASCII)
714 self.assertRaises(ValueError, self._re.compile, '(?u)\w', self._re.ASCII) 715 self.assertRaises(ValueError, self._re.compile, '(?u)\w', self._re.ASCII)
715 self.assertRaises(ValueError, self._re.compile, '(?a)\w', self._re.UNICODE) 716 self.assertRaises(ValueError, self._re.compile, '(?a)\w', self._re.UNICODE)
716 self.assertRaises(ValueError, self._re.compile, '(?au)\w') 717 self.assertRaises(ValueError, self._re.compile, '(?au)\w')
717 718
def run_re_tests():
    """Run the table-driven cases from ``test.re_tests`` and print mismatches.

    Each table entry is a 3-tuple ``(pattern, string, outcome)`` or a 5-tuple
    that additionally carries an expression and its expected value.  Nothing
    is asserted or returned: every discrepancy is reported on stdout as a
    line starting with ``===`` (or ``***`` for an unexpected exception).

    Raises:
        ValueError: if a table entry has neither 3 nor 5 fields.
    """
    from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR

    def group_text(match, key):
        # Unmatched groups become "None" and unknown groups "Error" so that
        # string concatenation inside the table's expressions cannot fail.
        try:
            value = match.group(key)
        except IndexError:
            return "Error"
        return "None" if value is None else value

    # NOTE(review): neither ``self`` nor ``verbose`` is bound inside this
    # function; unless both exist at module level, calling it raises
    # NameError.  Left as-is because the intended binding is not visible
    # from here -- confirm before relying on this helper.
    if verbose:
        print('Running re_tests test suite')
    # The "only run the first and last 10 tests when not verbose" shortcut
    # (tests = tests[:10] + tests[-10:]) was already commented out, so the
    # whole table is always run.

    for t in tests:
        sys.stdout.flush()
        pattern = s = outcome = repl = expected = None
        if len(t) == 5:
            pattern, s, outcome, repl, expected = t
        elif len(t) == 3:
            pattern, s, outcome = t
        else:
            raise ValueError('Test tuples should have 3 or 5 fields', t)

        try:
            obj = self._re.compile(pattern)
        except self._re.error:
            if outcome != SYNTAX_ERROR:
                print('=== Syntax error:', t)
            continue
        except Exception:
            # KeyboardInterrupt/SystemExit are not Exception subclasses and
            # therefore propagate unchanged, with their original traceback.
            print('*** Unexpected error ***', t)
            if verbose:
                traceback.print_exc(file=sys.stdout)
            continue

        try:
            result = obj.search(s)
        except self._re.error as msg:
            print('=== Unexpected exception', t, repr(msg))
            # There is no match object to inspect; carrying on would read an
            # unbound (or stale, from the previous case) ``result``.
            continue

        if outcome == SYNTAX_ERROR:
            # This should have been a syntax error; forget it.
            continue
        if outcome == FAIL:
            if result is not None:
                print('=== Succeeded incorrectly', t)
            continue
        if outcome != SUCCEED:
            continue

        if result is not None:
            # Matched, as expected, so now we compute the result string and
            # compare it to our expected result.
            result.span(0)  # value unused; the call itself must not fail
            vardict = {'found': result.group(0),
                       'groups': result.group(),
                       'flags': result.re.flags}
            for i in range(1, 100):
                vardict['g%d' % i] = group_text(result, i)
            for name in result.re.groupindex:
                vardict[name] = group_text(result, name)
            # NOTE: eval() runs expressions taken from the test.re_tests
            # table imported above; do not reuse this with external data.
            repl = eval(repl, vardict)
            if repl != expected:
                print('=== grouping error', t, end=' ')
                print(repr(repl) + ' should be ' + repr(expected))
        else:
            print('=== Failed incorrectly', t)

        # Try the match with both pattern and string converted to
        # bytes, and check that it still succeeds.
        try:
            bpat = bytes(pattern, "ascii")
            bs = bytes(s, "ascii")
        except UnicodeEncodeError:
            # skip non-ascii tests
            pass
        else:
            try:
                bpat = self._re.compile(bpat)
            except Exception:
                print('=== Fails on bytes pattern compile', t)
                if verbose:
                    traceback.print_exc(file=sys.stdout)
            else:
                bytes_result = bpat.search(bs)
                if bytes_result is None:
                    print('=== Fails on bytes pattern match', t)

        # Try the match with the search area limited to the extent
        # of the match and see if it still succeeds.  \B will
        # break (because it won't match at the end or start of a
        # string), so we'll ignore patterns that feature it.
        if (result is not None
                and not pattern.startswith('\\B')
                and not pattern.endswith('\\B')):
            obj = self._re.compile(pattern)
            result = obj.search(s, result.start(0), result.end(0) + 1)
            if result is None:
                print('=== Failed on range-limited match', t)

        # Try the match with IGNORE_CASE enabled, and check that it
        # still succeeds.
        obj = self._re.compile(pattern, self._re.IGNORE_CASE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on case-insensitive match', t)

        # Try the match with LOCALE enabled, and check that it
        # still succeeds.
        # if '(?u)' not in pattern:
        #     obj = self._re.compile(pattern, self._re.LOCALE)
        #     result = obj.search(s)
        #     if result is None:
        #         print('=== Fails on locale-sensitive match', t)

        # Try the match with UNICODE locale enabled, and check
        # that it still succeeds.
        obj = self._re.compile(pattern, self._re.UNICODE)
        result = obj.search(s)
        if result is None:
            print('=== Fails on unicode-sensitive match', t)
Powered by Google Project Hosting