My favorites | Sign in
Project Home Downloads Wiki Issues Source
Repository:
Checkout   Browse   Changes   Clones  
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174

# Copyright 2009 Andrew Cooke

# This file is part of LEPL.
#
# LEPL is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# LEPL is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with LEPL. If not, see <http://www.gnu.org/licenses/>.

'''
A stream that adds tokens at the start and end of lines.
'''

from io import StringIO

from lepl.lexer.stream import TokenSource
from lepl.offside.lexer import START
from lepl.offside.support import LineAwareError, OffsideError
from lepl.stream.stream import DefaultStreamFactory, LineSource, sample
from lepl.support.lib import str


class LineAwareStreamFactory(DefaultStreamFactory):
    '''
    Build line-aware streams from the various supported input types.

    Only line-based inputs are meaningful here; item sequences and plain
    streams are rejected with `LineAwareError`.
    '''

    def __init__(self, alphabet):
        # The alphabet supplies the SOL/EOL marker characters.
        self.alphabet = alphabet

    def from_path(self, path):
        '''
        Build a stream from the file at the given path.
        '''
        handle = open(path, 'rt', buffering=1)
        return self(LineAwareSource(self.alphabet, handle, path))

    def from_string(self, text):
        '''
        Build a stream from a string.
        '''
        description = sample('str: ', repr(text))
        return self(LineAwareSource(self.alphabet, StringIO(text),
                                    description))

    def from_lines(self, lines, source=None, join_=''.join):
        '''
        Build a stream from an iterable of lines.
        '''
        if source is None:
            source = sample('lines: ', repr(lines))
        return self(LineAwareSource(self.alphabet, lines, source, join_))

    def from_items(self, items, source=None, line_length=80):
        '''
        Item sequences are not supported - only line-based input.
        '''
        raise LineAwareError('Only line-based sources are supported')

    def from_file(self, file_):
        '''
        Build a stream from an open file.
        '''
        name = getattr(file_, 'name', '<file>')
        return self(LineAwareSource(self.alphabet, file_, name))

    @staticmethod
    def from_null(stream):
        '''
        Simple (non-line) streams are rejected.
        '''
        raise LineAwareError('Only line-based sources are supported')


def top_and_tail(alphabet, lines):
    '''
    Lazily wrap each line of the input with the alphabet's SOL
    (`alphabet.min`) and EOL (`alphabet.max`) markers, yielding each
    line as a list of characters.
    '''
    def extend(line):
        '''
        Return the line as a list, bracketed by the SOL/EOL markers.
        '''
        extended = [alphabet.min]
        extended.extend(line)
        extended.append(alphabet.max)
        return extended
    # pylint: disable-msg=W0141
    return map(extend, lines)


# pylint: disable-msg=E1002
# pylint can't find ABCs
class LineAwareSource(LineSource):
    '''
    A source that generates `LocationStream` instances from text whose
    lines carry SOL and EOL marker tokens.
    '''

    def __init__(self, alphabet, lines, description=None, join_=None):
        if not join_:
            # Default join: collapse each line via the alphabet, then
            # concatenate the results.
            def join_(lines):
                return ''.join(alphabet.join(line) for line in lines)
        super(LineAwareSource, self).__init__(
            top_and_tail(alphabet, lines),
            repr(lines) if description is None else description,
            join_)

    def location(self, offset, line, location_state):
        '''
        Report the location shifted back by one so the initial SOL
        marker is not counted.
        '''
        (character_count, line_count) = location_state
        column = offset - 1
        return (line_count, column, character_count + column,
                line, str(self))

    def text(self, offset, line):
        '''
        Join characters together as a line of text.
        '''
        # remember - join joins *lines*, so wrap the tail in a list
        remainder = [line[offset:]] if line else []
        return self.join(remainder)


class LineAwareTokenSource(TokenSource):
    '''
    Adapt `TokenSource` to replace tabs with spaces, if needed.
    '''

    def __init__(self, tokens, stream, tabsize):
        # tabsize is the number of spaces a tab expands to; a falsy value
        # (None, 0) disables tab expansion entirely.
        super(LineAwareTokenSource, self).__init__(tokens, stream)
        if tabsize:
            # Build the replacement string once, up front.
            self.__tab = ''.join([' '] * tabsize)
        else:
            self.__tab = None

    def __next__(self):
        '''
        Provide (terminals, text) values (used by matchers) along with
        the original stream as location_state.

        Note that this is infinite - it is the StreamView that detects when
        the Line is empty and terminates any processing by the user.
        '''
        try:
            # If the superclass yields an exhausted / non-sequence value,
            # this unpacking raises TypeError, handled below.
            ([(terminals, text)], stream) = \
                super(LineAwareTokenSource, self).__next__()
            if terminals and START in terminals:
                # A start-of-line token must be the only match at this point.
                if not len(terminals) == 1:
                    raise OffsideError('More than one token matching ^')
            elif '\t' in text and self.__tab:
                # Expand tabs: keep spaces, replace every other character
                # with the expanded tab.
                # NOTE(review): this assumes text here consists only of
                # spaces and tabs (i.e. indentation) - any other character
                # would also be replaced by spaces; confirm against callers.
                text = ''.join([char if char == ' ' else self.__tab
                                for char in text])
            return ([(terminals, text)], stream)
        except TypeError:
            # Exhaustion is signalled with (None, None) rather than by
            # raising StopIteration.
            return (None, None)

    @staticmethod
    def factory(tabsize):
        '''
        Return a "constructor" that matches `TokenSource`.
        '''
        return lambda tokens, stream: \
            LineAwareTokenSource(tokens, stream, tabsize)

Change log

bd00490b28dd by and...@small.europa2189 on Mar 21, 2010   Diff
docs; about to switch to 3.3.3 for fix
Go to: 
Project members, sign in to write a code review

Older revisions

343534f9fe86 by and...@small.europa2189 on Dec 27, 2009   Diff
cache compiled matcher
f19160268e00 by and...@small.europa2189 on Dec 19, 2009   Diff
tests close to working again
c84b58d74f3a by and...@small.europa2189 on Nov 28, 2009   Diff
fixed issue with tokens matching empty
string
All revisions of this file

File info

Size: 5710 bytes, 174 lines
Powered by Google Project Hosting