My favorites | Sign in
Project Home Downloads Wiki Issues Source
Repository:
Checkout   Browse   Changes   Clones    
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
package org.w3.swap.turtle

/* Parsers brings magic such as ~ and ^^ */
import scala.util.parsing.combinator.{Parsers, RegexParsers}
import scala.annotation.tailrec

import org.w3.swap
import swap.uri.Util.combine
import swap.rdf.RDFNodeBuilder
import swap.rdf.Vocabulary

/**
 * TurtleSyntax: the Turtle grammar productions, layered on the tokens
 * defined in TurtleLex.
 * http://www.w3.org/TeamSubmission/turtle/#sec-grammar-grammar
 *
 * Most productions yield the arcs (triples) they generate as a Stream[Arc];
 * productions that also denote a node yield a pair (arcs, node).
 *
 * Parsing is stateful: @prefix and @base directives update `namespaces`
 * and `baseaddr` as a side effect of a successful parse.
 *
 * @param initialBase base URI in effect before any @base directive is seen
 */
abstract class TurtleSyntax(val initialBase: String)
    extends TurtleLex with RDFNodeBuilder with CheckedParser {
  /** Blank node factory supplied by the concrete parser; this class calls
   * it with the hints "bk", "brackets" and "list". */
  def fresh(hint: String): BlankNode

  /** Blank node for a `_:name` label; supplied by the concrete parser. */
  def byName(name: String): BlankNode

  import scala.collection.mutable

  /** Prefix to namespace-URI bindings recorded by @prefix directives. */
  val namespaces = mutable.HashMap[String, String]()

  /** Current base URI; replaced by each @base directive. */
  var baseaddr = initialBase

  /**
   * Zero or more repetitions of p, concatenating the streams it yields.
   * Always succeeds; yields the empty stream when p does not match.
   */
  def lazyrep[T](p: Parser[Stream[T]]): Parser[Stream[T]] = (
      p ~ lazyrep(p) ^^ { case hd ~ tl => hd ++ tl }
    | success(Stream.empty)
  )

  //TODO: try phrase() again? didn't seem to work, earlier
  /** A whole document: any number of statements. */
  def turtleDoc: Parser[Stream[Arc]] = lazyrep(statement)

  /** A directive or a triples production, each terminated by ".".
   * Directives contribute no arcs. */
  def statement: Parser[Stream[Arc]] = (
      directive <~ "." ^^ { case _ => Stream.empty }
    | triples <~ "."
    // | ws +
  )

  def directive: Parser[Unit] = prefixID | base

  /** `@prefix p: <ref>`: binds p to ref resolved against the current base. */
  def prefixID: Parser[Unit] = "@prefix" ~> prefixOptColon ~ uriref ^^ {
    case prefix ~ ref => {
      namespaces.put(prefix, combine(baseaddr, ref))
    }
  }

  /** `@base <ref>`: the new base is ref resolved against the current base. */
  def base: Parser[Unit] = "@base" ~> uriref ^^ {
    case ref => baseaddr = combine(baseaddr, ref)
  }

  /**
   * subject predicateObjectList. Yields the arcs generated while parsing
   * the subject and the objects, followed by one (s, p, o) arc per
   * predicate/object pair.
   */
  def triples: Parser[Stream[Arc]] = subject ~ predicateObjectList ^^ {
    case ((arcs1, s)) ~ ((arcs2, pos)) => {
      arcs1 ++ arcs2 ++ pos.map { case (p, o) => (s, p, o) }
    }
  }

  /**
   * `verb objectList` groups separated by ";". Yields the arcs generated
   * by the objects plus the flattened list of (predicate, object) pairs.
   */
  def predicateObjectList: Parser[(Stream[Arc], List[(Label, Node)])] = (
    repsep(verb ~ objectList, ";") ^^ {
      case vol => {
        val arcs = vol.toStream.flatMap { case v ~ ((oarcs, ol)) => oarcs }
        val lln = vol.flatMap { case v ~ ((oarcs, ol)) => ol.map((v, _)) }
        (arcs, lln)
      }
    }
  )

  /** Objects separated by ",". */
  def objectList: Parser[(Stream[Arc], List[Node])] = (
    repsep(`object`, ",") ^^ { case ol => splitArcs(ol) }
  )

  /** A predicate, or the keyword "a" standing for rdf_type. */
  def verb: Parser[Label] = (
      predicate
    | "a" ^^ { case a => rdf_type }
  )

  def subject: Parser[(Stream[Arc], SubjectNode)] = (
      // ugh! "resource" is a use/mention bug!
      resource ^^ { case s => (Stream.empty, s) }
    | blank
  )

  def predicate = resource

  def `object`: Parser[(Stream[Arc], Node)] = (
      resource ^^ { case o => (Stream.empty, o) }
    | blank
    | literal ^^ { case o => (Stream.empty, o) }
  )

  /**
   * A literal term.
   *
   * Alternatives are ordered from most to least specific because `|`
   * commits to the first alternative that succeeds: were `integer` tried
   * first, "1.5" would be read as the integer "1" and the "." would then
   * be taken as the end of the statement.
   */
  def literal: Parser[Literal] = (
      datatypeString ^^ { case (lex, dt) => typed(lex, dt) }
    | quotedStringAtLanguage ^^ { case (s, langopt) => plain(s, langopt) }
    | double ^^ { case num => typed(num, Vocabulary.double) }
    | decimal ^^ { case num => typed(num, Vocabulary.decimal) }
    | integer ^^ { case num => typed(num, Vocabulary.integer) }
    | boolean ^^ { case b => typed(b, Vocabulary.boolean) }
  )

  /**
   * A blank node: a `_:name` label, an empty `[]`, a bracketed
   * predicateObjectList (whose pairs are attached to a fresh node), or a
   * collection.
   */
  def blank: Parser[(Stream[Arc], SubjectNode)] = (
      nodeID ^^ { case b => (Stream.empty, byName(b)) }
    | "[" ~ "]" ^^ { case bra ~ ket => (Stream.empty, fresh("bk")) }
    | "[" ~> predicateObjectList <~ "]" ^^ {
        case (arcs, pos) => {
          val b = fresh("brackets")
          (arcs ++ pos.map { case (p, o) => (b, p, o) }, b)
        }
      }
    | collection
  )

  /** Zero or more objects with no separator (the inside of a collection). */
  def itemListOpt: Parser[(Stream[Arc], List[Node])] = rep(`object`) ^^ {
    case ol => splitArcs(ol)
  }

  /**
   * Builds the rdf_first/rdf_rest chain for a collection.
   *
   * @return the arcs of the chain and its head node; rdf_nil with no arcs
   *         for the empty list
   */
  def itemArcs(items: List[Node]): (Stream[Arc], SubjectNode) = {
    items match {
      case Nil => (Stream.empty, rdf_nil)
      case first :: rest => {
        // cells for the tail are created before the cell for `first`
        val (arcs1, tail) = itemArcs(rest)
        val cell = fresh("list")
        val arcs = Stream.cons((cell, rdf_first, first),
                               Stream.cons((cell, rdf_rest, tail), arcs1))
        (arcs, cell)
      }
    }
  }

  /** `( item* )`: yields the items' own arcs followed by the list arcs. */
  def collection: Parser[(Stream[Arc], SubjectNode)] = (
    "(" ~> itemListOpt <~ ")" ^^ {
      case (arcs, items) => {
        val (listarcs, listnode) = itemArcs(items)
        (arcs ++ listarcs, listnode)
      }
    }
  )

  /**
   * A URI reference resolved against the current base, or a qname expanded
   * through `namespaces`. A qname whose prefix is not bound produces an
   * Error result ("no such prefix: ...") rather than an ordinary failure.
   */
  def resource: Parser[Label] = (
      uriref ^^ { case ref => uri(combine(baseaddr, ref)) }
    | checked(qname) { case (qn, in) => (
        if (namespaces.contains(qn._1)) Success(qn, in)
        else Error("no such prefix: " + qn._1, in)
      ) } ^^ { case (p, l) => uri(namespaces(p) + l) }
  )

  /** Splits parsed (arcs, node) pairs into the concatenated arcs and the
   * list of nodes, preserving order. */
  private def splitArcs(
      items: List[(Stream[Arc], Node)]): (Stream[Arc], List[Node]) = {
    val arcs = items.toStream.flatMap { case (arcs, node) => arcs }
    val nodes = items.map { case (arcs, node) => node }
    (arcs, nodes)
  }

}

trait CheckedParser extends Parsers {
  /**
   * Wraps a Parser[T] so that every successful result is passed through
   * an extra check.
   *
   * @param p     the parser to run (by-name, evaluated on each application)
   * @param check called with the parsed value and the remaining input when
   *              p succeeds; its result becomes the result of the wrapper
   * @return a parser that yields check's verdict when p succeeds and p's
   *         own result, unchanged, when p does not succeed
   */
  def checked[T](p: => Parser[T])(
      check: (T, Input) => ParseResult[T]): Parser[T] = Parser { input =>
    val outcome = p(input)
    outcome match {
      case Success(value, rest) => check(value, rest)
      case _ => outcome
    }
  }
}


/**
 * Token-level parsers for Turtle: names, URI references, quoted strings
 * and numeric/boolean literals, each defined by a regular expression.
 */
class TurtleLex extends RegexParsers {
  // treat comments as whitespace
  // this corresponds to ws+ in the turtle spec
  override val whiteSpace = "(?:[ \t\n\r]|(?:#[^\r\n]*))*".r

  /** A blank node label such as `_:b1`; yields the name without "_:". */
  def nodeID: Parser[String] = ("_:" + localname_re).r ^^ {
    case str => str.substring(2)
  }

  // TODO: non-ASCII name characters
  /* note _:xyz is an evar but _a:xyz is a qname */
  val prefix_re = """(?:((?:_[A-Za-z0-9_]+)|(?:[A-Za-z][A-Za-z0-9_]*)|):)"""
  val localname_re = """([A-Za-z][A-Za-z0-9_-]*)"""

  /** A (possibly empty) prefix followed by ":"; yields the prefix alone. */
  def prefixOptColon: Parser[String] = prefix_re.r ^^ {
    /* strip off the colon*/
    case str => str.substring(0, str.length() - 1)
  }

  val Qname_re = (prefix_re + localname_re).r

  /** A qualified name `prefix:local`; yields (prefix, local). */
  def qname: Parser[(String, String)] = Qname_re ^^ {
    case Qname_re(p, l) => (p, l)
  }

  /**
   * A URI reference in angle brackets; yields the text between them.
   * TODO: uriref escaping
   */
  def uriref: Parser[String] = (
    """<([^<>'{}|^`&&[^\x01-\x20]])*>""".r ^^ {
      case str => str.substring(1, str.length()-1)
    }
  )

  val lang_re = "[a-z]+(?:-[a-z0-9]+)*"
  val string_re = "\"[^\"]*\"" // TODO: fix
  // The body is matched reluctantly so the literal ends at the first
  // closing """ instead of running on to the last one in the input.
  val longString_re = (
    "\"\"\""
    + "(?:[^\"\\\\]+|\"|(?:\"\")|(?:\\\\[tbnrf\\\\\"]))*?"
    + "\"\"\""
  )
  // Group 1 is a long string, group 2 a short one (quotes included).
  // The long form is tried first: otherwise the short form matches the
  // leading "" of every """...""" literal. The alternation is wrapped in
  // a non-capturing group so that a suffix appended to this pattern
  // applies to both forms.
  val quotedString_re = "(?:(" + longString_re + ")|(" + string_re + "))"
  // Group 3 is the language tag without its "@".
  val qsal_pat = (quotedString_re + "(?:@(" + lang_re + "))?").r

  /** Drops n characters from each end of s. */
  def stripn(s: String, n: Int) = s.substring(n, s.length - n)

  /** Removes the delimiters from whichever quoted form matched; the group
   * that did not participate in the match is null. */
  private def unquote(longQuoted: String, shortQuoted: String): String =
    Option(longQuoted).map(stripn(_, 3)).getOrElse(stripn(shortQuoted, 1))

  /**
   * A quoted string with an optional `@lang` suffix.
   *
   * @return the string without its quotes, and the language tag (without
   *         the "@") if one was given
   */
  def quotedStringAtLanguage: Parser[(String, Option[Symbol])] = (
    qsal_pat ^^ {
      case qsal_pat(lsq, sq, lang) =>
        (unquote(lsq, sq), Option(lang).map(Symbol(_)))
    }
  )

  // "^" is a regex anchor, so the "^^" separator must be escaped.
  val datatype_pat = (quotedString_re + "\\^\\^<([^>]*)>").r

  /**
   * A quoted string followed by `^^<datatype>`.
   *
   * @return the string without its quotes, and the datatype URI reference
   *         as written (not resolved against any base)
   */
  def datatypeString: Parser[(String, String)] = datatype_pat ^^ {
    case datatype_pat(lsq, sq, dt) => {
      // TODO: unescaping
      (unquote(lsq, sq), dt)
    }
  }

  def integer: Parser[String] = "[+-]?[0-9]+".r

  def double: Parser[String] = "[+-]?[0-9]+(\\.[0-9]+)?([eE][+-]?[0-9]+)".r

  def decimal: Parser[String] = "[+-]?[0-9]+(\\.[0-9]+)".r

  def boolean: Parser[String] = "true" | "false"

}

Change log

8369d0a2f62f by Dan Connolly <http://www.w3.org/People/Connolly/> on Feb 19, 2010   Diff
- N3Parser layered on N3Syntax and N3Logic
(36/36)
  - N3Logic layered on rdflogic and
coherent logic
  - CheckedParser factored out of
TurtleSyntax as a trait
Go to: 
Project members, sign in to write a code review

Older revisions

5f87384ceae7 by Dan Connolly <http://www.w3.org/People/Connolly/> on Feb 13, 2010   Diff
note TODO for non-ASCII chars in
TurtleLex
5883022c6bac by Dan Connolly <http://www.w3.org/People/Connolly/> on Feb 11, 2010   Diff
- integrated rdfstdtest.scala with new
layering
- moved Graph to webdata; integrated
with new layering
- integrated swap.turtle.TurtleSyntax
...
94ac883bd610 by Dan Connolly <http://www.w3.org/People/Connolly/> on Feb 3, 2010   Diff
new RDFa test framework for examples
in section 6: expected results in
turtle
- cite RDFa spec from RDFaParser class
- start a separate TurtleParser
...
All revisions of this file

File info

Size: 6887 bytes, 248 lines
Powered by Google Project Hosting