|
BuildYourOwnJavaParserWithAntlr3
Build Your Own Java Parser with Antlr 3.
Phase-Design

Introduction

Antlr (ANother Tool for Language Recognition) is a parser generator. This article describes how to use Antlr 3 to create your own parser in Java.

Dependencies

To use Antlr 3, declare the following Maven dependencies.

<dependency>
<groupId>org.antlr</groupId>
<artifactId>antlr</artifactId>
<version>3.1.3</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.antlr</groupId>
<artifactId>gunit</artifactId>
<version>3.1.3</version>
<scope>test</scope>
</dependency>

We also need the Antlr3 Maven Plugin to automatically generate the Java parser code from the Antlr 3 grammar.

<build>
<!-- Main resource directories. src/main/gen is also the outputDirectory of the antlr3-maven-plugin configured in this build section. -->
<resources>
<resource>
<directory>src/main/java</directory>
</resource>
<resource>
<directory>src/main/resources</directory>
</resource>
<resource>
<directory>src/main/gen</directory>
</resource>
</resources>
<!-- Test resource directories: both the test source tree and src/test/resources are listed. -->
<testResources>
<testResource>
<directory>src/test/java</directory>
</testResource>
<testResource>
<directory>src/test/resources</directory>
</testResource>
</testResources>
<plugins>
<!-- Runs the antlr goal of the antlr3-maven-plugin to generate the Java lexer and parser from the grammar files. -->
<plugin>
<groupId>org.antlr</groupId>
<artifactId>antlr3-maven-plugin</artifactId>
<version>3.1.3-1</version>
<executions>
<execution>
<goals>
<goal>antlr</goal>
</goals>
<configuration>
<conversionTimeout>10000</conversionTimeout>
<debug>false</debug>
<dfa>false</dfa>
<nfa>false</nfa>
<!-- Directory searched for imported grammars. -->
<libDirectory>src/main/antlr3/imports</libDirectory>
<messageFormat>antlr</messageFormat>
<!-- Generated sources are written here; the same directory is listed under resources and cleaned by maven-clean-plugin. -->
<outputDirectory>src/main/gen</outputDirectory>
<printGrammar>false</printGrammar>
<profile>false</profile>
<report>false</report>
<!-- Grammar files are read from this directory. -->
<sourceDirectory>src/main/antlr3</sourceDirectory>
<trace>false</trace>
<verbose>true</verbose>
</configuration>
</execution>
</executions>
</plugin>
<!-- Extends the clean phase with a fileset covering everything under src/main/gen (the ANTLR outputDirectory), without following symlinks. -->
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<configuration>
<filesets>
<fileset>
<directory>src/main/gen</directory>
<includes>
<include>**/*</include>
</includes>
<followSymlinks>false</followSymlinks>
</fileset>
</filesets>
</configuration>
</plugin>
</plugins>
</build>

IDE Support

ANTLRWorks is a grammar development environment for ANTLR v3 grammars. ANTLRWorks has an IntelliJ IDEA plugin, and plugins for other IDEs are also available.

Example

Here we use the Tellurium UID Description Language (UDL) as an example to illustrate how to create a parser in Java.

Grammar

The UDL grammar is defined as follows.

grammar Udl;
// Entry rule: a UID is a base UID, a list UID, or a table UID.
uid : baseUid | listUid | tableUid ;
// A base UID is a single ID token.
baseUid : ID ;
// List UID: a braced INDEX, optionally followed by an 'as' ID suffix.
listUid
    : '{' INDEX '}' ( 'as' ID )?
    ;
// A table UID addresses a header cell, a footer cell, or a body cell.
tableUid : tableHeaderUid | tableFooterUid | tableBodyUid ;
// Table header UID: '{header: INDEX}', optionally followed by an 'as' ID suffix.
tableHeaderUid
    : '{' 'header' ':' INDEX '}' ( 'as' ID )?
    ;
// Table footer UID: '{footer: INDEX}', optionally followed by an 'as' ID suffix.
tableFooterUid
    : '{' 'footer' ':' INDEX '}' ( 'as' ID )?
    ;
// Table body UID. The sixteen spelled-out combinations are factored into one
// alternative that accepts exactly the same sentences:
//   - an optional leading 'tbody' ':' INDEX ',' clause,
//   - a row given either by value (':' INDEX) or by reference ('->' ID),
//   - a column given either by value (':' INDEX) or by reference ('->' ID),
//   - an optional 'as' ID suffix.
tableBodyUid
    : '{' ( 'tbody' ':' INDEX ',' )?
      'row'    ( ':' INDEX | '->' ID ) ','
      'column' ( ':' INDEX | '->' ID )
      '}' ( 'as' ID )?
    ;
// Helper fragments used by the token rules below.
fragment LETTER : 'a'..'z' | 'A'..'Z' ;
fragment DIGIT  : '0'..'9' ;
// INDEX is a run of digits or one of the listed keywords.
INDEX : DIGIT+ | 'all' | 'odd' | 'even' | 'any' | 'first' | 'last' ;
// ID is a letter followed by any number of letters or digits.
ID : LETTER ( LETTER | DIGIT )* ;
WS : (' ' | '\t' | '\n' | '\r' | '\f')+ {$channel = HIDDEN;};

Data Structure

To create the parser, we need a couple of Java classes that map to the Antlr 3 grammar rules. For example, we define the following classes.

public enum IndexType {
VAL,
REF
}
/**
 * An index inside a UID: either a literal value or a reference to an ID.
 *
 * <p>The grammar actions build instances with {@code new Index(text)} for
 * {@code ':' INDEX} clauses and {@code new Index(IndexType.REF, text)} for
 * {@code '->' ID} clauses, and the unit tests read them back through
 * {@link #getType()} and {@link #getValue()}.
 */
public class Index {
    IndexType type;
    String value;

    /** Creates an empty index; both fields stay {@code null} until assigned. */
    public Index() {
    }

    /**
     * Creates a value index.
     *
     * @param value the index text, stored with type {@link IndexType#VAL}
     */
    public Index(String value) {
        this(IndexType.VAL, value);
    }

    /**
     * Creates an index of the given kind.
     *
     * @param type  whether {@code value} is a literal value or a reference
     * @param value the index text or the referenced ID
     */
    public Index(IndexType type, String value) {
        this.type = type;
        this.value = value;
    }

    /** @return the kind of this index */
    public IndexType getType() {
        return type;
    }

    /** @return the index text or the referenced ID */
    public String getValue() {
        return value;
    }
}
/**
 * Base metadata for a parsed UID; it carries only the UID's id.
 *
 * <p>The grammar actions create it with {@code new MetaData(text)} and the
 * unit tests read the id back through {@link #getId()}.
 */
public class MetaData {
    protected String id;

    /** Creates metadata without an id; kept so subclasses can rely on the implicit {@code super()} call. */
    public MetaData() {
    }

    /**
     * Creates metadata for the given id.
     *
     * @param id the UID's id
     */
    public MetaData(String id) {
        this.id = id;
    }

    /** @return the UID's id, or {@code null} if none was set */
    public String getId() {
        return id;
    }

    /** @param id the UID's id */
    public void setId(String id) {
        this.id = id;
    }
}
/**
 * Metadata for a list UID: the inherited id plus the element's index.
 *
 * <p>Only accessors are added here; the unit tests read the index through
 * {@link #getIndex()}.
 */
public class ListMetaData extends MetaData {
    protected Index index;

    /** @return the element's index, or {@code null} if none was set */
    public Index getIndex() {
        return index;
    }

    /** @param index the element's index */
    public void setIndex(Index index) {
        this.index = index;
    }
}
/** Metadata returned for a table header UID; it adds nothing to {@link ListMetaData}. */
public class TableHeaderMetaData extends ListMetaData {
}
/** Metadata returned for a table footer UID; it adds nothing to {@link ListMetaData}. */
public class TableFooterMetaData extends ListMetaData {
}
public class TableBodyMetaData extends MetaData {
protected Index tbody;
protected Index row;
protected Index column;
}

Embedded Java Code

To get data out of the parser, we can embed Java code in the Antlr 3 grammar as follows.

grammar Udl;
// Generate the recognizer in Java.
options { language = Java; }
@header {
package org.telluriumsource.udl;
import org.telluriumsource.udl.code.IndexType;
import org.telluriumsource.udl.Index;
import org.telluriumsource.udl.MetaData;
import org.telluriumsource.udl.ListMetaData;
import org.telluriumsource.udl.TableHeaderMetaData;
import org.telluriumsource.udl.TableFooterMetaData;
import org.telluriumsource.udl.TableBodyMetaData;
}
@lexer::header {
package org.telluriumsource.udl;
import org.telluriumsource.udl.code.IndexType;
import org.telluriumsource.udl.Index;
import org.telluriumsource.udl.MetaData;
import org.telluriumsource.udl.ListMetaData;
import org.telluriumsource.udl.TableHeaderMetaData;
import org.telluriumsource.udl.TableFooterMetaData;
import org.telluriumsource.udl.TableBodyMetaData;
}
// Placeholder for extra members of the generated parser class; currently empty.
@members{
}
// Entry rule: hands back the metadata built by whichever UID form matched.
uid returns [MetaData metadata]
    : bu=baseUid  { $metadata = $bu.metadata; }
    | lu=listUid  { $metadata = $lu.metadata; }
    | tu=tableUid { $metadata = $tu.metadata; }
    ;
// Base UID: the matched ID text becomes the metadata id.
baseUid returns [MetaData metadata]
    : ID
      { $metadata = new MetaData($ID.text); }
    ;
// List UID; without an 'as' alias the id is '_' followed by the index text.
listUid returns [ListMetaData metadata]
    : '{' INDEX '}'
      { $metadata = new ListMetaData('_' + $INDEX.text, $INDEX.text); }
    | '{' INDEX '}' 'as' ID
      { $metadata = new ListMetaData($ID.text, $INDEX.text); }
    ;
// Table UID: hands back the metadata of the header, footer, or body form that matched.
tableUid returns [MetaData metadata]
    : thu=tableHeaderUid { $metadata = $thu.metadata; }
    | tfu=tableFooterUid { $metadata = $tfu.metadata; }
    | tbu=tableBodyUid   { $metadata = $tbu.metadata; }
    ;
// Table header UID; without an 'as' alias the id is '_' followed by the index text.
tableHeaderUid returns [TableHeaderMetaData metadata]
    : '{' 'header' ':' INDEX '}'
      { $metadata = new TableHeaderMetaData('_'+$INDEX.text, $INDEX.text); }
    | '{' 'header' ':' INDEX '}' 'as' ID
      { $metadata = new TableHeaderMetaData($ID.text, $INDEX.text); }
    ;
// Table footer UID; without an 'as' alias the id is '_' followed by the index text.
tableFooterUid returns [TableFooterMetaData metadata]
    : '{' 'footer' ':' INDEX '}'
      { $metadata = new TableFooterMetaData('_'+$INDEX.text, $INDEX.text); }
    | '{' 'footer' ':' INDEX '}' 'as' ID
      { $metadata = new TableFooterMetaData($ID.text, $INDEX.text); }
    ;
// Table body UID. One factored alternative replaces the sixteen spelled-out
// combinations and builds the same objects:
//   - tbody defaults to "1" when the 'tbody' clause is omitted;
//   - row and column are value indexes (':' INDEX) or references ('->' ID);
//   - without an 'as' alias the id is "_<tbody>_<row>_<column>".
tableBodyUid returns [TableBodyMetaData metadata]
@init {
    // Every short form (no 'tbody' clause) used Index("1") and an id starting with "_1_".
    String tbodyText = "1";
}
    : '{' ( 'tbody' ':' tb=INDEX ',' { tbodyText = $tb.text; } )?
      'row' r=cellIndex ',' 'column' c=cellIndex '}' ( 'as' alias=ID )?
      {
        String id;
        if ($alias != null) {
            id = $alias.text;
        } else {
            id = "_" + tbodyText + "_" + $r.raw + "_" + $c.raw;
        }
        $metadata = new TableBodyMetaData(id);
        $metadata.setTbody(new Index(tbodyText));
        $metadata.setRow($r.index);
        $metadata.setColumn($c.index);
      }
    ;

// Helper for tableBodyUid: the part after 'row' or 'column'.
// Returns the Index object plus the raw text used to build the default id.
cellIndex returns [Index index, String raw]
    : ':' INDEX
      { $raw = $INDEX.text; $index = new Index($raw); }
    | '->' ID
      { $raw = $ID.text; $index = new Index(IndexType.REF, $raw); }
    ;
// Helper fragments used by the token rules below.
fragment LETTER : 'a'..'z' | 'A'..'Z' ;
fragment DIGIT  : '0'..'9' ;
// INDEX is a run of digits or one of the listed keywords.
INDEX : DIGIT+ | 'all' | 'odd' | 'even' | 'any' | 'first' | 'last' ;
// ID is a letter followed by any number of letters or digits.
ID : LETTER ( LETTER | DIGIT )* ;
WS : (' ' | '\t' | '\n' | '\r' | '\f')+ {$channel = HIDDEN;};

Parser

From the grammar above, the Antlr 3 Maven plugin generates a lexer, UdlLexer, and a parser, UdlParser. To wrap things up, we define a parser class as follows.

public class UidParser {
/**
 * Parses a UID expression into its metadata.
 *
 * @param uid the UID text; may be {@code null} or blank
 * @return the metadata built by the grammar's {@code uid} rule, or {@code null}
 *         when {@code uid} is {@code null} or blank
 * @throws RecognitionException if the parser reported at least one syntax error
 */
public static MetaData parse(String uid) throws RecognitionException {
    if (uid == null || uid.trim().length() == 0) {
        return null;
    }
    CharStream stream = new ANTLRStringStream(uid);
    UdlLexer lexer = new UdlLexer(stream);
    TokenStream tokenStream = new CommonTokenStream(lexer);
    UdlParser parser = new UdlParser(tokenStream);
    MetaData metadata = parser.uid();
    // Generated ANTLR 3 parsers report syntax errors and recover instead of
    // throwing, so a malformed UID would otherwise come back as a partial or
    // null result. Surface the failure to the caller instead.
    if (parser.getNumberOfSyntaxErrors() > 0) {
        throw new RecognitionException(tokenStream);
    }
    return metadata;
}
}

Unit Test

To test the parser wrapper, UidParser, we can use JUnit 4.

public class UidParser_UT {
/**
 * A bare identifier parses to metadata whose id is that identifier.
 *
 * <p>The exception is declared rather than caught so that a parse failure
 * reaches JUnit with its stack trace; {@code fail(e.getMessage())} would
 * drop the trace and report only the message text.
 */
@Test
public void testBaseUid() throws RecognitionException {
    MetaData data = UidParser.parse("Tellurium");
    assertNotNull(data);
    assertEquals("Tellurium", data.getId());
}
/**
 * A header UID with an alias yields TableHeaderMetaData carrying the alias as
 * id and the header index as a value index.
 *
 * <p>The exception is declared rather than caught so that a parse failure
 * reaches JUnit with its stack trace; {@code fail(e.getMessage())} would
 * drop the trace and report only the message text.
 */
@Test
public void testTableHeaderUid() throws RecognitionException {
    MetaData data = UidParser.parse("{header: 3} as A");
    assertNotNull(data);
    assertTrue(data instanceof TableHeaderMetaData);
    TableHeaderMetaData header = (TableHeaderMetaData) data;
    assertEquals("A", header.getId());
    assertEquals("3", header.getIndex().getValue());
    assertEquals(IndexType.VAL, header.getIndex().getType());
}
/**
 * A body UID with an explicit tbody and referenced row and column yields
 * TableBodyMetaData with a value index for tbody and reference indexes for
 * row and column.
 *
 * <p>The exception is declared rather than caught so that a parse failure
 * reaches JUnit with its stack trace; {@code fail(e.getMessage())} would
 * drop the trace and report only the message text.
 */
@Test
public void testTableBodyRefUid() throws RecognitionException {
    MetaData data = UidParser.parse("{tbody : 1, row -> good, column -> bad} as Search");
    assertNotNull(data);
    assertEquals("Search", data.getId());
    assertTrue(data instanceof TableBodyMetaData);
    TableBodyMetaData body = (TableBodyMetaData) data;
    assertEquals("1", body.getTbody().getValue());
    assertEquals(IndexType.VAL, body.getTbody().getType());
    assertEquals("good", body.getRow().getValue());
    assertEquals(IndexType.REF, body.getRow().getType());
    assertEquals("bad", body.getColumn().getValue());
    assertEquals(IndexType.REF, body.getColumn().getType());
}Resources |