Query 1 – select gene names, symbols and synonymsSPARQL# flybase benchmark query 1 (SPARQL)
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX chado: <http://purl.org/net/chado/schema/>
PREFIX so: <http://purl.org/obo/owl/SO#>
PREFIX syntype: <http://purl.org/net/flybase/synonym-types/>
SELECT DISTINCT ?flybaseid ?annotationsymbol ?fullname WHERE {
?feature chado:name "schuy"^^xsd:string ;
a so:SO_0000704 ;
chado:organism [
chado:genus "Drosophila"^^xsd:string ;
chado:species "melanogaster"^^xsd:string ;
] ;
chado:is_analysis "false"^^xsd:boolean ;
chado:is_obsolete "false"^^xsd:boolean;
chado:uniquename ?flybaseid ;
chado:feature_dbxref [
chado:accession ?annotationsymbol ;
chado:db <http://openflydata.org/id/flybase/db/FlyBase_Annotation_IDs>
] .
?feature_fullname
chado:feature ?feature ;
chado:is_current "true"^^xsd:boolean ;
a chado:Feature_Synonym ;
chado:synonym [
a syntype:FullName ;
chado:name ?fullname ;
] ;
.
}Run on 2009-04-21 at 16:12 with 'schuy' ... 391, 281, 281, 282, 266, 265, 312, 250, 250, 281 (ms) SQL-- flybase benchmark query 1 (SQL)
SELECT DISTINCT
feature.uniquename AS flybaseid,
dbxref.accession AS annotationsymbol,
synonym.name AS fullname
FROM feature
JOIN cvterm ON (feature.type_id = cvterm.cvterm_id)
JOIN organism ON (feature.organism_id = organism.organism_id)
JOIN feature_dbxref ON (feature.feature_id = feature_dbxref.feature_id)
JOIN dbxref ON (feature_dbxref.dbxref_id = dbxref.dbxref_id)
JOIN feature_synonym ON (feature.feature_id = feature_synonym.feature_id)
JOIN synonym ON (feature_synonym.synonym_id = synonym.synonym_id)
WHERE feature.name = 'schuy'
AND cvterm.cvterm_id = 219
AND organism.genus = 'Drosophila'
AND organism.species = 'melanogaster'
AND feature.is_analysis = false
AND feature.is_obsolete = false
AND dbxref.db_id = 1
AND feature_synonym.is_current = true
AND synonym.type_id = 60037 ; Run on 2009-04-21 16:01 with 'schuy' ... 1516, 704, 703, 703, 704, 687, 704, 703, 688, 703 (ms) ComparisonData below are from 2009-04-21 10 queries each... | | FlyBase Chado Relational Database | OpenFlyData SPARQL Endpoint | | Shortest (ms) | 687 | 250 | | Longest (ms) | 1516 | 391 | | Average (ms) | 782 | 286 | | Standard deviation | 258 | 41 |
Query 2 - select publications for a given geneSPARQL# flybase benchmark query 2 (SPARQL)
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX chado: <http://purl.org/net/chado/schema/>
PREFIX so: <http://purl.org/obo/owl/SO#>
SELECT DISTINCT ?pubid ?title ?pyear ?pages ?volume ?issue ?miniref WHERE {
?feature
chado:uniquename "FBgn0004644"^^xsd:string ;
a so:SO_0000704 ;
chado:organism [
chado:genus "Drosophila"^^xsd:string ;
chado:species "melanogaster"^^xsd:string ;
] ;
chado:is_analysis "false"^^xsd:boolean ;
chado:is_obsolete "false"^^xsd:boolean ;
chado:feature_pub ?pub .
?pub a chado:Pub ;
chado:uniquename ?pubid .
OPTIONAL { ?pub chado:title ?title }
OPTIONAL { ?pub chado:pyear ?pyear }
OPTIONAL { ?pub chado:pages ?pages }
OPTIONAL { ?pub chado:volume ?volume }
OPTIONAL { ?pub chado:issue ?issue }
OPTIONAL { ?pub chado:miniref ?miniref }
}Run on 2009-04-21 17:50 with 'FBgn0004464' (hh) ... 313, 641, 422, 688, 1954, 1031, 500, 687, 610, 734 (ms) SQL-- flybase benchmark query 2 (SQL)
SELECT DISTINCT
pub.uniquename AS pubid,
pub.title AS title,
pub.pyear AS pyear,
pub.pages AS pages,
pub.volume AS volume,
pub.issue AS issue,
pub.miniref AS miniref
FROM feature
JOIN cvterm ON (feature.type_id = cvterm.cvterm_id)
JOIN organism ON (feature.organism_id = organism.organism_id)
JOIN feature_pub ON (feature.feature_id = feature_pub.feature_id)
JOIN pub ON (feature_pub.pub_id = pub.pub_id)
WHERE feature.uniquename = 'FBgn0004644'
AND cvterm.cvterm_id = 219
AND organism.genus = 'Drosophila'
AND organism.species = 'melanogaster'
AND feature.is_analysis = false
AND feature.is_obsolete = false ; Run on 2009-04-21 17:03 with 'FBgn0004464' (hh) ... 6032, 1688, 1687, 1547, 1547, 1562, 1703, 2219, 2219, 2218 (ms) ComparisonData below are from 2009-04-21 10 queries each... | | FlyBase Chado Relational Database | OpenFlyData SPARQL Endpoint | | Shortest (ms) | 1547 | 313 | | Longest (ms) | 6032 | 1954 | | Average (ms) | 2242 | 758 | | Standard deviation | 1362 | 463 |
Query 3 - Select genes whose range includes a given genome positionSPARQL# flybase benchmark query 3 (sparql)
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX chado: <http://purl.org/net/chado/schema/>
PREFIX so: <http://purl.org/obo/owl/SO#>
SELECT ?name ?uniquename WHERE {
{
?organism
chado:genus "Drosophila"^^xsd:string ;
chado:species "melanogaster"^^xsd:string ;
a chado:Organism ;
.
?srcfeature
chado:uniquename "3R"^^xsd:string ;
a so:SO_0000105 ;
chado:organism ?organism ;
.
?featureloc
chado:srcfeature ?srcfeature ;
chado:fmin ?fmin ;
chado:fmax ?fmax ;
.
FILTER ( ?fmin <= 21809000 && ?fmax >= 21809000 )
}
{
?featureloc chado:feature ?feature .
?feature a so:SO_0000704 ;
chado:organism ?organism ;
chado:is_analysis "false"^^xsd:boolean ;
chado:is_obsolete "false"^^xsd:boolean ;
chado:name ?name ;
chado:uniquename ?uniquename .
}
}Run on 2009-04-22 at ~11:39 ... 24, 22, 18, 16, 17, 19, 20, 19, 19, 21 (s) Run on 2009-05-08 at ~08:50 ... 18, 18, 23, 20, 18, 20, 18, 22, 21, 19 (s) SQL-- flybase benchmark query 3 (sql)
SELECT
feature.name AS name,
feature.uniquename AS uniquename
FROM organism
JOIN feature AS srcfeature ON (organism.organism_id = srcfeature.organism_id)
JOIN cvterm AS srcfeaturetype ON (srcfeature.type_id = srcfeaturetype.cvterm_id)
JOIN featureloc ON (srcfeature.feature_id = featureloc.srcfeature_id)
JOIN feature ON (featureloc.feature_id = feature.feature_id)
JOIN cvterm AS featuretype ON (feature.type_id = featuretype.cvterm_id)
WHERE organism.genus = 'Drosophila'
AND organism.species = 'melanogaster'
AND feature.organism_id = srcfeature.organism_id
AND srcfeature.uniquename = '3R'
AND srcfeaturetype.cvterm_id = 210
AND featureloc.fmin <= 21809000
AND featureloc.fmax >= 21809000
AND featuretype.cvterm_id = 219
AND feature.is_analysis = false
AND feature.is_obsolete = false
; Run on 2009-04-22 at 11:16 ... > 10000s. However, if you reorder this query as... SELECT
feature.name AS nm,
feature.uniquename AS uniquename
FROM feature AS srcfeature
JOIN organism ON (organism.organism_id = srcfeature.organism_id)
JOIN featureloc ON (srcfeature.feature_id = featureloc.srcfeature_id)
JOIN feature ON (featureloc.feature_id = feature.feature_id)
JOIN cvterm AS featuretype ON (feature.type_id = featuretype.cvterm_id)
JOIN cvterm AS srcfeaturetype ON (srcfeature.type_id = srcfeaturetype.cvterm_id)
WHERE srcfeature.uniquename = '3R'
AND srcfeaturetype.cvterm_id = 210
AND organism.genus = 'Drosophila'
AND organism.species = 'melanogaster'
AND feature.organism_id = srcfeature.organism_id
AND featureloc.fmin <= 21809000
AND featureloc.fmax >= 21809000
AND featuretype.cvterm_id = 219
AND feature.is_analysis = false
AND feature.is_obsolete = false
; ... you get dramatically better performance, e.g. 266ms. Run on 2009-05-08 at ~08:50 ... 912, 272, 272, 272, 272, 268, 273, 272, 270, 270 Comparison| | FlyBase Chado Relational Database | OpenFlyData SPARQL Endpoint | | Shortest (s) | 0.27 | 18 | | Longest (s) | 0.91 | 23 | | Average (s) | 0.34 | 19.7 | | Standard deviation | 0.20 | 1.8 |
|