Skip to content

Commit eaec460

Browse files
committed
#4931 - Support for GraphDB knowledge bases
- Tentative support - fails many tests
1 parent beca769 commit eaec460

File tree

6 files changed

+489
-1
lines changed

6 files changed

+489
-1
lines changed

inception/inception-kb/pom.xml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,13 +263,17 @@
263263
<artifactId>jackson-annotations</artifactId>
264264
</dependency>
265265

266+
<dependency>
267+
<groupId>org.apache.httpcomponents</groupId>
268+
<artifactId>httpclient</artifactId>
269+
</dependency>
266270
<dependency>
267271
<groupId>org.apache.httpcomponents</groupId>
268272
<artifactId>httpcore</artifactId>
269273
</dependency>
270274
<dependency>
271275
<groupId>org.apache.httpcomponents</groupId>
272-
<artifactId>httpclient</artifactId>
276+
<artifactId>httpmime</artifactId>
273277
</dependency>
274278

275279
<!-- DEPENDENCIES FOR TESTING -->

inception/inception-kb/src/main/java/de/tudarmstadt/ukp/inception/kb/IriConstants.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ public class IriConstants
4848
public static final String PREFIX_MWAPI = "https://www.mediawiki.org/ontology#API/";
4949
public static final String PREFIX_STARDOG = "tag:stardog:api:search:";
5050
public static final String PREFIX_BLAZEGRAPH = "http://www.bigdata.com/rdf/search#";
51+
public static final String PREFIX_GRAPHDB = "http://www.ontotext.com/";
5152

5253
public static final String UKP_WIKIDATA_SPARQL_ENDPOINT = "http://knowledgebase.ukp.informatik.tu-darmstadt.de:8890/sparql";
5354
public static final Set<String> IMPLICIT_NAMESPACES = Set.of(RDF.NAMESPACE, RDFS.NAMESPACE,
@@ -90,6 +91,7 @@ public class IriConstants
9091
public static final IRI FTS_WIKIDATA;
9192
public static final IRI FTS_STARDOG;
9293
public static final IRI FTS_BLAZEGRAPH;
94+
public static final IRI FTS_GRAPHDB;
9395
public static final IRI FTS_NONE;
9496

9597
public static final List<IRI> CLASS_IRIS;
@@ -121,6 +123,7 @@ public class IriConstants
121123
FTS_WIKIDATA = vf.createIRI(PREFIX_MWAPI, "search");
122124
FTS_STARDOG = vf.createIRI(PREFIX_STARDOG, "textMatch");
123125
FTS_BLAZEGRAPH = vf.createIRI(PREFIX_BLAZEGRAPH, "search");
126+
FTS_GRAPHDB = vf.createIRI(PREFIX_GRAPHDB, "fts");
124127
FTS_NONE = vf.createIRI("FTS:NONE");
125128

126129
CLASS_IRIS = asList(RDFS.CLASS, OWL.CLASS, WIKIDATA_CLASS, SKOS.CONCEPT);
@@ -147,6 +150,10 @@ public static String getFtsBackendName(String aFTS)
147150
return "Blazegraph DB";
148151
}
149152

153+
if (FTS_GRAPHDB.stringValue().equals(aFTS)) {
154+
return "GraphDB";
155+
}
156+
150157
if (FTS_VIRTUOSO.stringValue().equals(aFTS)) {
151158
return "Virtuoso";
152159
}
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
/*
2+
* Licensed to the Technische Universität Darmstadt under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The Technische Universität Darmstadt
6+
* licenses this file to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License.
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package de.tudarmstadt.ukp.inception.kb.querybuilder;
19+
20+
import static de.tudarmstadt.ukp.inception.kb.IriConstants.PREFIX_GRAPHDB;
21+
import static de.tudarmstadt.ukp.inception.kb.querybuilder.SPARQLQueryBuilder.convertToRequiredTokenPrefixMatchingQuery;
22+
import static de.tudarmstadt.ukp.inception.kb.querybuilder.SPARQLQueryBuilder.Priority.PRIMARY;
23+
import static org.apache.commons.lang3.StringUtils.isBlank;
24+
import static org.eclipse.rdf4j.sparqlbuilder.constraint.Expressions.and;
25+
import static org.eclipse.rdf4j.sparqlbuilder.core.SparqlBuilder.prefix;
26+
import static org.eclipse.rdf4j.sparqlbuilder.graphpattern.GraphPatterns.and;
27+
import static org.eclipse.rdf4j.sparqlbuilder.graphpattern.GraphPatterns.union;
28+
import static org.eclipse.rdf4j.sparqlbuilder.rdf.Rdf.iri;
29+
30+
import java.util.ArrayList;
31+
32+
import org.eclipse.rdf4j.sparqlbuilder.constraint.Expression;
33+
import org.eclipse.rdf4j.sparqlbuilder.core.Prefix;
34+
import org.eclipse.rdf4j.sparqlbuilder.graphpattern.GraphPattern;
35+
36+
public class FtsAdapterGraphDb
37+
implements FtsAdapter
38+
{
39+
private static final String MULTI_CHAR_WILDCARD = "*";
40+
41+
private static final Prefix PREFIX_GRAPHDB_SEARCH = prefix("onto", iri(PREFIX_GRAPHDB));
42+
43+
private final SPARQLQueryBuilder builder;
44+
45+
public FtsAdapterGraphDb(SPARQLQueryBuilder aBuilder)
46+
{
47+
builder = aBuilder;
48+
builder.addPrefix(PREFIX_GRAPHDB_SEARCH);
49+
}
50+
51+
@Override
52+
public void withLabelMatchingExactlyAnyOf(String... aValues)
53+
{
54+
var kb = builder.getKnowledgeBase();
55+
56+
var valuePatterns = new ArrayList<GraphPattern>();
57+
for (var value : aValues) {
58+
var sanitizedValue = builder.sanitizeQueryString_FTS(value);
59+
60+
if (isBlank(sanitizedValue)) {
61+
continue;
62+
}
63+
64+
builder.addProjection(SPARQLQueryBuilder.VAR_SCORE);
65+
66+
valuePatterns.add(new GraphDbFtsQuery(SPARQLQueryBuilder.VAR_SUBJECT,
67+
SPARQLQueryBuilder.VAR_SCORE, SPARQLQueryBuilder.VAR_MATCH_TERM,
68+
SPARQLQueryBuilder.VAR_MATCH_TERM_PROPERTY, sanitizedValue) //
69+
.withLimit(builder.getLimit()) //
70+
.filter(builder.equalsPattern(SPARQLQueryBuilder.VAR_MATCH_TERM, value,
71+
kb)));
72+
}
73+
74+
if (valuePatterns.isEmpty()) {
75+
builder.noResult();
76+
}
77+
78+
builder.addPattern(PRIMARY, and( //
79+
builder.bindMatchTermProperties(SPARQLQueryBuilder.VAR_MATCH_TERM_PROPERTY), //
80+
union(valuePatterns.toArray(GraphPattern[]::new))));
81+
}
82+
83+
@Override
84+
public void withLabelContainingAnyOf(String... aValues)
85+
{
86+
var valuePatterns = new ArrayList<GraphPattern>();
87+
for (var value : aValues) {
88+
var sanitizedValue = builder.sanitizeQueryString_FTS(value);
89+
90+
if (isBlank(sanitizedValue)) {
91+
continue;
92+
}
93+
94+
builder.addProjection(SPARQLQueryBuilder.VAR_SCORE);
95+
96+
valuePatterns.add(new GraphDbFtsQuery(SPARQLQueryBuilder.VAR_SUBJECT,
97+
SPARQLQueryBuilder.VAR_SCORE, SPARQLQueryBuilder.VAR_MATCH_TERM,
98+
SPARQLQueryBuilder.VAR_MATCH_TERM_PROPERTY, sanitizedValue) //
99+
.withLimit(builder.getLimit()) //
100+
.filter(builder.containsPattern(SPARQLQueryBuilder.VAR_MATCH_TERM,
101+
value)));
102+
}
103+
104+
if (valuePatterns.isEmpty()) {
105+
builder.noResult();
106+
}
107+
108+
builder.addPattern(PRIMARY,
109+
and(builder.bindMatchTermProperties(SPARQLQueryBuilder.VAR_MATCH_TERM_PROPERTY),
110+
union(valuePatterns.toArray(GraphPattern[]::new))));
111+
}
112+
113+
@Override
114+
public void withLabelStartingWith(String aPrefixQuery)
115+
{
116+
// Strip single quotes and asterisks because they have special semantics
117+
var queryString = builder.sanitizeQueryString_FTS(aPrefixQuery);
118+
119+
if (isBlank(queryString)) {
120+
builder.noResult();
121+
}
122+
123+
// If the query string entered by the user does not end with a space character, then
124+
// we assume that the user may not yet have finished writing the word and add a
125+
// wildcard
126+
if (!aPrefixQuery.endsWith(" ")) {
127+
queryString += MULTI_CHAR_WILDCARD;
128+
}
129+
130+
builder.addProjection(SPARQLQueryBuilder.VAR_SCORE);
131+
132+
// Locate all entries where the label contains the prefix (using the FTS) and then
133+
// filter them by those which actually start with the prefix.
134+
builder.addPattern(PRIMARY, and( //
135+
builder.bindMatchTermProperties(SPARQLQueryBuilder.VAR_MATCH_TERM_PROPERTY), //
136+
new GraphDbFtsQuery(SPARQLQueryBuilder.VAR_SUBJECT, SPARQLQueryBuilder.VAR_SCORE,
137+
SPARQLQueryBuilder.VAR_MATCH_TERM,
138+
SPARQLQueryBuilder.VAR_MATCH_TERM_PROPERTY, queryString) //
139+
.withLimit(builder.getLimit()) //
140+
.filter(builder.startsWithPattern(SPARQLQueryBuilder.VAR_MATCH_TERM,
141+
aPrefixQuery))));
142+
}
143+
144+
@Override
145+
public void withLabelMatchingAnyOf(String... aValues)
146+
{
147+
var valuePatterns = new ArrayList<GraphPattern>();
148+
for (var value : aValues) {
149+
var sanitizedValue = builder.sanitizeQueryString_FTS(value);
150+
151+
if (isBlank(sanitizedValue)) {
152+
continue;
153+
}
154+
155+
var fuzzyQuery = convertToRequiredTokenPrefixMatchingQuery(sanitizedValue, "",
156+
MULTI_CHAR_WILDCARD);
157+
158+
if (isBlank(fuzzyQuery)) {
159+
continue;
160+
}
161+
162+
builder.addProjection(SPARQLQueryBuilder.VAR_SCORE);
163+
164+
var labelFilterExpressions = new ArrayList<Expression<?>>();
165+
labelFilterExpressions.add(builder.matchKbLanguage(VAR_MATCH_TERM));
166+
167+
valuePatterns.add(new GraphDbFtsQuery(SPARQLQueryBuilder.VAR_SUBJECT,
168+
SPARQLQueryBuilder.VAR_SCORE, SPARQLQueryBuilder.VAR_MATCH_TERM,
169+
SPARQLQueryBuilder.VAR_MATCH_TERM_PROPERTY, fuzzyQuery) //
170+
.withLimit(builder.getLimit()) //
171+
.filter(and(labelFilterExpressions.toArray(Expression[]::new))));
172+
}
173+
174+
if (valuePatterns.isEmpty()) {
175+
builder.noResult();
176+
}
177+
178+
builder.addPattern(PRIMARY,
179+
and(builder.bindMatchTermProperties(SPARQLQueryBuilder.VAR_MATCH_TERM_PROPERTY),
180+
union(valuePatterns.toArray(GraphPattern[]::new))));
181+
}
182+
}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*
2+
* Licensed to the Technische Universität Darmstadt under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The Technische Universität Darmstadt
6+
* licenses this file to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License.
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package de.tudarmstadt.ukp.inception.kb.querybuilder;
19+
20+
import static de.tudarmstadt.ukp.inception.kb.querybuilder.RdfCollection.collectionOf;
21+
import static org.eclipse.rdf4j.sparqlbuilder.core.SparqlBuilder.prefix;
22+
import static org.eclipse.rdf4j.sparqlbuilder.rdf.Rdf.iri;
23+
import static org.eclipse.rdf4j.sparqlbuilder.rdf.Rdf.literalOf;
24+
25+
import java.util.ArrayList;
26+
27+
import org.eclipse.rdf4j.sparqlbuilder.core.Prefix;
28+
import org.eclipse.rdf4j.sparqlbuilder.core.QueryElement;
29+
import org.eclipse.rdf4j.sparqlbuilder.core.Variable;
30+
import org.eclipse.rdf4j.sparqlbuilder.graphpattern.GraphPattern;
31+
import org.eclipse.rdf4j.sparqlbuilder.graphpattern.GraphPatterns;
32+
import org.eclipse.rdf4j.sparqlbuilder.rdf.Iri;
33+
34+
import de.tudarmstadt.ukp.inception.kb.IriConstants;
35+
36+
public class GraphDbFtsQuery
37+
implements GraphPattern
38+
{
39+
public static final Prefix PREFIX_GRAPHDB_FTS = prefix("onto",
40+
iri(IriConstants.PREFIX_GRAPHDB));
41+
public static final Iri GRAPHDB_FTS = PREFIX_GRAPHDB_FTS.iri("fts");
42+
43+
private final Variable subject;
44+
private final Variable score;
45+
private final Variable matchTerm;
46+
private final Variable matchTermProperty;
47+
private final String query;
48+
private int limit = 0;
49+
50+
public GraphDbFtsQuery(Variable aSubject, Variable aScore, Variable aMatchTerm,
51+
Variable aMatchTermProperty, String aQuery)
52+
{
53+
subject = aSubject;
54+
score = aScore;
55+
matchTerm = aMatchTerm;
56+
matchTermProperty = aMatchTermProperty;
57+
query = aQuery;
58+
}
59+
60+
public GraphDbFtsQuery withLimit(int aLimit)
61+
{
62+
limit = aLimit;
63+
return this;
64+
}
65+
66+
@Override
67+
public String getQueryString()
68+
{
69+
var queryElements = new ArrayList<QueryElement>();
70+
queryElements.add(literalOf(query));
71+
if (limit > 0) {
72+
queryElements.add(literalOf(2 * limit));
73+
}
74+
75+
return GraphPatterns.and( //
76+
matchTerm.has(GRAPHDB_FTS, collectionOf(queryElements)), //
77+
subject.has(matchTermProperty, matchTerm)) //
78+
.getQueryString();
79+
}
80+
81+
@Override
82+
public boolean isEmpty()
83+
{
84+
return false;
85+
}
86+
}

inception/inception-kb/src/main/java/de/tudarmstadt/ukp/inception/kb/querybuilder/SPARQLQueryBuilder.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import static de.tudarmstadt.ukp.inception.kb.IriConstants.FTS_ALLEGRO_GRAPH;
2121
import static de.tudarmstadt.ukp.inception.kb.IriConstants.FTS_BLAZEGRAPH;
2222
import static de.tudarmstadt.ukp.inception.kb.IriConstants.FTS_FUSEKI;
23+
import static de.tudarmstadt.ukp.inception.kb.IriConstants.FTS_GRAPHDB;
2324
import static de.tudarmstadt.ukp.inception.kb.IriConstants.FTS_NONE;
2425
import static de.tudarmstadt.ukp.inception.kb.IriConstants.FTS_RDF4J_LUCENE;
2526
import static de.tudarmstadt.ukp.inception.kb.IriConstants.FTS_STARDOG;
@@ -799,6 +800,10 @@ private FtsAdapter getAdapter()
799800
return new FtsAdapterBlazegraph(this);
800801
}
801802

803+
if (FTS_GRAPHDB.equals(ftsMode)) {
804+
return new FtsAdapterGraphDb(this);
805+
}
806+
802807
if (FTS_FUSEKI.equals(ftsMode)) {
803808
return new FtsAdapterFuseki(this);
804809
}
@@ -995,6 +1000,9 @@ private Expression<?> matchString(SparqlFunction aFunction, Variable aVariable,
9951000
value = Stream.of(TOKENKIZER_PATTERN.split(aValue)) //
9961001
.map(t -> "(?=.*" + asRegexp(t) + ")") //
9971002
.collect(joining());
1003+
// value = Stream.of(TOKENKIZER_PATTERN.split(aValue)) //
1004+
// .map(t -> asRegexp(t)) //
1005+
// .collect(joining("|"));
9981006
break;
9991007
default:
10001008
throw new IllegalArgumentException(

0 commit comments

Comments
 (0)