Skip to content

Commit 8290e65

Browse files
committed
Make webservice use query starter query logic factory xml files and the config service for properties
1 parent 78cedc1 commit 8290e65

File tree

135 files changed

+10156
-2287
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

135 files changed

+10156
-2287
lines changed

docker/config/application-query.yml

Lines changed: 72 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ warehouse:
1010
host: localhost
1111
port: 8125
1212
tables:
13+
annotation:
14+
name: 'datawave.annotation'
15+
annotationSource:
16+
name: 'datawave.annotationSource'
1317
shard:
1418
name: 'datawave.shard'
1519
index:
@@ -24,6 +28,8 @@ warehouse:
2428
name: 'datawave.metadata'
2529
edge:
2630
name: 'datawave.edge'
31+
ssdeepIndex:
32+
name: 'ssdeepIndex'
2733
errorTables:
2834
shard:
2935
name: "datawave.error_s"
@@ -36,7 +42,7 @@ warehouse:
3642
metadata:
3743
name: "datawave.error_m"
3844
model:
39-
name: "datawave.error_m"
45+
name: "datawave.metadata"
4046
metricTables:
4147
shard:
4248
name: "datawave.queryMetrics_s"
@@ -45,7 +51,7 @@ warehouse:
4551
reverseIndex:
4652
name: "datawave.queryMetrics_r"
4753
dateIndex:
48-
name: ""
54+
name: "datawave.queryMetrics_di"
4955
metadata:
5056
name: "datawave.queryMetrics_m"
5157
model:
@@ -58,8 +64,8 @@ warehouse:
5864
fullTableScanEnabled: false
5965
baseIteratorPriority: 100
6066
maxIndexScanTimeMillis: 31536000000
61-
eventPerDayThreshold: 40000
62-
shardsPerDayThreshold: 20
67+
eventPerDayThreshold: 10000
68+
shardsPerDayThreshold: 10
6369
initialMaxTermThreshold: 2000
6470
finalMaxTermThreshold: 2000
6571
maxDepthThreshold: 2000
@@ -72,6 +78,7 @@ warehouse:
7278
maxOrExpansionFstThreshold: 750
7379
maxFieldIndexRangeSplit: 16
7480
maxIvaratorSources: 20
81+
maxIvaratorSourceWait: 1800000
7582
maxEvaluationPipelines: 16
7683
maxPipelineCachedResults: 16
7784
hdfsSiteConfigURLs: 'file://${HADOOP_CONF_DIR:/etc/hadoop/conf}/core-site.xml,file://${HADOOP_CONF_DIR:/etc/hadoop/conf}/hdfs-site.xml'
@@ -81,6 +88,7 @@ warehouse:
8188
ivaratorCacheScanPersistThreshold: 100000
8289
ivaratorCacheScanTimeoutMinutes: 60
8390
yieldThresholdMs: 188400
91+
maxYields: 20
8492
modelName: 'DATAWAVE'
8593
edgeModelName: 'DATAWAVE_EDGE'
8694

@@ -162,8 +170,8 @@ datawave:
162170
disableIndexOnlyDocuments: false
163171
indexOnlyFilterFunctionsEnabled: false
164172
includeHierarchyFields: false
165-
hierarchyFieldOptions:
166-
"FOO": "BAR"
173+
#hierarchyFieldOptions:
174+
# "FOO": "BAR"
167175
baseIteratorPriority: ${warehouse.defaults.baseIteratorPriority}
168176
maxIndexScanTimeMillis: ${warehouse.defaults.maxIndexScanTimeMillis}
169177
collapseUids: false
@@ -177,10 +185,10 @@ datawave:
177185
enricherClassNames:
178186
- 'datawave.query.enrich.DatawaveTermFrequencyEnricher'
179187
useFilters: false
180-
filterClassNames:
181-
- 'foo.bar'
182-
filterOptions:
183-
'bar': "foo"
188+
#filterClassNames:
189+
# - 'foo.bar'
190+
#filterOptions:
191+
# 'bar': "foo"
184192
auditType: "ACTIVE"
185193
logicDescription: "Retrieve sharded events/documents, leveraging the global index tables as needed"
186194
eventPerDayThreshold: ${warehouse.defaults.eventPerDayThreshold}
@@ -195,18 +203,22 @@ datawave:
195203
maxOrExpansionFstThreshold: ${warehouse.defaults.maxOrExpansionFstThreshold}
196204
maxFieldIndexRangeSplit: ${warehouse.defaults.maxFieldIndexRangeSplit}
197205
maxIvaratorSources: ${warehouse.defaults.maxIvaratorSources}
206+
maxIvaratorSourceWait: ${warehouse.defaults.maxIvaratorSourceWait}
198207
maxEvaluationPipelines: ${warehouse.defaults.maxEvaluationPipelines}
199208
maxPipelineCachedResults: ${warehouse.defaults.maxPipelineCachedResults}
200209
hdfsSiteConfigURLs: ${warehouse.defaults.hdfsSiteConfigURLs}
201210
zookeeperConfig: ${warehouse.accumulo.zookeepers}
202211
ivaratorCacheDirConfigs:
203212
- basePathURI: "hdfs://${HADOOP_HOST:localhost}:9000/IvaratorCache"
213+
localIvaratorCacheDirConfigs:
214+
- basePathURI: "file:///tmp/IvaratorCache"
204215
ivaratorFstHdfsBaseURIs: ${warehouse.defaults.ivaratorFstHdfsBaseURIs}
205216
ivaratorCacheBufferSize: ${warehouse.defaults.ivaratorCacheBufferSize}
206217
ivaratorMaxOpenFiles: ${warehouse.defaults.ivaratorMaxOpenFiles}
207218
ivaratorCacheScanPersistThreshold: ${warehouse.defaults.ivaratorCacheScanPersistThreshold}
208219
ivaratorCacheScanTimeoutMinutes: ${warehouse.defaults.ivaratorCacheScanTimeoutMinutes}
209220
yieldThresholdMs: ${warehouse.defaults.yieldThresholdMs}
221+
maxYields: ${warehouse.defaults.maxYields}
210222
eventQueryDataDecoratorTransformer:
211223
requestedDecorators:
212224
- "CSV"
@@ -229,7 +241,7 @@ datawave:
229241
sendTimingToStatsd: false
230242
collectQueryMetrics: true
231243
collectTimingDetails: true
232-
logTimingDetails: true
244+
logTimingDetails: false
233245
statsdHost: ${warehouse.statsd.host}
234246
statsdPort: ${warehouse.statsd.port}
235247
evaluationOnlyFields: ""
@@ -435,7 +447,7 @@ datawave:
435447
useEnrichers: false
436448
auditType: "NONE"
437449
logicDescription: "Fast boolean query over indexed fields, only returning fields queried on"
438-
eventPerDayThreshold: 40000
450+
eventPerDayThreshold: 10000
439451
shardsPerDayThreshold: ${warehouse.defaults.shardsPerDayThreshold}
440452
initialMaxTermThreshold: ${warehouse.defaults.initialMaxTermThreshold}
441453
finalMaxTermThreshold: ${warehouse.defaults.finalMaxTermThreshold}
@@ -470,6 +482,52 @@ datawave:
470482
edgeModelName: "DATAWAVE_EDGE"
471483
modelTableName: ${warehouse.tables.model.name}
472484

485+
SSDeepSimilarityQuery:
486+
checkpointable: ${warehouse.defaults.checkpointable}
487+
tableName: ${warehouse.tables.ssdeepIndex.name}
488+
maxResults: -1
489+
maxWork: -1
490+
auditType: "NONE"
491+
indexBuckets: 32
492+
queryThreads: 100
493+
maxRepeatedCharacters: 3
494+
bucketEncodingBase: 32
495+
bucketEncodingLength: 2
496+
logicDescription: "Query that will retrieve similar ssdeep hashes"
497+
498+
SSDeepDiscoveryQuery:
499+
checkpointable: ${warehouse.defaults.checkpointable}
500+
tableName: ${warehouse.tables.index.name}
501+
indexTableName: ${warehouse.tables.index.name}
502+
reverseIndexTableName: ${warehouse.tables.reverseIndex.name}
503+
maxResults: -1
504+
maxWork: -1
505+
modelTableName: ${warehouse.tables.model.name}
506+
modelName: ${warehouse.defaults.modelName}
507+
fullTableScanEnabled: ${warehouse.defaults.fullTableScanEnabled}
508+
allowLeadingWildcard: true
509+
auditType: "NONE"
510+
logicDescription: "Discovery query that returns information from the index about the supplied term(s)"
511+
512+
SSDeepSimilarityDiscoveryQuery:
513+
checkpointable: ${warehouse.defaults.checkpointable}
514+
tableName: ${warehouse.tables.ssdeepIndex.name}
515+
auditType: "NONE"
516+
logicDescription: "Query that will retrieve similar ssdeep hashes and then automatically perform a follow-on Discovery query that returns information from the index about the similar hashes"
517+
518+
KeywordQuery:
519+
tableName: ${warehouse.tables.shard.name}
520+
maxResults: -1
521+
maxWork: -1
522+
auditType: "NONE"
523+
logicDescription: "Query that returns keywords from content"
524+
queryThreads: 100
525+
maxContentChars: 32768
526+
maxKeywords: 10
527+
maxNgrams: 3
528+
minNgrams: 1
529+
maxScore: "0.6f"
530+
473531
uuidTypes: &defaultUuidTypes
474532
'EVENT_ID':
475533
fieldName: 'EVENT_ID'
@@ -496,12 +554,12 @@ datawave:
496554
lookup:
497555
columnVisibility: ""
498556
beginDate: "20100101 000000.000"
499-
types: *defaultUuidTypes
557+
uuidTypes: *defaultUuidTypes
500558

501559
translateid:
502560
columnVisibility: ""
503561
beginDate: "20100101 000000.000"
504-
types: *defaultUuidTypes
562+
uuidTypes: *defaultUuidTypes
505563

506564
edge:
507565
# Uncomment the following line to override the edge beans to load

docker/config/wildfly.yml

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
logging:
2+
level:
3+
datawave: info
4+
org.springframework: info
5+
6+
accumulo:
7+
zookeepers: 'localhost:2181'
8+
instanceName: 'accumulo'
9+
username: 'root'
10+
password: 'secret'
11+
12+
query:
13+
page:
14+
shortCircuit:
15+
minutes: 55
16+
size:
17+
default: 10
18+
max: 10000
19+
shortCircuit:
20+
minutes: 30
21+
byte:
22+
trigger: 0
23+
maxCallTime:
24+
minutes: 60
25+
expiration:
26+
minutes: 15
27+
28+
spring.context.debug.dir: "${java.io.tmpdir}/contextDebug-${random.uuid}"
29+
30+
datawave:
31+
cachedresults:
32+
hdfsUri: 'hdfs://localhost:8020/'
33+
exportDir: '/CachedResults'
34+
rowsPerBatch: 10
35+
daysToLive: 1
36+
configuration:
37+
spring:
38+
# if false, then the cdi.bean.context property will be used to find the XML file (default beanRefContext.xml)
39+
# to construct the application context and these datawave.configuration.spring properties will be ignored
40+
configure-from-properties: true
41+
ignoreUnresolvablePlaceholders: false
42+
scan-base-packages:
43+
- "datawave.webservice.config"
44+
- "datawave.microservice.config.marking"
45+
- "datawave.microservice.query.edge.config"
46+
- "datawave.microservice.query.logic.config"
47+
sources:
48+
- "classpath*:datawave/annotation/AnnotationManagerFactory.xml"
49+
- "classpath*:datawave/configuration/spring/CDIBeanPostProcessor.xml"
50+
- "classpath*:datawave/marking/MarkingFunctionsContext.xml"
51+
- "classpath*:datawave/metadata/MetadataHelperContext.xml"
52+
- "classpath*:datawave/metadata/MetadataHelperCacheListenerContext.xml"
53+
- "classpath*:datawave/modification/ModificationServices.xml"
54+
- "classpath*:datawave/query/QueryExpiration.xml"
55+
- "classpath*:datawave/query/CachedResults*.xml"
56+
- "classpath*:datawave/security/PrincipalFactory.xml"
57+
- "classpath*:datawave/security/TestDatawaveUserServiceConfiguration.xml"
58+
- "classpath*:org/apache/accumulo/operations/LookupBean.xml"
59+
- "classpath*:CacheContext.xml"
60+
- "classpath*:QueryLogicFactory.xml"
61+
- "classpath*:EdgeQueryLogicFactory.xml"
62+
- "classpath*:KeywordExtractionQueryLogicFactory.xml"
63+
- "classpath*:SSDeepQueryLogicFactory.xml"
64+
query:
65+
expiration:
66+
minutes: ${query.expiration.minutes}
67+
callTimeout: ${query.maxCallTime.minutes}
68+
shortCircuitCheckTime: ${query.page.size.shortCircuit.minutes}
69+
shortCircuitTimeout: ${query.page.shortCircuit.minutes}
70+
logic:
71+
factory:
72+
enabled: true
73+
importResources: false
74+
logics:
75+
RemoteEventQuery:
76+
tableName: ${warehouse.tables.shard.name}
77+
maxResults: -1
78+
auditType: "NONE"
79+
logicDescription: "Retrieve sharded events/documents, leveraging the global index tables as needed"
80+
metrics:
81+
remoteProcessorThreads: 4
82+
batchSize: 100
83+
maxQueueSize: 250000
84+
maxLatencyMs: 5000
85+
maxShutdownMs: 30000
86+
useRemoteService: false
87+
timely:
88+
host: "localhost"
89+
port: 4242
90+
metricFieldTags:
91+
- "USER"
92+
- "HOST"
93+
- "QUERY_ID"
94+
- "QUERY_LOGIC"
95+
RemoteUserOperations:
96+
scheme: "https"
97+
host: "localhost"
98+
port: 8443
99+
uri: "/DataWave/Security/User/"
100+
RemoteQueryService:
101+
scheme: "https"
102+
host: "query"
103+
port: 8443
104+
uri: "/query/v1/"
105+
modification:
106+
data:
107+
tableName: ${warehouse.tables.metadata.name}
108+
poolName: ${datawave.connection.factory.defaultPool}
109+
handlers:
110+
requiresAudit: false
111+
authorizedRoles:
112+
- "AuthorizedUser"
113+
eventTableName: ${warehouse.tables.shard.name}
114+
metadataTableName: ${warehouse.tables.metadata.name}
115+
indexTableName: ${warehouse.tables.index.name}
116+
reverseIndexTableName: ${warehouse.tables.reverseIndex.name}
117+
securityMarkingExemptFields:
118+
- "exemptField1"
119+
indexOnlyMap:
120+
key1: "value1"
121+
indexOnlySuffixes:
122+
- "suffix1"
123+
contentFields:
124+
- "contentField1"
125+
table:
126+
cache:
127+
enabled: false
128+
zookeepers: ${accumulo.zookeepers}
129+
poolName: ${datawave.connection.factory.defaultPool}
130+
reloadInterval: 86400000
131+
evictionReaperIntervalInSeconds: 30
132+
numLocks: 300
133+
maxRetries: 10
134+
tableNames:
135+
- datawave.metadata
136+
- datawave.queryMetrics_m
137+
- datawave.error_m

docs/pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@
162162
<dependency>
163163
<groupId>org.springframework.security</groupId>
164164
<artifactId>spring-security-core</artifactId>
165-
<version>${version.springframework}</version>
165+
<version>${version.spring-security-core}</version>
166166
</dependency>
167167
<dependency>
168168
<groupId>org.wildfly</groupId>
@@ -318,7 +318,7 @@
318318
<configuration combine.children="append">
319319
<sourcepath>${project.build.directory}/dependency</sourcepath>
320320
<outputDirectory>${project.build.directory}/apidocs</outputDirectory>
321-
<excludePackageNames>generated-sources.*:datawave.microservice.querymetric</excludePackageNames>
321+
<excludePackageNames>generated-sources.*:datawave.microservice.querymetric:datawave.autoconfigure</excludePackageNames>
322322
</configuration>
323323
</execution>
324324
</executions>

microservices/configcheck/pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -339,8 +339,8 @@
339339
<threadCountClasses>1</threadCountClasses>
340340
<threadCountMethods>0</threadCountMethods>
341341
<threadCountSuites>0</threadCountSuites>
342-
<argLine>@{argLine} -XX:+TieredCompilation -XX:TieredStopAtLevel=1 -Dfile.encoding=UTF8 -Duser.timezone=GMT -Xmx1024m -Dapple.awt.UIElement=true -Djava.security.krb5.realm= -Djava.security.krb5.kdc=</argLine>
343-
<redirectTestOutputToFile>true</redirectTestOutputToFile>
342+
<argLine>-XX:+TieredCompilation -XX:TieredStopAtLevel=1 -Dfile.encoding=UTF8 -Duser.timezone=GMT -Xmx1024m -Dapple.awt.UIElement=true -Djava.security.krb5.realm= -Djava.security.krb5.kdc=</argLine>
343+
<redirectTestOutputToFile>false</redirectTestOutputToFile>
344344
<runOrder>random</runOrder>
345345
</configuration>
346346
</plugin>

0 commit comments

Comments
 (0)