Skip to content

Commit 77ecf44

Browse files
committed
Rebuild pattern matching by parsing the bids schema, fix a small bug with repeated data_type in some filenames/paths
1 parent 15f4518 commit 77ecf44

File tree

2 files changed

+87
-52
lines changed

2 files changed

+87
-52
lines changed

generate_entity_patterns.sh

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
#!/usr/bin/env bash
#
# generate_entity_patterns.sh - derive the entity tables used by libBIDS.sh
# from the BIDS schema.
#
# Usage: generate_entity_patterns.sh [SCHEMA_JSON]
#   SCHEMA_JSON  path to the BIDS schema in JSON form (default: schema.json)
#
# Output (stdout), following the order of .rules.entities in the schema:
#   1. one double-quoted extglob pattern per entity:
#        format "label" -> "*(_<name>-+([a-zA-Z0-9]))"
#        format "index" -> "*(_<name>-+([0-9]))"
#   2. one line with the entity keys joined by commas
#   3. one line with the entity short names joined by spaces
#
# Exit status: non-zero, with a message on stderr, when jq or the schema file
# is unavailable, the schema lists no entities, an entity has no name, or an
# entity has a format other than "label" or "index".

set -euo pipefail

schema_file="${1:-schema.json}"

if ! command -v jq >/dev/null; then
  printf 'Error: jq is required but was not found in PATH\n' >&2
  exit 1
fi
if [[ ! -r "${schema_file}" ]]; then
  printf 'Error: cannot read schema file %s\n' "${schema_file}" >&2
  exit 1
fi

# Resolve every entity in a single jq pass, one "key|name|format" row each.
# - `.rules.entities[]` (no dot before the bracket) is accepted by all jq
#   versions; the `.rules.entities.[]` spelling is not.
# - The entity key is looked up as data (`[$key]`) rather than being pasted
#   into the jq program text.
# - "|" is a non-whitespace separator, so `read` keeps empty fields intact.
# - A missing format is reported as "null", matching what `jq -r` would print.
entity_rows=$(jq -r '
  . as $schema
  | $schema.rules.entities[]
  | . as $key
  | $schema.objects.entities[$key]
  | [$key, (.name // ""), (.format // "null")]
  | join("|")
' "${schema_file}")

entities_order=()
entities_names_ordered=()

while IFS='|' read -r entity entity_name entity_format; do
  # A here-string always yields at least one (possibly empty) line.
  [[ -n "${entity}" ]] || continue

  if [[ -z "${entity_name}" ]]; then
    printf 'Entity %s has no name in %s\n' "${entity}" "${schema_file}" >&2
    exit 1
  fi

  case "${entity_format}" in
    label) value_chars='a-zA-Z0-9' ;;
    index) value_chars='0-9' ;;
    *)
      echo "Unrecognized entity_format ${entity_format}" 1>&2
      exit 1
      ;;
  esac

  # The surrounding double quotes are part of the output so the line can be
  # pasted directly as a bash array element.
  printf '"*(_%s-+([%s]))"\n' "${entity_name}" "${value_chars}"

  entities_order+=("${entity}")
  entities_names_ordered+=("${entity_name}")
done <<<"${entity_rows}"

if ((${#entities_order[@]} == 0)); then
  printf 'Error: no entities found under .rules.entities in %s\n' "${schema_file}" >&2
  exit 1
fi

# Join without a trailing separator so the lines can be used verbatim.
(
  IFS=,
  printf '%s\n' "${entities_order[*]}"
)
printf '%s\n' "${entities_names_ordered[*]}"

libBIDS.sh

Lines changed: 62 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env bash
22

3-
if (( BASH_VERSINFO[0] < 4 || (BASH_VERSINFO[0] == 4 && BASH_VERSINFO[1] < 3) )); then
3+
if ((BASH_VERSINFO[0] < 4 || (BASH_VERSINFO[0] == 4 && BASH_VERSINFO[1] < 3))); then
44
echo "Error: bash >= 4.3 is required for this script" >&2
55
exit 1
66
fi
@@ -220,7 +220,9 @@ _libBIDSsh_parse_filename() {
220220
# Store the full path and filename
221221
arr[path]=$(tr -s / <<<"${path}")
222222
arr[extension]="${filename#*.}"
223-
arr[type]=$(grep -E -o '(func|dwi|fmap|anat|perf|meg|eeg|ieeg|beh|pet|micr|nirs|motion|mrs)' <<<$(basename $(dirname "${path}")) || echo "NA")
223+
# Extract from schema
224+
# jq -r .objects.datatypes.[].value schema.json | paste -s -d'|'
225+
arr[data_type]=$(grep -o -E "(anat|beh|dwi|eeg|fmap|func|ieeg|meg|micr|motion|mrs|perf|pet|nirs)" <<<$(dirname ${path}) | head -1 || echo "NA")
224226
arr[derivatives]=$(grep -o 'derivatives/.*/' <<<"${path}" | awk -F/ '{print $2}' || echo "NA")
225227

226228
local name_no_ext="${filename%%.*}"
@@ -245,13 +247,12 @@ _libBIDSsh_parse_filename() {
245247

246248
key_order+=("suffix")
247249
key_order+=("extension")
248-
key_order+=("type")
250+
key_order+=("data_type")
249251
key_order+=("derivatives")
250252
key_order+=("path")
251253

252254
# Store the key order in the array
253255
arr[_key_order]="${key_order[*]}"
254-
255256
}
256257

257258
libBIDSsh_extension_json_rows_to_column_json_path() {
@@ -337,49 +338,54 @@ libBIDSsh_parse_bids_to_csv() {
337338
# Build the pattern piece by piece
338339
local base_pattern="*"
339340

340-
# Optional components
341-
local optional_components=(
342-
"*(_ses-+([a-zA-Z0-9]))" # _ses-<label>
343-
"*(_task-+([a-zA-Z0-9]))" # _task-<label>
344-
"*(_acq-+([a-zA-Z0-9]))" # _acq-<label>
345-
"*(_ce-+([a-zA-Z0-9]))" # _ce-<label>
346-
"*(_rec-+([a-zA-Z0-9]))" # _rec-<label>
347-
"*(_dir-+([a-zA-Z0-9]))" # _dir-<label>
348-
"*(_run-+([0-9]))" # _run-<index>
349-
"*(_recording-+([a-zA-Z0-9]))" # _recording-<label>
350-
"*(_mod-+([a-zA-Z0-9]))" # _mod-<label>
351-
"*(_echo-+([0-9]))" # _echo-<index>
352-
"*(_part-@(mag|phase|real|imag))" # _part-<mag|phase|real|imag>
353-
"*(_chunk-+([0-9]))" # _chunk-<index>
341+
# Entities components
342+
# Extracted from schema with generate_entity_patterns.sh
343+
local entities=(
344+
"*(_sub-+([a-zA-Z0-9]))"
345+
"*(_ses-+([a-zA-Z0-9]))"
346+
"*(_sample-+([a-zA-Z0-9]))"
347+
"*(_task-+([a-zA-Z0-9]))"
348+
"*(_tracksys-+([a-zA-Z0-9]))"
349+
"*(_acq-+([a-zA-Z0-9]))"
350+
"*(_nuc-+([a-zA-Z0-9]))"
351+
"*(_voi-+([a-zA-Z0-9]))"
352+
"*(_ce-+([a-zA-Z0-9]))"
353+
"*(_trc-+([a-zA-Z0-9]))"
354+
"*(_stain-+([a-zA-Z0-9]))"
355+
"*(_rec-+([a-zA-Z0-9]))"
356+
"*(_dir-+([a-zA-Z0-9]))"
357+
"*(_run-+([0-9]))"
358+
"*(_mod-+([a-zA-Z0-9]))"
359+
"*(_echo-+([0-9]))"
360+
"*(_flip-+([0-9]))"
361+
"*(_inv-+([0-9]))"
362+
"*(_mt-+([a-zA-Z0-9]))"
363+
"*(_part-+([a-zA-Z0-9]))"
364+
"*(_proc-+([a-zA-Z0-9]))"
365+
"*(_hemi-+([a-zA-Z0-9]))"
366+
"*(_space-+([a-zA-Z0-9]))"
367+
"*(_split-+([0-9]))"
368+
"*(_recording-+([a-zA-Z0-9]))"
369+
"*(_chunk-+([0-9]))"
370+
"*(_seg-+([a-zA-Z0-9]))"
371+
"*(_res-+([a-zA-Z0-9]))"
372+
"*(_den-+([a-zA-Z0-9]))"
373+
"*(_label-+([a-zA-Z0-9]))"
374+
"*(_desc-+([a-zA-Z0-9]))"
354375
)
355376

356-
# Anatomical suffixes
357-
# UNIT1 suffix is technically incorrect as it cannot coexist with the _part-@(mag|phase|real|imag)
358-
local suffixes="_@(FLAIR|PDT2|PDw|T1w|T2starw|T2w|UNIT1|angio|inplaneT1|inplaneT2"
359-
# Parametric map suffixes
360-
suffixes+="|Chimap|M0map|MTRmap|MTVmap|MTsat|MWFmap|PDmap|R1map|R2map|R2starmap|RB1map|S0map|T1map|T1rho|T2map|T2starmap|TB1map"
361-
# Defacing mask
362-
suffixes+="|defacemask"
363-
# Depreciated anatomical suffixes
364-
# FLASH PD and T2star are depreciated but we support them
365-
suffixes+="|FLASH|PD|T2star"
366-
# Functional images
367-
suffixes+="|bold|cbv|phase|sbref|noRF|events|physio|stim"
368-
# Diffusion images
369-
suffixes+="|dwi|sbref"
370-
# Perfusion images
371-
suffixes+="|asl|m0scan|aslcontext|noRF"
372-
# Field maps
373-
suffixes+="|magnitude1|magnitude2|phasediff|phase1|phase2|fieldmap|magnitude|epi"
374-
# Modality agnostic files
375-
suffixes+="|scans|sessions)"
377+
# Suffixes from schema.json
378+
# jq -r .objects.suffixes.[].value schema.json | paste -s -d'|'
379+
suffixes+="_@(2PE|BF|Chimap|CARS|CONF|DIC|DF|FLAIR|FLASH|FLUO|IRT1|M0map|MEGRE|MESE|MP2RAGE|MPE|MPM|MTR|MTRmap|MTS|MTVmap|MTsat|MWFmap|NLO|OCT|PC|PD|PDT2|PDmap|PDw|PLI|R1map|R2map|R2starmap|RB1COR|RB1map|S0map|SEM|SPIM|SR|T1map|T1rho|T1w|T2map|T2star|T2starmap|T2starw|T2w|TB1AFI|TB1DAM|TB1EPI|TB1RFM|TB1SRGE|TB1TFL|TB1map|TEM|UNIT1|VFA|angio|asl|aslcontext|asllabeling|beh|blood|bold|cbv|channels|coordsystem|defacemask|descriptions|dseg|dwi|eeg|electrodes|epi|events|fieldmap|headshape|XPCT|ieeg|inplaneT1|inplaneT2|m0scan|magnitude|magnitude1|magnitude2|markers|mask|meg|motion|mrsi|mrsref|nirs|noRF|optodes|pet|phase|phase1|phase2|phasediff|photo|physio|probseg|sbref|scans|sessions|stim|svs|uCT|unloc)"
376380

377381
# Allowed extensions
378-
local extensions="@(.nii|.json|.tsv|bval|bvec)?(.gz)"
382+
# jq -r .objects.extensions.[].value schema.json | paste -s -d'|'
383+
# Stripped the ".*" and directory entries manually
384+
local extensions="@(.ave|.bdf|.bval|.bvec|.chn|.con|.dat|.dlabel.nii|.edf|.eeg|.fdt|.fif|.jpg|.json|.kdf|.label.gii|.md||.mhd|.mrk|.nii|.nii.gz|.nwb|.ome.btf|.ome.tif|.png|.pos|.raw|.rst|.set|.snirf|.sqd|.tif|.trg|.tsv|.tsv.gz|.txt|.vhdr|.vmrk)"
379385

380386
# Piece together the pattern
381387
local pattern=${base_pattern}
382-
for entry in "${optional_components[@]}"; do
388+
for entry in "${entities[@]}"; do
383389
pattern+=${entry}
384390
done
385391
pattern+=${suffixes}
@@ -395,11 +401,15 @@ libBIDSsh_parse_bids_to_csv() {
395401
shopt -u nullglob
396402
shopt -u globstar
397403

398-
echo "derivatives,sub,ses,type,task,acq,ce,rec,dir,run,recording,mod,echo,part,chunk,suffix,extension,path"
404+
# Order of entities from generate_entity_patterns.sh
405+
entities_displayname_order="subject,session,sample,task,tracksys,acquisition,nucleus,volume,ceagent,tracer,stain,reconstruction,direction,run,modality,echo,flip,inversion,mtransfer,part,processing,hemisphere,space,split,recording,chunk,segmentation,resolution,density,label,description"
406+
entities_order="sub ses sample task tracksys acq nuc voi ce trc stain rec dir run mod echo flip inv mt part proc hemi space split recording chunk seg res den label desc"
407+
408+
echo "derivatives,data_type,${entities_displayname_order},suffix,extension,path"
399409
for file in "${files[@]}"; do
400410
declare -A file_info
401411
_libBIDSsh_parse_filename "${file}" file_info
402-
for key in derivatives sub ses type task acq ce rec dir run recording mod echo part chunk suffix extension path; do
412+
for key in derivatives data_type ${entities_order} suffix extension path; do
403413
if [[ "${file_info[${key}]+abc}" ]]; then
404414
echo -n "${file_info[${key}]},"
405415
else
@@ -617,16 +627,16 @@ libBIDS_csv_iterator() {
617627
}
618628

619629
libBIDSsh_json_to_associative_array() {
620-
local json_file="$1"
621-
declare -n arr_ref="$2" # nameref to the associative array
622-
623-
# Use jq to process the JSON file and output key-value pairs with type prefixes
624-
while IFS="=" read -r key value; do
625-
# Remove quotes from key (jq outputs keys with quotes)
626-
key="${key%\"}"
627-
key="${key#\"}"
628-
arr_ref["$key"]="$value"
629-
done < <(jq -r 'to_entries[] |
630+
local json_file="$1"
631+
declare -n arr_ref="$2" # nameref to the associative array
632+
633+
# Use jq to process the JSON file and output key-value pairs with type prefixes
634+
while IFS="=" read -r key value; do
635+
# Remove quotes from key (jq outputs keys with quotes)
636+
key="${key%\"}"
637+
key="${key#\"}"
638+
arr_ref["$key"]="$value"
639+
done < <(jq -r 'to_entries[] |
630640
"\(.key)=\(
631641
if .value|type == "array" then "array:" + (.value|join(","))
632642
elif .value|type == "object" then "object:" + (.value|tostring)

0 commit comments

Comments
 (0)