Skip to content

Commit 936f602

Browse files
committed
Simplify code by using "gh api" instead of curl to fetch file lists
Also: - Rename DSRA_REPOSITORY to DSRA_REPO - Rename DSRA_BRANCH to DSRA_REPO_REF - Create the PSRA_REPO and PSRA_REPO_REF too - Replace ?ref=${DSRA_REPO_REF} with ${DSRA_REPO_REF:+?ref=$DSRA_REPO_REF} (Bash’s Parameter Expansion “Use Alternative Value”) so that ?ref= is printed only when $DSRA_REPO_REF is not empty. - Replace "mapfile" with its synonym "readarray" to make it easier for me to understand. - Use Bash’s Parameter Expansion “Assign Default Values” to simplify code near the beginning of python/add_data.sh - Adjust sample.env accordingly
1 parent 98f6394 commit 936f602

File tree

2 files changed

+61
-56
lines changed

2 files changed

+61
-56
lines changed

python/add_data.sh

Lines changed: 40 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,15 @@ ENV_VAR_LIST=(
2626
processDSRA processPSRA
2727
)
2828

29-
ADD_DATA_PRINT_FUNCNAME=${ADD_DATA_PRINT_FUNCNAME:-true}
30-
ADD_DATA_PRINT_LINENO=${ADD_DATA_PRINT_LINENO:-true}
31-
ADD_DATA_REDUCE_DISK_USAGE=${ADD_DATA_REDUCE_DISK_USAGE:-true}
29+
: "${ADD_DATA_PRINT_FUNCNAME:=true}"
30+
: "${ADD_DATA_PRINT_LINENO:=true}"
31+
: "${ADD_DATA_REDUCE_DISK_USAGE:=true}"
3232

33-
DSRA_REPOSITORY=OpenDRR/earthquake-scenarios
34-
DSRA_BRANCH=master
35-
# DSRA_REPOSITORY=OpenDRR/DSRA-processing
36-
# DSRA_BRANCH=six-new-scenarios-sep-2022
33+
: "${PSRA_REPO:=OpenDRR/seismic-risk-model}"
34+
: "${PSRA_REPO_REF:=master}"
35+
36+
: "${DSRA_REPO:=OpenDRR/earthquake-scenarios}"
37+
: "${DSRA_REPO_REF:=master}"
3738

3839
PT_LIST=(AB BC MB NB NL NS NT NU ON PE QC SK YT)
3940
# PT_LIST=(AB MB NB NL NS NT NU ON PE QC SK YT)
@@ -194,7 +195,7 @@ download_luts() {
194195
# on master branch, making into v1.1.0 release on 2023-09-12.
195196
# See https://github.com/OpenDRR/seismic-risk-model/pull/92
196197
RUN fetch_csv seismic-risk-model \
197-
scripts/sourceTypes.csv?ref=master
198+
scripts/sourceTypes.csv${PSRA_REPO_REF:+?ref=$PSRA_REPO_REF}
198199
}
199200

200201
# run_psql runs PostgreSQL queries from a given input SQL file.
@@ -298,12 +299,10 @@ fetch_psra_csv_from_model() {
298299
model=$1
299300

300301
for PT in "${PT_LIST[@]}"; do
301-
RUN curl -H "Authorization: token ${GITHUB_TOKEN}" \
302-
--retry-all-errors --retry-delay 5 --retry-max-time 0 --retry 360 \
303-
-o "${PT}.json" \
304-
-L "https://api.github.com/repos/OpenDRR/seismic-risk-model/contents/$model/output/${PT}?ref=master"
305-
306-
RUN mapfile -t DOWNLOAD_LIST < <(jq -r '.[].url | select(. | contains(".csv"))' "${PT}.json")
302+
RUN readarray -t DOWNLOAD_LIST < <( \
303+
gh api "repos/${PSRA_REPO}/contents/${model}/output/${PT}${PSRA_REPO_REF:+?ref=$PSRA_REPO_REF}" \
304+
-q '.[].url | select(. | contains(".csv"))' \
305+
)
307306

308307
mkdir -p "$model/$PT"
309308
( cd "$model/$PT"
@@ -322,8 +321,6 @@ fetch_psra_csv_from_model() {
322321
# (safe for cH_${PT}_hmaps_xref.csv)
323322
RUN sed -i -r $'1{/^(\xEF\xBB\xBF)?#,/d}' "$FILENAME"
324323
done
325-
# TODO: Use a different for ${PT}.json, and keep for debugging
326-
RUN rm -f "${PT}.json"
327324
)
328325
done
329326
}
@@ -336,12 +333,10 @@ fetch_psra_csv_from_national_model() {
336333
model=$1
337334
PT=Canada
338335

339-
RUN curl -H "Authorization: token ${GITHUB_TOKEN}" \
340-
--retry-all-errors --retry-delay 5 --retry-max-time 0 --retry 360 \
341-
-o "${PT}.json" \
342-
-L "https://api.github.com/repos/OpenDRR/seismic-risk-model/contents/$model/output/Canada?ref=master"
343-
344-
RUN mapfile -t DOWNLOAD_LIST < <(jq -r '.[].url | select(. | contains(".csv"))' "${PT}.json")
336+
RUN readarray -t DOWNLOAD_LIST < <( \
337+
gh api "repos/${PSRA_REPO}/contents/${model}/output/Canada${PSRA_REPO_REF:+?ref=$PSRA_REPO_REF}" \
338+
-q '.[].url | select(. | contains(".csv"))' \
339+
)
345340

346341
mkdir -p "$model/$PT"
347342
( cd "$model/$PT"
@@ -360,8 +355,6 @@ fetch_psra_csv_from_national_model() {
360355
# (safe for cH_${PT}_hmaps_xref.csv)
361356
RUN sed -i -r $'1{/^(\xEF\xBB\xBF)?#,/d}' "$FILENAME"
362357
done
363-
# TODO: Use a different for ${PT}.json, and keep for debugging
364-
RUN rm -f "${PT}.json"
365358
)
366359

367360
}
@@ -494,7 +487,7 @@ get_git_lfs_pointers_of_csv_files() {
494487
RUN rm -rf "$base_dir"
495488
RUN mkdir -p "$base_dir"
496489
( cd "$base_dir" && \
497-
for repo in ${DSRA_REPOSITORY} OpenDRR/openquake-inputs OpenDRR/seismic-risk-model; do
490+
for repo in "${DSRA_REPO}" OpenDRR/openquake-inputs "${PSRA_REPO}"; do
498491
RUN git clone --filter=blob:none --no-checkout "https://${GITHUB_TOKEN}@github.com/${repo}.git"
499492
is_dry_run || \
500493
( RUN cd "$(basename "$repo")" && \
@@ -565,14 +558,18 @@ import_exposure_ancillary_db() {
565558
import_raw_psra_tables() {
566559
LOG "## Importing Raw PSRA Tables"
567560

568-
LOG "### Get list of provinces & territories"
569-
RUN curl -H "Authorization: token ${GITHUB_TOKEN}" \
570-
--retry-all-errors --retry-delay 5 --retry-max-time 0 --retry 360 \
571-
-o output.json \
572-
-L https://api.github.com/repos/OpenDRR/seismic-risk-model/contents/eDamage/output?ref=master
573-
574-
# TODO: Compare PT_LIST with FETCHED_PT_LIST
575-
RUN mapfile -t FETCHED_PT_LIST < <(jq -r '.[].name' output.json)
561+
LOG "### Get list of provinces & territories from ${PSRA_REPO}"
562+
RUN readarray -t FETCHED_PT_LIST < <( \
563+
gh api "repos/${PSRA_REPO}/contents/eDamage/output${PSRA_REPO_REF:+?ref=$PSRA_REPO_REF}" \
564+
-q '.[].name' \
565+
)
566+
if [[ "${PT_LIST[*]}" == "${FETCHED_PT_LIST[*]}" ]]; then
567+
LOG "PT_LIST and FETCHED_PT_LIST are equal: (${PT_LIST[*]})"
568+
else
569+
WARN "PT_LIST and FETCHED_PT_LIST differ:"
570+
WARN "Want: (${PT_LIST[*]})"
571+
WARN "Got : (${FETCHED_PT_LIST[*]})"
572+
fi
576573

577574
# Disable cDamage. As @wkhchow noted in commit 922c409:
578575
# change cDamage reference to eDamage (cDamage will be removed eventually)
@@ -712,22 +709,19 @@ post_process_psra_tables() {
712709

713710
import_earthquake_scenarios() {
714711
LOG "## Get list of earthquake scenarios"
715-
RUN curl -H "Authorization: token ${GITHUB_TOKEN}" \
716-
--retry-all-errors --retry-delay 5 --retry-max-time 0 --retry 360 \
717-
-o FINISHED.json \
718-
-L https://api.github.com/repos/${DSRA_REPOSITORY}/contents/FINISHED?ref=${DSRA_BRANCH}
712+
gh api "repos/${DSRA_REPO}/contents/FINISHED${DSRA_REPO_REF:+?ref=$DSRA_REPO_REF}" > FINISHED.json
719713

720714
# s_lossesbyasset_ACM6p5_Beaufort_r1_299_b.csv → ACM6p5_Beaufort
721-
RUN mapfile -t EQSCENARIO_LIST < <(jq -r '.[].name | scan("(?<=s_lossesbyasset_).*(?=_r1)")' FINISHED.json)
715+
RUN readarray -t EQSCENARIO_LIST < <(jq -r '.[].name | scan("(?<=s_lossesbyasset_).*(?=_r1)")' FINISHED.json)
722716

723717
# s_lossesbyasset_ACM6p5_Beaufort_r1_299_b.csv → ACM6p5_Beaufort_r1_299_b.csv
724-
RUN mapfile -t EQSCENARIO_LIST_LONGFORM < <(jq -r '.[].name | scan("(?<=s_lossesbyasset_).*r1.*\\.csv")' FINISHED.json)
718+
RUN readarray -t EQSCENARIO_LIST_LONGFORM < <(jq -r '.[].name | scan("(?<=s_lossesbyasset_).*r1.*\\.csv")' FINISHED.json)
725719

726720
LOG "## Importing scenario outputs into PostGIS"
727721
for eqscenario in "${EQSCENARIO_LIST[@]}"; do
728722
RUN python3 DSRA_outputs2postgres_lfs.py \
729-
--dsraRepo=${DSRA_REPOSITORY} \
730-
--dsraRepoBranch=${DSRA_BRANCH} \
723+
--dsraRepo="${DSRA_REPO}" \
724+
--dsraRepoBranch="${DSRA_REPO_REF}" \
731725
--columnsINI=DSRA_outputs2postgres.ini \
732726
--eqScenario="$eqscenario"
733727
done
@@ -736,14 +730,14 @@ import_earthquake_scenarios() {
736730
import_shakemap() {
737731
LOG "## Importing Shakemap"
738732
# Make a list of Shakemaps in the repo and download the raw csv files
739-
mapfile -t DOWNLOAD_URL_LIST < <(jq -r '.[].url | scan(".*s_shakemap_.*(?<!MMI)\\.csv")' FINISHED.json)
733+
readarray -t DOWNLOAD_URL_LIST < <(jq -r '.[].url | scan(".*s_shakemap_.*(?<!MMI)\\.csv")' FINISHED.json)
740734
for shakemap in "${DOWNLOAD_URL_LIST[@]}"; do
741735
# Get the shakemap
742736
shakemap_filename=$( echo "$shakemap" | cut -f9- -d/ | cut -f1 -d?)
743737
RUN curl -H "Authorization: token ${GITHUB_TOKEN}" \
744738
--retry-all-errors --retry-delay 5 --retry-max-time 0 --retry 360 \
745739
-o "$shakemap_filename" \
746-
-L "${shakemap}?ref=${DSRA_BRANCH}"
740+
-L "${shakemap}${DSRA_REPO_REF:+?ref=$DSRA_REPO_REF}"
747741
is_dry_run || DOWNLOAD_URL=$(jq -r '.download_url' "$shakemap_filename")
748742
LOG "$DOWNLOAD_URL"
749743
RUN curl -o "$shakemap_filename" \
@@ -755,7 +749,7 @@ import_shakemap() {
755749
done
756750

757751
# Run Create_table_shakemap_update.sql or Create_table_shakemap_update_ste.sql
758-
RUN mapfile -t SHAKEMAP_LIST < <(jq -r '.[].name | scan("s_shakemap_.*\\.csv")' FINISHED.json)
752+
RUN readarray -t SHAKEMAP_LIST < <(jq -r '.[].name | scan("s_shakemap_.*\\.csv")' FINISHED.json)
759753
for ((i=0;i<${#EQSCENARIO_LIST_LONGFORM[@]};i++)); do
760754
item=${EQSCENARIO_LIST_LONGFORM[i]}
761755
#echo ${EQSCENARIO_LIST_LONGFORM[i]}
@@ -781,8 +775,8 @@ import_shakemap() {
781775
import_rupture_model() {
782776
LOG "## Importing Rupture Model"
783777
RUN python3 DSRA_ruptures2postgres.py \
784-
--dsraRuptureRepo=${DSRA_REPOSITORY} \
785-
--dsraRuptureBranch=${DSRA_BRANCH}
778+
--dsraRuptureRepo="${DSRA_REPO}" \
779+
--dsraRuptureBranch="${DSRA_REPO_REF}"
786780

787781
LOG "## Generating indicator views"
788782
for item in "${EQSCENARIO_LIST_LONGFORM[@]}"; do

sample.env

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Sample .env file used by Docker Compose for python-opendrr service
2+
# Note: Do not add any space on either side of the equal signs!
23

34
POSTGRES_USER=postgres
45
POSTGRES_PASS=password
@@ -13,22 +14,32 @@ ES_ENDPOINT=http://elasticsearch-opendrr:9200
1314
ES_USER=
1415
ES_PASS=
1516

16-
processDSRA=true #Process DSRA through PostGIS
17-
loadDsraScenario=true #load DSRA data into ElasticSearch
18-
processPSRA=true #Process PSRA through PostGIS
19-
loadPsraModels=true #load PSRA data into ElasticSearch
17+
processDSRA=true # Process DSRA through PostGIS
18+
loadDsraScenario=true # load DSRA data into ElasticSearch
19+
processPSRA=true # Process PSRA through PostGIS
20+
loadPsraModels=true # load PSRA data into ElasticSearch
2021
loadHazardThreat=false
2122
loadPhysicalExposure=true
2223
loadRiskDynamics=true
2324
loadSocialFabric=true
24-
loadHexGrid = false
25+
loadHexGrid=false
2526

2627
# DSRA scenario selection
2728
autoParseDSRA=false
2829
dsraScenarioList=
2930

30-
# For testing and debugging
31-
#ADD_DATA_DRY_RUN=true
32-
#ADD_DATA_DOWNLOAD_ONLY=true
33-
ADD_DATA_PRINT_FUNCNAME=true
34-
ADD_DATA_PRINT_LINENO=true
31+
# Default values for git repositories and references (branches/tags)
32+
# to DSRA and PSRA data:
33+
34+
# DSRA_REPO=OpenDRR/earthquake-scenarios
35+
# DSRA_REPO_REF=master
36+
37+
# PSRA_REPO=OpenDRR/seismic-risk-model
38+
# PSRA_REPO_REF=master
39+
40+
# For testing and debugging:
41+
42+
# ADD_DATA_PRINT_FUNCNAME=true
43+
# ADD_DATA_PRINT_LINENO=true
44+
# ADD_DATA_REDUCE_DISK_USAGE=false
45+
# ADD_DATA_DRY_RUN=false

0 commit comments

Comments
 (0)