Skip to content

Commit d99c1e4

Browse files
authored
Merge pull request #889 from hubmapconsortium/shirey/remove-index-perms-option
remove suspend_indexing_and_acls from publish endpoint and never rein…
2 parents acb6353 + 9904a5a commit d99c1e4

File tree

1 file changed

+28
-41
lines changed

1 file changed

+28
-41
lines changed

src/app.py

Lines changed: 28 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1146,10 +1146,6 @@ def publish_datastage(identifier):
11461146
if r.ok is False:
11471147
raise ValueError("Cannot find specimen with identifier: " + identifier)
11481148
dataset_uuid = json.loads(r.text)['hm_uuid']
1149-
suspend_indexing_and_acls = string_helper.isYes(request.args.get('suspend-indexing-and-acls'))
1150-
no_indexing_and_acls = False
1151-
if suspend_indexing_and_acls:
1152-
no_indexing_and_acls = True
11531149

11541150
entities_to_reindex = []
11551151
with neo4j_driver_instance.session() as neo_session:
@@ -1330,8 +1326,7 @@ def publish_datastage(identifier):
13301326

13311327
collection = {'description': dataset_description, 'title': "A collection of datasets from Publication: " + entity['title'], 'contacts': dataset_contacts, 'contributors': dataset_contributors, "dataset_uuids": parent_uuids, "group_uuid": dataset_group_uuid}
13321328
post_url = f"{commons_file_helper.ensureTrailingSlashURL(app.config['ENTITY_WEBSERVICE_URL'])}" \
1333-
f"entities/collection" \
1334-
f"{'?reindex=false' if suspend_indexing_and_acls else ''}"
1329+
f"entities/collection?reindex=false"
13351330
response = requests.post(post_url, json=collection, headers={'Authorization': 'Bearer ' + token, 'X-Hubmap-Application': 'ingest-api'},verify=False)
13361331
if not response.status_code == 200:
13371332
error_msg = f"Failed to create collection for Publication {dataset_uuid} failed with code:{response.status_code} message:" + response.text
@@ -1345,8 +1340,7 @@ def publish_datastage(identifier):
13451340
collection_updates['registered_doi'] = collection_doi_info['registered_doi']
13461341
collection_updates['doi_url'] = collection_doi_info['doi_url']
13471342
put_url = f"{commons_file_helper.ensureTrailingSlashURL(app.config['ENTITY_WEBSERVICE_URL'])}" \
1348-
f"entities/{created_collection['uuid']}" \
1349-
f"{'?reindex=false' if suspend_indexing_and_acls else ''}"
1343+
f"entities/{created_collection['uuid']}?reindex=false"
13501344
response = requests.put(put_url, json=collection_updates, headers={'Authorization': 'Bearer ' + token, 'X-Hubmap-Application': 'ingest-api'}, verify=False)
13511345
if not response.status_code == 200:
13521346
error_msg = f"Update to Collection {created_collection['uuid']} failed, collection is attached to Publication {dataset_uuid}. Publication not Published, but Collection created failed with code:{response.status_code} message: " + response.text
@@ -1357,40 +1351,45 @@ def publish_datastage(identifier):
13571351
#add status change to 'Published', except for component datasets which must be handled separately
13581352
if not is_component:
13591353
dataset_updates['status'] = 'Published'
1360-
1354+
13611355
#update the dataset
13621356
put_url = f"{commons_file_helper.ensureTrailingSlashURL(app.config['ENTITY_WEBSERVICE_URL'])}" \
1363-
f"entities/{dataset_uuid}" \
1364-
f"{'?reindex=false' if suspend_indexing_and_acls else ''}"
1357+
f"entities/{dataset_uuid}?reindex=false"
13651358
response = requests.put(put_url, json=dataset_updates, headers={'Authorization': 'Bearer ' + token, 'X-Hubmap-Application': 'ingest-api'}, verify=False)
13661359
if not response.status_code == 200:
13671360
error_msg = f"Update to Dataset {dataset_uuid} failed with code:{response.status_code} message:" + response.text
13681361
logger.error(error_msg)
13691362
return Response(error_msg, response.status_code)
13701363

13711364
#for component datasets we must still change the status in Neo4j because the entity-api put locks us out
1365+
#for all (component and non-component) datasets, we must update the published_timestamp field directly in Neo4j because it is
1366+
#immutable in entity-api
1367+
#
13721368
if is_component:
1373-
update_query = f"match (ds {{uuid:'{dataset_uuid}'}}) set ds.status = 'Published'"
1374-
try:
1375-
with neo4j_driver_instance.session() as neo_session:
1376-
tx = neo_session.begin_transaction()
1377-
result = tx.run(update_query)
1378-
tx.commit()
1379-
except TransactionError as e:
1380-
if tx and tx.closed() == False:
1381-
tx.rollback()
1382-
logger.exception(e)
1383-
return Response(f"Error while updating status on Component dataset {dataset_uuid}. See logs. Transaction error: {e}.", 500)
1384-
except Exception as ex:
1385-
logger.exception(ex)
1386-
return Response(f"Unexpected error while updating status on Component datast {dataset_uuid}. See logs. {ex}", 500)
1369+
set_clause = "set ds.status = 'Published', ds.published_timestamp = timestamp()"
1370+
else:
1371+
set_clause = "set ds.published_timestamp = timestamp()"
1372+
1373+
update_query = f"match (ds {{uuid:'{dataset_uuid}'}}) {set_clause}"
1374+
try:
1375+
with neo4j_driver_instance.session() as neo_session:
1376+
tx = neo_session.begin_transaction()
1377+
result = tx.run(update_query)
1378+
tx.commit()
1379+
except TransactionError as e:
1380+
if tx and tx.closed() == False:
1381+
tx.rollback()
1382+
logger.exception(e)
1383+
return Response(f"Error while updating published_timestamp and status (for component only) on dataset {dataset_uuid}. See logs. Transaction error: {e}.", 500)
1384+
except Exception as ex:
1385+
logger.exception(ex)
1386+
return Response(f"Unexpected error while updating published_timestamp and status (for component only) on dataset {dataset_uuid}. See logs. {ex}", 500)
13871387

13881388
# if all else worked set the list of ids to public that need to be public
13891389
base_update_url = f"{commons_file_helper.ensureTrailingSlashURL(app.config['ENTITY_WEBSERVICE_URL'])}entities/"
1390-
update_url_suffix = f"{'?reindex=false' if suspend_indexing_and_acls else ''}"
13911390
headers={'Authorization': 'Bearer ' + token, 'X-Hubmap-Application': 'ingest-api', 'X-HuBMAP-Update-Override': app.config['LOCKED_ENTITY_UPDATE_OVERRIDE_KEY']}
13921391
for upid in uuids_for_public:
1393-
update_url = base_update_url + upid + update_url_suffix
1392+
update_url = base_update_url + upid + "?reindex=false"
13941393
resp = requests.put(update_url, json={'data_access_level': 'public'}, headers=headers, verify=False)
13951394
if not resp.status_code == 200:
13961395
error_message = f"Error while updating data_access_level on entity: {upid}, Dataset {dataset_uuid} may be published, but not all ancestors may be set to public and metadata files have not been updated!! {resp.text}"
@@ -1434,7 +1433,7 @@ def publish_datastage(identifier):
14341433
# This must be done after ALL files are written because calling it with published=True causes the
14351434
# directory to be made READ/EXECUTE only and any attempt to write a file will cause a server 500 error.
14361435
acls_cmd = ingest_helper.set_dataset_permissions(dataset_uuid, dataset_group_uuid, data_access_level,
1437-
True, no_indexing_and_acls)
1436+
True, True)
14381437

14391438

14401439
#find all of the files that match *metadata.tsv under the dataset's directory
@@ -1458,19 +1457,7 @@ def publish_datastage(identifier):
14581457
tsv_data.to_csv(tsv_file, sep='\t', index=False)
14591458

14601459

1461-
if no_indexing_and_acls:
1462-
r_val = {'acl_cmd': acls_cmd, 'entities_for_indexing': entities_to_reindex, 'relink_cmd': relink_cmd}
1463-
1464-
else:
1465-
r_val = {'acl_cmd': '', 'entities_for_indexing': [], 'relink_cmd': relink_cmd}
1466-
1467-
if not no_indexing_and_acls:
1468-
for ent_uuid in entities_to_reindex:
1469-
try:
1470-
rspn = requests.put(app.config['SEARCH_WEBSERVICE_URL'] + "/reindex/" + entity_uuid, headers={'Authorization': request.headers["AUTHORIZATION"]})
1471-
logger.info(f"Publishing {identifier} indexed entity {entity_uuid} with status {rspn.status_code}")
1472-
except:
1473-
logger.exception(f"While publishing {identifier} Error happened when calling reindex web service for entity {ent_uuid}")
1460+
r_val = {'acl_cmd': acls_cmd, 'entities_for_indexing': entities_to_reindex, 'relink_cmd': relink_cmd}
14741461

14751462
#inner function to copy public information from a protected dataset to a public dataset
14761463
def copy_protected_to_public():

0 commit comments

Comments
 (0)