Skip to content

Commit a7a7f42

Browse files
committed
auto create data source on project publish
When a project is published, we should always create a DataSource object based on the project storage type, because with DataSource all access will go through it; otherwise the files won't be accessible. This commit creates the DataSource automatically when a project is published. The type of DataSource created depends on the settings. For PhysioNet, the default data storage is direct, and for HDN it is GCP (defined by STORAGE_TYPE). For auto-creation of the DataSource, I added a new environment variable to control the type of DataSource. On second thought, I don't think we need the new environment variable, as we only have two types of storage when a project is published, and the new env variable doesn't serve any new purpose (I will wait for feedback from reviewers).
1 parent b09b1b9 commit a7a7f42

File tree

4 files changed

+25
-1
lines changed

4 files changed

+25
-1
lines changed

.env.example

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,3 +172,9 @@ DEFAULT_NUMBER_OF_APPLICATIONS_TO_REMIND = 5
172172

173173
# minimum number of word needed for research_summary field for Credentialing Model.
174174
MIN_WORDS_RESEARCH_SUMMARY_CREDENTIALING = 20
175+
176+
177+
# Data Source configurations
178+
# Data Source determines how the data is stored (directly on the server, GCS, BigQuery, AWS S3) and accessed (Direct, Google, AWS, Research Environment)
179+
# OPTIONS: direct, google, aws, research_environment
180+
DEFAULT_PROJECT_ACCESS_MECHANISM = 'direct'

physionet-django/physionet/settings/base.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -612,3 +612,5 @@ class StorageTypes:
612612

613613
# minimum number of word needed for research_summary field for Credentialing Model.
614614
MIN_WORDS_RESEARCH_SUMMARY_CREDENTIALING = config('MIN_WORDS_RESEARCH_SUMMARY_CREDENTIALING', cast=int, default=20)
615+
616+
DEFAULT_PROJECT_ACCESS_MECHANISM = config('DEFAULT_PROJECT_ACCESS_MECHANISM', default='direct')

physionet-django/project/projectfiles/gcs.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from django.shortcuts import redirect
55
from google.cloud.exceptions import Conflict, NotFound
66
from physionet.gcs import GCSObject, GCSObjectException, create_bucket, delete_bucket
7+
from project.models import DataSource
78
from project.projectfiles.base import BaseProjectFiles
89
from project.utility import DirectoryInfo, FileInfo, readable_size
910

@@ -117,6 +118,14 @@ def publish_initial(self, active_project, published_project):
117118

118119
def publish_complete(self, active_project, published_project):
119120
self.rm_dir(active_project.file_root())
121+
if settings.DEFAULT_PROJECT_ACCESS_MECHANISM == 'research_environment':
122+
DataSource.objects.create(
123+
project=published_project,
124+
files_available=True,
125+
data_location=DataSource.DataLocation.GOOGLE_CLOUD_STORAGE,
126+
access_mechanism=DataSource.AccessMechanism.RESEARCH_ENVIRONMENT,
127+
uri=f'gs://{published_project.project_file_root()}/',
128+
)
120129

121130
def publish_rollback(self, active_project, published_project):
122131
delete_bucket(published_project.project_file_root())

physionet-django/project/projectfiles/local.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from django.conf import settings
66
from physionet.utility import serve_file, sorted_tree_files, zip_dir
7+
from project.models import DataSource
78
from project.projectfiles.base import BaseProjectFiles
89
from project.utility import (
910
clear_directory,
@@ -120,7 +121,13 @@ def publish_rollback(self, active_project, published_project):
120121
os.rename(published_project.file_root(), active_project.file_root())
121122

122123
def publish_complete(self, active_project, published_project):
123-
pass
124+
if settings.DEFAULT_PROJECT_ACCESS_MECHANISM == 'direct':
125+
DataSource.objects.create(
126+
project=published_project,
127+
files_available=True,
128+
data_location=DataSource.DataLocation.DIRECT,
129+
)
130+
124131

125132
def get_project_file_root(self, slug, version, access_policy, klass):
126133
if access_policy:

0 commit comments

Comments
 (0)