This repository was archived by the owner on Mar 15, 2019. It is now read-only.
23 changes: 0 additions & 23 deletions .circleci/config.yml
@@ -5,14 +5,6 @@ workflows:
default:
jobs:
- "python-3.6"
- "deploy-conda":
requires:
- "python-3.6"
filters:
tags:
only: /^v.*/
branches:
ignore: /.*/


jobs:
@@ -78,18 +70,3 @@ jobs:
- store_artifacts:
path: docs/_build/html
destination: html


"deploy-conda":
docker:
- image: continuumio/miniconda:latest

steps:
# Get our data and merge with upstream
- checkout

- run:
name: Deploy package release to Anaconda.org
command: |
conda install conda-build anaconda-client --yes --quiet
./ci/upload-anaconda.sh
31 changes: 31 additions & 0 deletions binder/environment.yml
@@ -0,0 +1,31 @@
name: intake-cmip-dev
channels:
- conda-forge
- intake
- defaults
dependencies:
- python=3.6
- numpy
- intake
- intake-xarray
- xarray
- netcdf4
- dask
- distributed
- autopep8
- flake8
- black
- isort
- pytest
- coverage
- pytest-cov
- codecov
- sphinx>=1.6
- sphinx_rtd_theme
- recommonmark
- numpydoc
- nbsphinx
- pandoc
- ipykernel
- pip:
- sphinx_copybutton
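
For anyone recreating this setup outside of Binder, the environment can be built locally with `conda env create -f binder/environment.yml` and activated with `conda activate intake-cmip-dev` (the name declared at the top of the file).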
56 changes: 0 additions & 56 deletions conda/meta.yaml

This file was deleted.

209 changes: 109 additions & 100 deletions intake_cmip/cmip5.py
@@ -24,17 +24,8 @@ class CMIP5DataSource(intake_xarray.base.DataSourceMixin):
partition_access = True
name = "cmip5"

def __init__(
self,
database,
model,
experiment,
frequency,
realm,
ensemble,
varname=None,
metadata=None,
):
def __init__(self, database, metadata=None):

"""

Parameters
@@ -43,6 +34,53 @@ def __init__(
database : string or file handle
File path or object for cmip5 database. For users with access to
NCAR's glade file system, this argument can be set to 'glade'.
"""

# store important kwargs
self.database = self._read_database(database)
self.urlpath = ""
self.query = {}
self.query_results = None
self._ds = None
super(CMIP5DataSource, self).__init__(metadata=metadata)

def _read_database(self, database):
if database == "glade":
database = glade_cmip5_db
if os.path.exists(database):

Reviewer comment:

Does this have to be a local file, or can we accept remote things like elsewhere in intake?

Contributor Author:

It doesn't need to be a local file. I just opted for a local file for prototyping purposes for the time being. As this matures, it makes sense to support remote databases/files.
return pd.read_csv(database)
else:
raise FileNotFoundError(f"{database}")
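
Following up on the review thread above: pandas.read_csv reads directly from http(s) URLs as well as local paths, so remote database support would be a small change. A minimal sketch, not part of this PR, reusing the module's existing imports (os, pd, glade_cmip5_db):

def _read_database(self, database):
    if database == "glade":
        database = glade_cmip5_db
    # pd.read_csv accepts URLs directly, so a scheme check is enough
    # to route remote databases past the local-file existence test.
    if str(database).startswith(("http://", "https://")):
        return pd.read_csv(database)
    if os.path.exists(database):
        return pd.read_csv(database)
    raise FileNotFoundError(f"{database}")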

def _open_dataset(self):

ens_filepaths = self._get_ens_filepaths()

ds_list = [xr.open_mfdataset(paths) for paths in ens_filepaths.values()]
ens_list = list(ens_filepaths.keys())
self._ds = xr.concat(ds_list, dim="ensemble")
self._ds["ensemble"] = ens_list

def to_xarray(self, dask=True):
"""Return dataset as an xarray instance"""
if dask:
return self.to_dask()
return self.read()

def search(
self,
model=None,
experiment=None,
frequency=None,
realm=None,
ensemble=None,
varname=None,
):

"""
Parameters
-----------

model : str
identifies the model used (e.g. HADCM3, HADCM3-233).
experiment : str
@@ -82,92 +120,63 @@ def __init__(

"""

# store important kwargs
self.database = self._read_database(database)
self.model = model
self.experiment = experiment
self.frequency = frequency
self.realm = realm
self.ensemble = ensemble
self.varname = varname
self.urlpath = ""
self._ds = None
super(CMIP5DataSource, self).__init__(metadata=metadata)

def _read_database(self, database):
if database == "glade":
database = glade_cmip5_db
if os.path.exists(database):
return pd.read_csv(database)
else:
raise FileNotFoundError(f"{database}")

def _open_dataset(self):
ens_filepaths = get_ens_filepaths(
self.database,
self.model,
self.experiment,
self.frequency,
self.realm,
self.ensemble,
self.varname,
)

ds_list = [xr.open_mfdataset(paths) for paths in ens_filepaths.values()]
ens_list = list(ens_filepaths.keys())
self._ds = xr.concat(ds_list, dim="ensemble")
self._ds["ensemble"] = ens_list

def to_xarray(self, dask=True):
"""Return dataset as an xarray instance"""
if dask:
return self.to_dask()
return self.read()


def get_ens_filepaths(database, model, experiment, frequency, realm, ensemble, varname):
query = {
"model": model,
"experiment": experiment,
"frequency": frequency,
"realm": realm,
"ensemble": ensemble,
"varname": varname,
}

condition = np.ones(len(database), dtype=bool)

for key, val in query.items():
if val is not None:

condition = condition & (database[key] == val)

database_subset = database.loc[condition]

if database_subset.empty:

raise ValueError(
f"No dataset found for:\n \
\tmodel = {model} \n \
\texperiment = {experiment} \n \
\tfrequency = {frequency} \n \
\trealm = {realm} \n \
\tensemble = {ensemble} \n \
\tvarname = {varname}"
)

# -- realm is optional arg so check that the same varname is not in multiple realms
realm_list = database_subset.realm.unique()
if len(realm_list) != 1:
raise ValueError(
f"{varname} found in multiple realms:\n \
'\t{realm_list}. Please specify the realm to use"
)

ds_dict = OrderedDict()
for ens in database_subset["ensemble"].unique():
ens_match = database_subset["ensemble"] == ens
paths = database_subset.loc[ens_match]["file_fullpath"].tolist()
ds_dict[ens] = paths

return ds_dict
self.query = {
"model": model,
"experiment": experiment,
"frequency": frequency,
"realm": realm,
"ensemble": ensemble,
"varname": varname,
}
database = self.database
condition = np.ones(len(database), dtype=bool)

for key, val in self.query.items():
if val is not None:

condition = condition & (database[key] == val)

self.query_results = database.loc[condition]
return self

def results(self):
return self.query_results

def _get_ens_filepaths(self):
if self.query_results.empty:
raise ValueError(
f"No dataset found for:\n \
\tmodel = {self.query['model']}\n \
\texperiment = {self.query['experiment']} \n \
\tfrequency = {self.query['frequency']} \n \
\trealm = {self.query['realm']} \n \
\tensemble = {self.query['ensemble']} \n \
\tvarname = {self.query['varname']}"
)

models = self.query_results.model.nunique() > 1
experiments = self.query_results.experiment.nunique() > 1
frequencies = self.query_results.frequency.nunique() > 1

if models or experiments or frequencies:

raise ValueError(
f"Invalid results for search query = {self.query}.\n\
Please specify unique model, experiment, and frequency to use"
)

# Check that the same varname is not in multiple realms
realm_list = self.query_results.realm.unique()
if len(realm_list) != 1:
raise ValueError(
f"{self.query['varname']} found in multiple realms:\
\t{self.query['realm_list']}. Please specify the realm to use"
)

ds_dict = OrderedDict()
for ens in self.query_results["ensemble"].unique():
ens_match = self.query_results["ensemble"] == ens
paths = self.query_results.loc[ens_match]["file_fullpath"].tolist()
ds_dict[ens] = paths

return ds_dict
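
Putting the refactor together: the constructor now only loads the database, search() filters it and returns the source so calls chain, and to_xarray() triggers _open_dataset to concatenate the matching files along an "ensemble" dimension. A usage sketch of the new API; the filter values below are illustrative (the "glade" shortcut and the HADCM3 example come from the docstrings above):

from intake_cmip.cmip5 import CMIP5DataSource

# Load the CMIP5 database CSV once ("glade" resolves to NCAR's copy).
source = CMIP5DataSource(database="glade")

# search() stores the query, filters the database with a boolean mask,
# and returns the source itself, so it chains into to_xarray().
ds = source.search(
    model="HADCM3",      # illustrative filters; any keyword may be omitted
    experiment="rcp45",
    frequency="mon",
    realm="atmos",
    varname="tas",
).to_xarray()

print(source.results())  # the filtered pandas DataFrame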