diff --git a/datalad_dataverse/remote.py b/datalad_dataverse/remote.py index a60186d..468e093 100644 --- a/datalad_dataverse/remote.py +++ b/datalad_dataverse/remote.py @@ -1,8 +1,10 @@ +import os +import re + from datalad.customremotes import SpecialRemote from datalad.customremotes.main import main as super_main from pyDataverse.api import NativeApi, DataAccessApi from pyDataverse.models import Datafile -import os from requests import delete from requests.auth import HTTPBasicAuth @@ -13,6 +15,8 @@ def __init__(self, *args): super().__init__(*args) self.configs['url'] = 'The Dataverse URL for the remote' self.configs['doi'] = 'DOI to the dataset' + self._doi = None + self._url = None self._api = None def initremote(self): @@ -20,7 +24,7 @@ def initremote(self): Use this command to initialize a remote git annex initremote dv1 type=external externaltype=dataverse encryption=none """ - if self.annex.getconfig('url') is None or self.annex.getconfig('doi') is None: + if self.annex.getconfig('url') == '' or self.annex.getconfig('doi') == '': raise ValueError('url and doi must be specified') # check if instance is readable and authenticated @@ -33,6 +37,22 @@ def initremote(self): if not dv_ds.ok: raise RuntimeError("Cannot find dataset") + @property + def url(self): + if self._url is None: + self._url = self.annex.getconfig('url') + # remove trailing slash in URL + if self._url is not None and self._url.endswith('/'): + self._url = self._url[:-1] + return self._url + + @property + def doi(self): + if self._doi is None: + self._doi = self.annex.getconfig('doi') + self._doi = _format_doi(self._doi) + return self._doi + @property def api(self): if self._api is None: @@ -46,6 +66,8 @@ def api(self): def prepare(self): # trigger API instance in order to get possibly auth/connection errors # right away + self.url + self.doi self.api def checkpresent(self, key): @@ -123,6 +145,25 @@ def remove(self, key): status.raise_for_status() +def _format_doi(doi_in: str) -> str: + """ + Converts unformatted DOI strings into the format needed in the dataverse API. Compatible with DOIs starting + with "doi:", as URL or raw (i.e. 10.5072/FK2/WQCBX1). + + :param doi_in: unformatted doi string provided by user + :returns: DOI string as needed for dataverse API, None if string is empty. + """ + dataverse_doi_pattern = r'^doi:' + if re.match(pattern=dataverse_doi_pattern, string=doi_in): + return doi_in + + url_doi_pattern = r'^https?:\/\/doi\.org\/' + if re.match(url_doi_pattern, doi_in): + return re.sub(pattern=url_doi_pattern, repl='doi:', string=doi_in) + + return f'doi:{doi_in}' + + def main(): """cmdline entry point""" super_main(