Skip to content

Commit

Permalink
improve doi and url handling
Browse files Browse the repository at this point in the history
  • Loading branch information
ksarink committed Jun 17, 2022
1 parent 668fc1f commit c88db24
Showing 1 changed file with 43 additions and 2 deletions.
45 changes: 43 additions & 2 deletions datalad_dataverse/remote.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import os
import re

from datalad.customremotes import SpecialRemote
from datalad.customremotes.main import main as super_main
from pyDataverse.api import NativeApi, DataAccessApi
from pyDataverse.models import Datafile
import os
from requests import delete
from requests.auth import HTTPBasicAuth

Expand All @@ -13,14 +15,16 @@ def __init__(self, *args):
super().__init__(*args)
self.configs['url'] = 'The Dataverse URL for the remote'
self.configs['doi'] = 'DOI to the dataset'
self._doi = None
self._url = None
self._api = None

def initremote(self):
"""
Use this command to initialize a remote
git annex initremote dv1 type=external externaltype=dataverse encryption=none
"""
if self.annex.getconfig('url') is None or self.annex.getconfig('doi') is None:
if self.annex.getconfig('url') == '' or self.annex.getconfig('doi') == '':
raise ValueError('url and doi must be specified')

# check if instance is readable and authenticated
Expand All @@ -33,6 +37,22 @@ def initremote(self):
if not dv_ds.ok:
raise RuntimeError("Cannot find dataset")

@property
def url(self):
    """Return the configured Dataverse URL without trailing slashes.

    On first access the value is read from the remote's ``url`` setting
    via ``self.annex.getconfig('url')`` and cached in ``self._url``.
    While the cached value is ``None`` the setting is looked up again on
    the next access.

    :returns: the URL with all trailing ``/`` characters removed, or
        ``None`` if the configuration lookup returned ``None``.
    """
    if self._url is None:
        configured = self.annex.getconfig('url')
        if configured is not None:
            # Drop every trailing slash, not just one, so that input like
            # "https://host//" is normalized as well.
            # NOTE(review): presumably this avoids doubled slashes when
            # paths are appended to the URL -- confirm against the callers.
            configured = configured.rstrip('/')
        self._url = configured
    return self._url

@property
def doi(self):
    """Return the dataset DOI, normalized by ``_format_doi``.

    The raw value comes from the remote's ``doi`` setting
    (``self.annex.getconfig('doi')``). The formatted result is cached in
    ``self._doi`` and reused for as long as it is not ``None``.
    """
    if self._doi is None:
        raw_doi = self.annex.getconfig('doi')
        self._doi = _format_doi(raw_doi)
    return self._doi

@property
def api(self):
if self._api is None:
Expand All @@ -46,6 +66,8 @@ def api(self):
def prepare(self):
    """Access ``url``, ``doi`` and ``api`` once, in that order.

    Reading the attributes up front means that possible
    authentication/connection errors from setting up the API instance
    surface right away rather than on first use.
    """
    # Only the attribute access matters here; the values are discarded.
    for attribute in ('url', 'doi', 'api'):
        getattr(self, attribute)

def checkpresent(self, key):
Expand Down Expand Up @@ -123,6 +145,25 @@ def remove(self, key):
status.raise_for_status()


def _format_doi(doi_in: str) -> str:
"""
Converts unformatted DOI strings into the format needed in the dataverse API. Compatible with DOIs starting
with "doi:", as URL or raw (i.e. 10.5072/FK2/WQCBX1).
:param doi_in: unformatted doi string provided by user
:returns: DOI string as needed for dataverse API, None if string is empty.
"""
dataverse_doi_pattern = r'^doi:'
if re.match(pattern=dataverse_doi_pattern, string=doi_in):
return doi_in

url_doi_pattern = r'^https?:\/\/doi\.org\/'
if re.match(url_doi_pattern, doi_in):
return re.sub(pattern=url_doi_pattern, repl='doi:', string=doi_in)

return f'doi:{doi_in}'


def main():
"""cmdline entry point"""
super_main(
Expand Down

0 comments on commit c88db24

Please sign in to comment.