diff --git a/.zenodo.json b/.zenodo.json index 7f26ba2..17313d9 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -37,6 +37,11 @@ }, { "name": "Guenther, Nick" + }, + { + "affiliation": "Center for Adaptive Rationality, Max Planck Institute for Human Development, Berlin, Germany", + "name": "Appelhoff, Stefan", + "orcid": "0000-0001-8002-0877" } ], "keywords": [ diff --git a/datalad_osf/create_sibling_osf.py b/datalad_osf/create_sibling_osf.py index 6a7bd91..ed0b87a 100644 --- a/datalad_osf/create_sibling_osf.py +++ b/datalad_osf/create_sibling_osf.py @@ -37,18 +37,24 @@ @build_doc class CreateSiblingOSF(Interface): - """Create a dataset representation at OSF + """Create a dataset representation at OSF. This will create a project on OSF and initialize an osf special remote to point to it. There are two modes this can operate in: 'annex' and 'export'. The former uses the OSF project as a key-value store, that - can be used to by git-annex to copy data to and retrieve + can be used by git-annex to copy data to and retrieve data from (potentially by any clone of the original dataset). The latter allows to use 'git annex export' to publish a snapshot of a particular version of the dataset. Such an OSF project will - in opposition to the 'annex' - be human-readable. + + For authentication with OSF, you can define environment variables: Either + 'OSF_TOKEN', or both 'OSF_USERNAME' and 'OSF_PASSWORD'. If neither of these + is defined, the tool will fall back to the datalad credential manager and + inquire for credentials interactively. 
+ """ result_renderer = 'tailored' @@ -68,7 +74,7 @@ class CreateSiblingOSF(Interface): ), name=Parameter( args=("-s", "--name",), - doc="""name of the to-be initialized osf-special-remote""", + doc="""Name of the to-be initialized osf-special-remote""", constraints=EnsureStr() ), mode=Parameter( diff --git a/datalad_osf/utils.py b/datalad_osf/utils.py index 0bccbd4..bcefc06 100644 --- a/datalad_osf/utils.py +++ b/datalad_osf/utils.py @@ -120,7 +120,7 @@ def get_credentials(allow_interactive=True): token_auth = Token(name='https://osf.io', url=None) up_auth = UserPassword(name='https://osf.io', url=None) - # get auth token, form environment, or from datalad credential store + # get auth token, from environment, or from datalad credential store # if known-- we do not support first-time entry during a test run token = environ.get( 'OSF_TOKEN', diff --git a/docs/source/exportdatacode.rst b/docs/source/exportdatacode.rst index b89e417..5d45b47 100644 --- a/docs/source/exportdatacode.rst +++ b/docs/source/exportdatacode.rst @@ -1,3 +1,4 @@ +.. include:: ./links.inc Export version-controlled data to OSF and code to GitHub ******************************************************** @@ -5,12 +6,12 @@ Export version-controlled data to OSF and code to GitHub Imagine you are a PhD student and want to collaborate on a fun little side project with a student at another institute. It is quite obvious for the two of you that your code will be hosted on GitHub_. And you also know enough about -DataLad, that using it for the whole project will be really beneficial. +DataLad_, that using it for the whole project will be really beneficial. But what about the data you are collecting? -The Dropbox is already full (`DataLad third party providers `_). And Amazon services don't seem to be -your best alternative. -Suddenly you remember, that you got an OSF_ account recently, and that there is this nice `Datalad extension `_ to set up a SpecialRemote on OSF_. 
+The Dropbox is already full (`DataLad third party providers `_). +And Amazon services don't seem to be your best alternative. +Suddenly you remember, that you got an OSF_ account recently, and that there is this nice `Datalad extension `_ to set up a `Special Remote`_ on OSF_. Walk through ------------ @@ -27,7 +28,7 @@ For installation checkout the installation page of the documentation. Creating an Example Dataset ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -As a very first step you want to set up a DataLad Dataset. For this you should +As a very first step you want to set up a DataLad dataset. For this you should run. In all examples a `$` in front indicates a new line in the Bash-Shell, copying it will prevent your code from execution. @@ -35,8 +36,8 @@ copying it will prevent your code from execution. $ datalad create collab_osf -After having created the dataset we want to populate it with some content (just -like in the Handbook). Importantly we don't want to upload this file on GitHub, only on OSF - in the real world this could be your data that is too large to upload to GitHub. +After having created the dataset we want to populate it with some content (just like in the `DataLad Handbook`_). +Importantly we don't want to upload this file on GitHub, only on OSF - in the real world this could be your data that is too large to upload to GitHub. .. code-block:: bash @@ -52,11 +53,10 @@ And we also want to add a text file, which will be saved on GitHub_ - in your ca $ mkdir code $ cd code - $ echo "This is just an example file just to show the different ways of saving data in a DataLad Dataset." > example.txt + $ echo "This is just an example file just to show the different ways of saving data in a DataLad dataset." > example.txt $ datalad save --to-git -m "created an example.txt" -We now have a Dataset with one file that can be worked on using GitHub and one -that should be tracked using `git-annex`. 
+We now have a dataset with one file that can be worked on using GitHub and one that should be tracked using `git-annex`. Setting up the OSF Remote ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -85,8 +85,3 @@ We can set-up a GitHub Remote with name `github` and include a publish dependenc $ datalad publish . --to github --transfer-data all This will publish example.txt in code/ to GitHub and only add the folder structure and symbolic links for all other file; at the same time it will upload the data to OSF - this way you can let OSF handle your data and GitHub your code. - - - -.. _OSF: https://www.osf.io/ -.. _GitHub: https://www.github.com/ diff --git a/docs/source/exporthumandata.rst b/docs/source/exporthumandata.rst index c957503..febaed3 100644 --- a/docs/source/exporthumandata.rst +++ b/docs/source/exporthumandata.rst @@ -1,7 +1,9 @@ +.. include:: ./links.inc + Export a human-readable dataset to OSF ************************************** -Imagine you have been creating a reproducible workflow using DataLad from the +Imagine you have been creating a reproducible workflow using DataLad_ from the get go. Everything is finished now, code, data, and paper are ready. Last thing to do: Publish your data. @@ -21,7 +23,7 @@ For installation checkout the installation page of the documentation. Creating an Example Dataset ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -We will create a small example DataLad Dataset to show the functionality. +We will create a small example DataLad dataset to show the functionality. .. code-block:: bash @@ -31,7 +33,7 @@ We will create a small example DataLad Dataset to show the functionality. # Copying the $ will prevent your code from execution. After having created the dataset we want to populate it with some content (just -like in the Handbook): +like in the `Datalad Handbook`_): .. 
code-block:: bash @@ -44,21 +46,37 @@ like in the Handbook): Setting up the OSF Remote ^^^^^^^^^^^^^^^^^^^^^^^^^ -To use OSF as a storage, you need to provide either your OSF credentials or an OSF access token. -You can create such a token in your account settings (`Personal access token` and then `Create token`), make sure to create a `full_write` token to be able to create OSF projects and upload data to OSF. +To use OSF as a storage, you first need to provide either your OSF credentials (username and password) or an OSF access token. + +If you choose to use your credentials, proceed as follows: + +.. code-block:: bash + + export OSF_USERNAME=YOUR_USERNAME_FOR_OSF.IO + export OSF_PASSWORD=YOUR_PASSWORD_FOR_OSF.IO + +In this example, we are going to use an OSF access token instead. +You can create such a token in your account settings (`Personal access token` and then `Create token`). +Make sure to create a `full_write` token to be able to create OSF projects and upload data to OSF. .. code-block:: bash export OSF_TOKEN=YOUR_TOKEN_FROM_OSF.IO -We are now going to use datalad to create a sibling dataset on OSF with name `osf` - this will create a new project called `OSF_PROJECT_NAME` on the OSF account associated with the OSF token in `$OSF_TOKEN`. +We are now going to use datalad to create a sibling dataset on OSF with name `OSF_PROJECT_NAME`. +This will create a new project called `OSF_PROJECT_NAME` on the OSF account associated with the OSF token in `$OSF_TOKEN`. + +Note that the ``-s NAME_OF_REMOTE`` flag is used to specify how ``git`` internally refers to your OSF project with the name `OSF_PROJECT_NAME`. +It would be completely fine to use `OSF_PROJECT_NAME` also as a value for the ``-s`` flag. + +You can later on list your remotes from the command line using the ``git remote -v`` command. .. 
code-block:: bash - $ datalad create-sibling-osf -s osf OSF_PROJECT_NAME --mode export + $ datalad create-sibling-osf -s NAME_OF_REMOTE OSF_PROJECT_NAME --mode export After that we can export the current state (the `HEAD`) of our dataset in human readable form to OSF: .. code-block:: bash - git annex export HEAD --to YOUR_OSF_REMOTE_NAME + git annex export HEAD --to NAME_OF_REMOTE diff --git a/docs/source/index.rst b/docs/source/index.rst index 93a40ec..10748d0 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,11 +1,15 @@ +.. include:: ./links.inc + DataLad extension to interface with OSF *************************************** -This extension enables DataLad to work with the Open Science Framework (OSF). Use it to publish your dataset's data to an OSF project to utilize the OSF for dataset data storage and easy dataset sharing. +This extension enables DataLad_ to work with the Open Science Framework (OSF_). +Use it to publish your dataset's data to an OSF project to utilize the OSF for dataset data storage and easy dataset sharing. The extension was created during the OHBM Hackathon 2020. -If you have any questions, comments, bug fixes or improvement suggestions, feel free to contact us via our `Github page `_. Before contributing, be sure to read the contributing guidelines. +If you have any questions, comments, bug fixes or improvement suggestions, feel free to contact us via our `Github page `_. +Before contributing, be sure to read the `contributing guidelines `_. .. toctree:: @@ -13,7 +17,7 @@ If you have any questions, comments, bug fixes or improvement suggestions, feel Documentation ============= -.. toctree:: +.. toctree:: :maxdepth: 2 intro @@ -64,5 +68,3 @@ Indices and tables * :ref:`search` .. |---| unicode:: U+02014 .. em dash - -.. _OSF: http://www.osf.io/ diff --git a/docs/source/intro.rst b/docs/source/intro.rst index 177d96c..a217121 100644 --- a/docs/source/intro.rst +++ b/docs/source/intro.rst @@ -1,19 +1,24 @@ +.. 
include:: ./links.inc + Introduction ------------ Goal of the extension ^^^^^^^^^^^^^^^^^^^^^ -This extension aims to allow DataLad to work with the Open Science Framework (OSF). This is done by transforming storage on the Open Science Framework (OSF) into a `git-annex `_ repository. +This extension aims to allow DataLad_ to work with the Open Science Framework (OSF_). +This is done by transforming storage on the Open Science Framework (OSF) into a `git-annex`_ repository. What can I use this extension for? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -You can use this extension to use the OSF as a special remote to store data in the annex of a dataset. With this, you can `datalad publish` a dataset to GitHub or similar services and the data to the OSF (via a publication dependency). -The extension is most beneficial for easy access to data stored on OSF via GitHub. If you are sharing your dataset via OSF and code via GitHub, this will allow smooth integration of both along with unified version management provided by DataLad. +You can use this extension to use the OSF as a special remote to store data in the annex of a dataset. +With this, you can `datalad publish` a dataset to GitHub or similar services and the data to the OSF (via a publication dependency). +The extension is most beneficial for easy access to data stored on OSF via GitHub. +If you are sharing your dataset via OSF and code via GitHub, this will allow smooth integration of both along with unified version management provided by DataLad. What can I **not** use this extension for? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This tool does not work for data that is stored in a storage service other than OSF. -Please refer to the `list of special remotes `_ as hosted by the git-annex website for other storage services. +Please refer to the list of `special remotes`_ as hosted by the git-annex website for other storage services. 
diff --git a/docs/source/links.inc b/docs/source/links.inc new file mode 100644 index 0000000..504f0cd --- /dev/null +++ b/docs/source/links.inc @@ -0,0 +1,20 @@ +.. This (-*- rst -*-) format file contains commonly used link targets + and name substitutions. It may be included in many files, + therefore it should only contain link targets and name + substitutions. Try grepping for "^\.\. _" to find plausible + candidates for this list. + +.. NOTE: reST targets are + __not_case_sensitive__, so only one target definition is needed for + nipy, NIPY, Nipy, etc... + + +.. _DataLad: https://www.datalad.org +.. _DataLad Handbook: http://handbook.datalad.org/en/latest/ +.. _GitHub: https://www.github.com/ +.. _git-annex: https://git-annex.branchable.com/ +.. _git: https://git-scm.com/ +.. _OSF: https://www.osf.io/ +.. _Python: https://www.python.org/ +.. _Special Remote: https://git-annex.branchable.com/special_remotes/ +.. _Special Remotes: https://git-annex.branchable.com/special_remotes/ diff --git a/docs/source/settingup.rst b/docs/source/settingup.rst index b79bbda..ad6901b 100644 --- a/docs/source/settingup.rst +++ b/docs/source/settingup.rst @@ -1,3 +1,5 @@ +.. include:: ./links.inc + Setting up ========== @@ -6,7 +8,7 @@ Requirements - DataLad -Before being able to use the extension, you need to have DataLad installed, which relies on `git-annex `_, `git `_ and `Python `_. +Before being able to use the extension, you need to have DataLad installed, which relies on `git-annex`_, `git`_ and `Python`_. If you don't have DataLad installed yet, please follow the instructions from `the datalad handbook `_. - An account on the OSF @@ -15,7 +17,7 @@ You need an OSF account to be able to interact with it. 
If you don't have an acc - An account on a git repository hosting site -You should consider having an account on one or more repository hosting sites such as `GitHub `_ , `GitLab `_, `Bitbucket `_ or similar" +You should consider having an account on one or more repository hosting sites such as `GitHub `__, `GitLab `_, `Bitbucket `_ or similar. Installation ------------