diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d4c6b6d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+.DS_Store
+.DS_Store?
+**/.DS_Store
+
diff --git a/src/data/Data_load_clean.ipynb b/src/data/Data_load_clean.ipynb
new file mode 100644
index 0000000..45ed753
--- /dev/null
+++ b/src/data/Data_load_clean.ipynb
@@ -0,0 +1,132 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Loading and Cleaning our Movie Data\n",
+    "We'll download our list of movies, the movie's descriptions first from [TMDB](https://www.themoviedb.org/?language=en-US). Then do some cleaning of the data. Download [word2vec](https://en.wikipedia.org/wiki/Word2vec) and finally, do some feature engineering to tokenize the descriptions. \n",
+    "\n",
+    "We've split these into 6 seperate scripts and linked them all in one notebook for simplicity. \n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Downloading the list of Movies"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "run src/data/movie_list.py  "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Downloading the Movie's descriptions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "run src/data/overviews.py"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Cleaning and de-duping the data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "run src/data/cleaning_data.py"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Featuring engineering"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "run src/features/feature_eng.py"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Downloading [Word2Vec](https://en.wikipedia.org/wiki/Word2vec)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "run src/models/get_word2vec.sh"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Feature engineering using [Word2Vec](https://en.wikipedia.org/wiki/Word2vec)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "run src/features/word2vec_features.py"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}