docs: Refactor docs and docgen framework (microsoft#2021)
* docs: Refactor docs and doc generation system

* remove old docusaurus versions

* fix broken links

* fix old references

* fix build issues

* small fix

* fix notebook upload

* standardize header

* remove boilerplate

* remove boilerplate

* update boilerplate

* update boilerplate

* fix docgen structure

* fixes
mhamilton723 authored and JessicaXYWang committed Sep 14, 2023
1 parent 9e1da76 commit 19f898c
Showing 1,008 changed files with 834 additions and 170,276 deletions.
2 changes: 1 addition & 1 deletion .acrolinx-config.edn
@@ -1,2 +1,2 @@
{:allowed-branchname-matches ["master" "release-.*"]
:allowed-filename-matches ["notebooks" "website"]}
:allowed-filename-matches ["docs" "website"]}
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
@@ -52,7 +52,7 @@ this process:

#### Implement documentation

- Add a [sample Jupyter notebook](notebooks/) that shows the intended use
- Add a [sample Jupyter notebook](docs/) that shows the intended use
case of your algorithm, with instructions in a step-by-step manner. (The same
notebook could be used for testing the code.)
- Add in-line ScalaDoc comments to your source code, to generate the [API
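For contributors unfamiliar with ScalaDoc, the kind of in-line comment the guideline above asks for might look like the following sketch (illustrative only, attached to a made-up helper; it is not part of this commit):

```scala
/** Scores each row of the input column and appends the result.
  * (Illustrative ScalaDoc on a hypothetical helper, not code from this repository.)
  *
  * @param inputCol  name of the column to read from
  * @param outputCol name of the column the result is written to
  * @return the name of the output column, for convenience
  */
def scoreColumn(inputCol: String, outputCol: String): String = {
  // a real transformer would run its model here
  outputCol
}
```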
10 changes: 5 additions & 5 deletions build.sbt
@@ -381,11 +381,11 @@ publishBadges := {
uploadBadge("master version", version.value, "blue", "master_version3.svg")
}

val uploadNotebooks = TaskKey[Unit]("uploadNotebooks", "upload notebooks to blob storage")
val uploadNotebooks = TaskKey[Unit]("uploadNotebooks", "upload docs to blob storage")
uploadNotebooks := {
val localNotebooksFolder = join(baseDirectory.value.toString, "notebooks").toString
val localNotebooksFolder = join(baseDirectory.value.toString, "docs").toString
val blobNotebooksFolder = version.value
uploadToBlob(localNotebooksFolder, blobNotebooksFolder, "notebooks")
uploadToBlob(localNotebooksFolder, blobNotebooksFolder, "docs")
}

val settings = Seq(
@@ -493,8 +493,8 @@ setupTask := {

val convertNotebooks = TaskKey[Unit]("convertNotebooks", "convert notebooks to markdown for website display")
convertNotebooks := {
runCmdStr("python -m docs.python.documentprojection " +
"--customchannels docs/python/synapseml_channels -c website . docs/manifest.yaml -p")
runCmd(Seq("pip", "install", "-e", "."), wd=join(baseDirectory.value, "tools/docgen"))
runCmd(Seq("python", "__main__.py"), wd=join(baseDirectory.value, "tools/docgen/docgen"))
}

val testWebsiteDocs = TaskKey[Unit]("testWebsiteDocs",
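For context, convertNotebooks now delegates to the new docgen package under tools/docgen instead of the old docs.python.documentprojection module. A stand-alone sketch of the same two steps, assuming they are run from the repository root with pip and python on the PATH:

```scala
// Sketch only: mirrors the two runCmd calls above using scala.sys.process.
import scala.sys.process._
import java.io.File

val docgenDir = new File("tools/docgen")
// 1) install the docgen tool as an editable package
Process(Seq("pip", "install", "-e", "."), docgenDir).!
// 2) run its entry point, which regenerates the website markdown from the docs/ notebooks
Process(Seq("python", "__main__.py"), new File(docgenDir, "docgen")).!
```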
@@ -68,6 +68,13 @@ object FileUtilities {
()
}

def copyAndRenameFile(from: File, toDir: File, newName: String, overwrite: Boolean = false): Unit = {
Files.copy(from.toPath, new File(toDir, newName).toPath,
(if (overwrite) Seq(StandardCopyOption.REPLACE_EXISTING)
else Seq()): _*)
()
}

// Perhaps this should move into a more specific place, not a generic file utils thing
def zipFolder(dir: File, out: File): Unit = {
import java.io.{BufferedInputStream, FileInputStream, FileOutputStream}
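The new copyAndRenameFile helper copies a file into a target directory under a different name, optionally overwriting an existing copy. A usage sketch (the file names are invented for illustration):

```scala
// Hypothetical usage of FileUtilities.copyAndRenameFile; paths are made up.
import java.io.File
import com.microsoft.azure.synapse.ml.core.env.FileUtilities

val source = new File("docs/Quickstart - Fine-tune.ipynb")   // assumed source notebook
val targetDir = new File("target/generated-notebooks")       // assumed destination directory
FileUtilities.copyAndRenameFile(source, targetDir, "QuickstartFinetune.ipynb", overwrite = true)
```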
@@ -86,17 +86,17 @@ object DatabricksUtilities {
// Execution Params
val TimeoutInMillis: Int = 40 * 60 * 1000

val NotebookFiles: Array[File] = FileUtilities.recursiveListFiles(
FileUtilities.join(
BuildInfo.baseDirectory.getParent, "notebooks", "features").getCanonicalFile)
val DocsDir = FileUtilities.join(BuildInfo.baseDirectory.getParent, "docs").getCanonicalFile()
val NotebookFiles: Array[File] = FileUtilities.recursiveListFiles(DocsDir)
.filter(_.toString.endsWith(".ipynb"))

val ParallelizableNotebooks: Seq[File] = NotebookFiles.filterNot(_.isDirectory)

val CPUNotebooks: Seq[File] = ParallelizableNotebooks
.filterNot(_.getAbsolutePath.contains("simple_deep_learning"))
.filterNot(_.getAbsolutePath.contains("Fine-tune"))
.filterNot(_.getAbsolutePath.contains("Explanation Dashboard")) // TODO Remove this exclusion

val GPUNotebooks: Seq[File] = ParallelizableNotebooks.filter(_.getAbsolutePath.contains("simple_deep_learning"))
val GPUNotebooks: Seq[File] = ParallelizableNotebooks.filter(_.getAbsolutePath.contains("Fine-tune"))

def databricksGet(path: String): JsValue = {
val request = new HttpGet(BaseURL + path)
@@ -336,13 +336,15 @@
//scalastyle:on cyclomatic.complexity

def uploadAndSubmitNotebook(clusterId: String, notebookFile: File): DatabricksNotebookRun = {
val destination: String = Folder + "/" + notebookFile.getName
val dirPaths = DocsDir.toURI.relativize(notebookFile.getParentFile.toURI).getPath
val folderToCreate = Folder + "/" + dirPaths
println(s"Creating folder $folderToCreate")
workspaceMkDir(folderToCreate)
val destination: String = folderToCreate + notebookFile.getName
uploadNotebook(notebookFile, destination)
val runId: Int = submitRun(clusterId, destination)
val run: DatabricksNotebookRun = DatabricksNotebookRun(runId, notebookFile.getName)

println(s"Successfully submitted job run id ${run.runId} for notebook ${run.notebookName}")

run
}

@@ -413,9 +415,6 @@ abstract class DatabricksTestHelper extends TestBase {
assert(areLibrariesInstalled(clusterId))
}

println(s"Creating folder $Folder")
workspaceMkDir(Folder)

println(s"Submitting jobs")
val parNotebookRuns: Seq[DatabricksNotebookRun] = notebooks.map(uploadAndSubmitNotebook(clusterId, _))
parNotebookRuns.foreach(notebookRun => jobIdsToCancel.append(notebookRun.runId))
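uploadAndSubmitNotebook now recreates each notebook's docs/ sub-folder inside the Databricks workspace before uploading. A small sketch of the path handling, with an invented notebook path and assuming both directories exist on disk (File.toURI only appends a trailing slash for directories it can see):

```scala
// Sketch of the DocsDir.toURI.relativize(...) step above; values are hypothetical.
import java.io.File

val docsDir = new File("docs").getCanonicalFile
val notebook = new File(docsDir, "Explore Algorithms/Fine-tune.ipynb")
val dirPaths = docsDir.toURI.relativize(notebook.getParentFile.toURI).getPath
// e.g. dirPaths == "Explore Algorithms/", so the workspace folder to create is
// Folder + "/" + dirPaths and the uploaded notebook keeps its sub-folder layout.
```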
@@ -4,34 +4,76 @@
package com.microsoft.azure.synapse.ml.nbtest

import com.microsoft.azure.synapse.ml.build.BuildInfo
import com.microsoft.azure.synapse.ml.core.env.FileUtilities
import com.microsoft.azure.synapse.ml.core.env.{FileUtilities, StreamUtilities}
import org.apache.commons.io.FileUtils

import java.io.File
import java.lang.ProcessBuilder.Redirect
import scala.sys.process._

import scala.io.Source
import java.io.{BufferedWriter, File, FileWriter}

object SharedNotebookE2ETestUtilities {
val ResourcesDirectory = new File(getClass.getResource("/").toURI)
val NotebooksDir = new File(ResourcesDirectory, "generated-notebooks")
val NotebookPreamble: String =
"""
|# In[ ]:
|
|
|# This cell ensures magic commands like '%pip install' work on Synapse scheduled Spark jobs
|from synapse.ml.core.platform import running_on_synapse
|
|if running_on_synapse():
| from IPython import get_ipython
| from IPython.terminal.interactiveshell import TerminalInteractiveShell
| from synapse.ml.core.platform import materializing_display as display
| from pyspark.sql import SparkSession
|
| spark = SparkSession.builder.getOrCreate()
| try:
| shell = TerminalInteractiveShell.instance()
| except:
| pass
|
|""".stripMargin

def insertTextInFile(file: File, textToPrepend: String, locToInsert: Int): Unit = {
val existingLines = StreamUtilities.using(Source.fromFile(file)) { s =>
s.getLines().toList
}.get
val linesBefore = existingLines.take(locToInsert)
val linesAfter = existingLines.takeRight(existingLines.length - locToInsert)
val linesInMiddle = textToPrepend.split("\n")
val newText = (linesBefore ++ linesInMiddle ++ linesAfter).mkString("\n")
StreamUtilities.using(new BufferedWriter(new FileWriter(file))) { writer =>
writer.write(newText)
}
}

def generateNotebooks(): Unit = {
cleanUpGeneratedNotebooksDir()

FileUtilities.recursiveListFiles(FileUtilities
.join(BuildInfo.baseDirectory.getParent, "notebooks/features")
.getCanonicalFile)
val docsDir = FileUtilities.join(BuildInfo.baseDirectory.getParent, "docs").getCanonicalFile
val newFiles = FileUtilities.recursiveListFiles(docsDir)
.filter(_.getName.endsWith(".ipynb"))
.map { f =>
FileUtilities.copyFile(f, NotebooksDir, true)
val newFile = new File(NotebooksDir, f.getName)
val targetName = new File(NotebooksDir, f.getName.replace(" ", "").replace("-", ""))
newFile.renameTo(targetName)
targetName
val relative = docsDir.toURI.relativize(f.toURI).getPath
val newName = relative
.replace("/", "")
.replace(" ", "")
.replace("-", "")
.replace(",", "")
FileUtilities.copyAndRenameFile(f, NotebooksDir, newName, true)
new File(NotebooksDir, newName)
}

runCmd(activateCondaEnv ++ Seq("jupyter", "nbconvert", "--to", "python", "*.ipynb"), NotebooksDir)

newFiles.foreach { f =>
insertTextInFile(new File(f.getPath.replace(".ipynb", ".py")), NotebookPreamble, 2)
}

}

def cleanUpGeneratedNotebooksDir(): Unit = {
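generateNotebooks now flattens each docs/ notebook into NotebooksDir, converts it to a Python script with nbconvert, and splices NotebookPreamble in at line index 2. nbconvert's .py output typically begins with a shebang and a coding line, which appears to be why index 2 is used. A hedged stand-alone sketch (the file name is invented):

```scala
// Hypothetical direct use of insertTextInFile after nbconvert has produced the script.
import java.io.File
import com.microsoft.azure.synapse.ml.nbtest.SharedNotebookE2ETestUtilities._

val script = new File(NotebooksDir, "QuickstartFinetune.py") // assumed converted notebook
insertTextInFile(script, NotebookPreamble, 2)                // lands right after the two header lines
```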
@@ -44,11 +44,12 @@ class SynapseTests extends TestBase {

val selectedPythonFiles: Array[File] = FileUtilities.recursiveListFiles(SharedNotebookE2ETestUtilities.NotebooksDir)
.filter(_.getAbsolutePath.endsWith(".py"))
.filterNot(_.getAbsolutePath.contains("DeepLearningDeepTextClassification")) // Excluded by design task 1829306
.filterNot(_.getAbsolutePath.contains("DeepLearningDeepVisionClassification")) // Excluded by design task 1829306
.filterNot(_.getAbsolutePath.contains("VowpalWabbitClassificationusingVWnativeFormat"))
.filterNot(_.getAbsolutePath.contains("Finetune")) // Excluded by design task 1829306
.filterNot(_.getAbsolutePath.contains("VWnativeFormat"))
.filterNot(_.getAbsolutePath.contains("VowpalWabbitMulticlassclassification")) // Wait for Synpase fix
.filterNot(_.getAbsolutePath.contains("Langchain")) // Wait for Synpase fix
.filterNot(_.getAbsolutePath.contains("SetupCognitive")) // No code to run
.filterNot(_.getAbsolutePath.contains("CreateaSparkCluster")) // No code to run
.sortBy(_.getAbsolutePath)

val expectedPoolCount: Int = selectedPythonFiles.length
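The exclusion strings above match the flattened names produced by SharedNotebookE2ETestUtilities.generateNotebooks, which strips slashes, spaces, hyphens, and commas from each notebook's docs-relative path. A small illustration with an invented path:

```scala
// Invented docs-relative path, flattened the same way generateNotebooks does.
val relative = "Explore Algorithms/Deep Learning/Quickstart - Fine-tune.ipynb"
val flattened = relative.replace("/", "").replace(" ", "").replace("-", "").replace(",", "")
// flattened == "ExploreAlgorithmsDeepLearningQuickstartFinetune.ipynb", which the
// .filterNot(_.getAbsolutePath.contains("Finetune")) exclusion above would skip.
```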
@@ -33,7 +33,7 @@ sidebar_label: About

### Jupyter Notebook Examples

- [Deploy a classifier trained on the Adult Census Dataset](../SparkServing%20-%20Deploying%20a%20Classifier)
- [Deploy a classifier trained on the Adult Census Dataset](../Quickstart%20-%20Deploying%20a%20Classifier)
- More coming soon!

### Spark Serving Hello World
@@ -9,31 +9,6 @@
"First, we import needed packages:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pyspark.sql import SparkSession\n",
"\n",
"# Bootstrap Spark Session\n",
"spark = SparkSession.builder.getOrCreate()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import sys\n",
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -33,14 +33,7 @@
{
"cell_type": "code",
"source": [
"import os\n",
"from pyspark.sql import SparkSession\n",
"from synapse.ml.core.platform import running_on_synapse, find_secret\n",
"\n",
"# Bootstrap Spark Session\n",
"spark = SparkSession.builder.getOrCreate()\n",
"if running_on_synapse():\n",
" from notebookutils.visualization import display\n",
"from synapse.ml.core.platform import find_secret\n",
"\n",
"service_key = find_secret(\"cognitive-api-key\")\n",
"service_loc = \"eastus\""
@@ -264,7 +257,8 @@
{
"cell_type": "markdown",
"source": [
"#### Faster without extra hardware:\n<img src=\"https://mmlspark.blob.core.windows.net/graphics/Cog%20Service%20NB/async_relative%20(2).png\" width=\"500\" />"
"#### Faster without extra hardware:\n",
"<img src=\"https://mmlspark.blob.core.windows.net/graphics/Cog%20Service%20NB/async_relative%20(2).png\" width=\"500\" />"
],
"metadata": {
"application/vnd.databricks.v1+cell": {
@@ -398,7 +392,9 @@
{
"cell_type": "markdown",
"source": [
"## Learn More\n- [Explore other cogntive services](https://microsoft.github.io/SynapseML/docs/features/cognitive_services/CognitiveServices%20-%20Overview/)\n- [Read our paper \"Large-Scale Intelligent Microservices\"](https://arxiv.org/abs/2009.08044)"
"## Learn More\n",
"- [Explore other cogntive services](https://microsoft.github.io/SynapseML/docs/features/cognitive_services/CognitiveServices%20-%20Overview/)\n",
"- [Read our paper \"Large-Scale Intelligent Microservices\"](https://arxiv.org/abs/2009.08044)"
],
"metadata": {
"application/vnd.databricks.v1+cell": {
@@ -421,8 +417,13 @@
"language": "python",
"widgets": {},
"notebookOrigID": 3743502060540796
},
"kernelspec": {
"name": "python3",
"language": "python",
"display_name": "Python 3 (ipykernel)"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
}
@@ -53,12 +53,8 @@
"metadata": {},
"outputs": [],
"source": [
"from pyspark.sql.functions import udf, col\n",
"from pyspark.sql.types import StructType, StructField, DoubleType\n",
"from pyspark.sql.functions import lit\n",
"from pyspark.ml import PipelineModel\n",
"from pyspark.sql.functions import col\n",
"import os\n",
"import requests\n",
"from requests.adapters import HTTPAdapter\n",
"from requests.packages.urllib3.util.retry import Retry\n",
@@ -75,21 +71,6 @@
"http.mount(\"http://\", adapter)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pyspark.sql import SparkSession\n",
"from synapse.ml.core.platform import *\n",
"\n",
"# Bootstrap Spark Session\n",
"spark = SparkSession.builder.getOrCreate()\n",
"\n",
"from synapse.ml.core.platform import materializing_display as display"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -98,6 +79,7 @@
"source": [
"from synapse.ml.cognitive import *\n",
"from synapse.ml.geospatial import *\n",
"from synapse.ml.core.platform import *\n",
"\n",
"# An Azure Maps account key\n",
"maps_key = find_secret(\"azuremaps-api-key\")"
@@ -38,20 +38,6 @@
"Let's start by setting up the environment variables for our service keys. The next cell sets the `ANOMALY_API_KEY` and the `BLOB_CONNECTION_STRING` environment variables based on the values stored in our Azure Key Vault. If you're running this tutorial in your own environment, make sure you set these environment variables before you proceed."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from pyspark.sql import SparkSession\n",
"from synapse.ml.core.platform import find_secret\n",
"\n",
"# Bootstrap Spark Session\n",
"spark = SparkSession.builder.getOrCreate()"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -65,6 +51,8 @@
"metadata": {},
"outputs": [],
"source": [
"from synapse.ml.core.platform import find_secret\n",
"\n",
"# An Anomaly Dectector subscription key\n",
"anomalyKey = find_secret(\"anomaly-api-key\") # use your own anomaly api key\n",
"# Your storage account name\n",