From 1cb14cbd13bd94f6e9f40f2e9313bf585569e84c Mon Sep 17 00:00:00 2001 From: Daniel Elton Date: Thu, 19 Dec 2019 18:50:47 -0500 Subject: [PATCH 1/6] add discussion in interpretability section and update section on molecular design --- build/ci/cache/requests-cache.sqlite | Bin 0 -> 20480 bytes build/output/citations.tsv | 1 + build/output/manuscript.md | 38 +++++++++++++++++++++++++++ build/output/references.json | 1 + build/output/variables.json | 17 ++++++++++++ build/webpage/v/freeze/index.html | 19 ++++++++++++++ build/webpage/v/latest | 1 + content/05.treat.md | 36 +++++++++++-------------- content/06.discussion.md | 34 +++++++++++++----------- content/citation-tags.tsv | 7 ++++- 10 files changed, 117 insertions(+), 37 deletions(-) create mode 100644 build/ci/cache/requests-cache.sqlite create mode 100644 build/output/citations.tsv create mode 100644 build/output/manuscript.md create mode 100644 build/output/references.json create mode 100644 build/output/variables.json create mode 100644 build/webpage/v/freeze/index.html create mode 120000 build/webpage/v/latest diff --git a/build/ci/cache/requests-cache.sqlite b/build/ci/cache/requests-cache.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..5f5d7c8e968a7214f2eb1395eb7369bddc7dc5be GIT binary patch literal 20480 zcmeI%K}*9h7=YoV>xvUbcPTwCHx)!E9(UQ49%K`B?y^%?F~YE}y0)S}a6ibKN1MWm z9(wdr-ayhXA?-(=^b&G^H(TdgJ*C^1I9KOlAfyyyrGyYxS-WN37t^=*o$@Jv#Z93l z&PHE-_gmO*Pq?|eb9?TzX)@yoAb1$Bir#uo)qiNqyEU=lJZ;~ z#YLW0{m8r<1*YxZNXH)zWm=`$>g+Y$WIC$@>u=I}yg74VIes{7%3OaS3hJ;^dL9_l zzG()gp&)<&0tg_000IagfB*srAb>zy1rE)^IRCeGds!C(2q1s}0tg_000IagfB*uO z0Qdj=2nZm600IagfB*srAb0R#|0009ILKmY**5J2Du^0!$7 literal 0 HcmV?d00001 diff --git a/build/output/citations.tsv b/build/output/citations.tsv new file mode 100644 index 00000000..e4349893 --- /dev/null +++ b/build/output/citations.tsv @@ -0,0 +1 @@ +manuscript_citekey detagged_citekey standard_citekey short_citekey diff --git a/build/output/manuscript.md 
b/build/output/manuscript.md new file mode 100644 index 00000000..df0d610a --- /dev/null +++ b/build/output/manuscript.md @@ -0,0 +1,38 @@ +--- +author-meta: [] +date-meta: '2019-12-19' +header-includes: ' + + + + + + + + + + + + + + + + + + + + + + + + + + ' +... + diff --git a/build/output/references.json b/build/output/references.json new file mode 100644 index 00000000..fe51488c --- /dev/null +++ b/build/output/references.json @@ -0,0 +1 @@ +[] diff --git a/build/output/variables.json b/build/output/variables.json new file mode 100644 index 00000000..88840dd9 --- /dev/null +++ b/build/output/variables.json @@ -0,0 +1,17 @@ +{ + "pandoc": { + "date-meta": "2019-12-19", + "author-meta": [], + "header-includes": "\n\n\n\n\n\n\n\n\n\n\n\n\n" + }, + "manubot": { + "date": "December 19, 2019", + "authors": [], + "manuscript_stats": { + "reference_counts": { + "total": 0 + }, + "word_count": 0 + } + } +} diff --git a/build/webpage/v/freeze/index.html b/build/webpage/v/freeze/index.html new file mode 100644 index 00000000..bff3da63 --- /dev/null +++ b/build/webpage/v/freeze/index.html @@ -0,0 +1,19 @@ + + + + + + + + Page Redirection + + + If you are not redirected automatically, follow this link. + + diff --git a/build/webpage/v/latest b/build/webpage/v/latest new file mode 120000 index 00000000..c2c027fe --- /dev/null +++ b/build/webpage/v/latest @@ -0,0 +1 @@ +local \ No newline at end of file diff --git a/content/05.treat.md b/content/05.treat.md index 96db7d25..3c71ba95 100644 --- a/content/05.treat.md +++ b/content/05.treat.md @@ -180,28 +180,24 @@ However, in the long term, atomic convolutions may ultimately overtake grid-base #### *De novo* drug design -*De novo* drug design attempts to model the typical design-synthesize-test cycle of drug discovery [@doi:10.1002/wcms.49; @doi:10.1021/acs.jmedchem.5b01849]. 
+*De novo* drug design attempts to model the typical design-synthesize-test cycle of drug discovery in-silico [@doi:10.1002/wcms.49; @doi:10.1021/acs.jmedchem.5b01849]. It explores an estimated 1060 synthesizable organic molecules with drug-like properties without explicit enumeration [@doi:10.1002/wcms.1104]. -To test or score structures, algorithms like those discussed earlier are used. +To test or score structures, physics-based simulation could be used, or machine learning models based on techniques discussed may be used, as they are much more computationally efficient. To "design" and "synthesize", traditional *de novo* design software relied on classical optimizers such as genetic algorithms. -Unfortunately, this often leads to overfit, "weird" molecules, which are difficult to synthesize in the lab. -Current programs have settled on rule-based virtual chemical reactions to generate molecular structures [@doi:10.1021/acs.jmedchem.5b01849]. -Deep learning models that generate realistic, synthesizable molecules have been proposed as an alternative. -In contrast to the classical, symbolic approaches, generative models learned from data would not depend on laboriously encoded expert knowledge. -The challenge of generating molecules has parallels to the generation of syntactically and semantically correct text [@arxiv:1308.0850]. - -As deep learning models that directly output (molecular) graphs remain under-explored, generative neural networks for drug design typically represent chemicals with the simplified molecular-input line-entry system (SMILES), a standard string-based representation with characters that represent atoms, bonds, and rings [@tag:Segler2017_drug_design]. -This allows treating molecules as sequences and leveraging recent progress in recurrent neural networks. -Gómez-Bombarelli et al. designed a SMILES-to-SMILES autoencoder to learn a continuous latent feature space for chemicals [@tag:Gomezb2016_automatic]. 
-In this learned continuous space it was possible to interpolate between continuous representations of chemicals in a manner that is not possible with discrete -(e.g. bit vector or string) features or in symbolic, molecular graph space. -Even more interesting is the prospect of performing gradient-based or Bayesian optimization of molecules within this latent space. -The strategy of constructing simple, continuous features before applying supervised learning techniques is reminiscent of autoencoders trained on high-dimensional EHR data [@tag:BeaulieuJones2016_ehr_encode]. + +In the past few years a large number of techniques for the generative modeling and optimization of molecules with deep learning have been explored, including recursive neural networks, variational autoencoders, generative adversarial networks, and reinforcement learning -- for a review see Elton, et al.[@tag:Elton_molecular_design_review] + +Building off the large amount of work that has already gone into text generation,[@arxiv:1308.0850] many generative neural networks for drug design represent chemicals with the simplified molecular-input line-entry system (SMILES), a standard string-based representation with characters that represent atoms, bonds, and rings [@tag:Segler2017_drug_design]. + +The first successful demonstration of a deep learning based approach for molecular optimization occured in 2016 with the development of a SMILES-to-SMILES autoencoder capable of learning a continuous latent feature space for molecules[@tag:Gomezb2016_automatic]. +In this learned continuous space it is possible to interpolate between molecular structures in a manner that is not possible with discrete +(e.g. bit vector or string) features or in symbolic, molecular graph space. Even more interesting is that one can perform gradient-based or Bayesian optimization of molecules within this latent space. 
The strategy of constructing simple, continuous features before applying supervised learning techniques is reminiscent of autoencoders trained on high-dimensional EHR data [@tag:BeaulieuJones2016_ehr_encode]. A drawback of the SMILES-to-SMILES autoencoder is that not all SMILES strings produced by the autoencoder's decoder correspond to valid chemical structures. -Recently, the Grammar Variational Autoencoder, which takes the SMILES grammar into account and is guaranteed to produce syntactically valid SMILES, has been proposed to alleviate this issue [@arxiv:1703.01925]. +The Grammar Variational Autoencoder, which takes the SMILES grammar into account and is guaranteed to produce syntactically valid SMILES, helps alleviate this issue to some extent [@arxiv:1703.01925]. Another approach to *de novo* design is to train character-based RNNs on large collections of molecules, for example, ChEMBL [@doi:10.1093/nar/gkr777], to first obtain a generic generative model for drug-like compounds [@tag:Segler2017_drug_design]. -These generative models successfully learn the grammar of compound representations, with 94% [@tag:Olivecrona2017_drug_design] or nearly 98% [@tag:Segler2017_drug_design] of generated SMILES corresponding to valid molecular structures. -The initial RNN is then fine-tuned to generate molecules that are likely to be active against a specific target by either continuing training on a small set of positive examples [@tag:Segler2017_drug_design] or adopting reinforcement learning strategies [@tag:Olivecrona2017_drug_design; @arxiv:1611.02796]. -Both the fine-tuning and reinforcement learning approaches can rediscover known, held-out active molecules. -The great flexibility of neural networks, and progress in generative models offers many opportunities for deep architectures in *de novo* design (e.g. the adaptation of GANs for molecules). 
+These generative models successfully learn the grammar of compound representations, with 94% [@tag:Olivecrona2017_drug_design] or nearly 98% [@tag:Segler2017_drug_design] of generated SMILES corresponding to valid molecular structures. The initial RNN is then fine-tuned to generate molecules that are likely to be active against a specific target by either continuing training on a small set of positive examples [@tag:Segler2017_drug_design] or adopting reinforcement learning strategies [@tag:Olivecrona2017_drug_design; @arxiv:1611.02796]. Both the fine-tuning and reinforcement learning approaches can rediscover known, held-out active molecules. + +Reinforcement learning approaches where operations are performed directly on the molecular graph bypass the need to learn the details of SMILES syntax, allowing the model to focus purely on chemistry. Additionally, they seem to require less training data and generate more valid molecules since they are constrained by design only to graph operations which satisfy chemical valiance rules.[@tag:Elton_molecular_design_review] A reinforcement learning agent developed by Zhou et al. demonstrated superior molecular optimization performance on certain easy to compute metrics when compared with other deep learning based approaches such as the Junction Tree VAE, Objective Reinforced Generative Adversarial Network, and Graph Convolutional Policy Network.[@doi:10.1038/s41598-019-47148-x] As another example, Zhavoronkov et al. 
used generative tensorial reinforcement learning to discover potent inhibitors of discoidin domain receptor 1 (DDR1).[@tag:Zhavoronkov2019_drugs] Their work is unique in that six lead candidates discovered using their approach were synthesized and tested in the lab, with 4/6 achieving some degree of binding to DDR1.[@tag:Zhavoronkov2019_drugs] + +It is worth pointing out that it has been shown that classical genetic algorithms can compete with many of the most advanced deep learning methods for molecular optimization.[@doi:10.1246/cl.180665; @doi:10.1039/C8SC05372C] Such genetic algorithms use hard coded rules based possible chemical reactions to generate molecular structures [@doi:10.1021/acs.jmedchem.5b01849]. Still, there are many avenues for improving current deep learning systems and the future of the field looks bright. diff --git a/content/06.discussion.md b/content/06.discussion.md index 364ff199..910a02c7 100644 --- a/content/06.discussion.md +++ b/content/06.discussion.md @@ -6,14 +6,12 @@ Here we examine these factors that may impede further progress, ask what steps h ### Customizing deep learning models reflects a tradeoff between bias and variance Some of the challenges in applying deep learning are shared with other machine learning methods. -In particular, many problem-specific optimizations described in this review reflect a recurring universal tradeoff---controlling the flexibility of a model in order to maximize predictivity. -Methods for adjusting the flexibility of deep learning models include dropout, reduced data projections, and transfer learning (described below). -One way of understanding such model optimizations is that they incorporate external information to limit model flexibility and thereby improve predictions. 
-This balance is formally described as a tradeoff between "bias and variance" +In particular, many problem-specific optimizations described in this review reflect a recurring universal tradeoff---controlling the flexibility of a model in order to maximize generalizability and prevent overfitting. +Methods for for preventing overfitting in deep learning models include adding regularization terms to the loss, dropout, using reduced data projections, and early stopping. +The need for balance between model expressiveness and overfitting is formally described as a tradeoff between "bias and variance" [@url:http://www.deeplearningbook.org/]. -Although the bias-variance tradeoff is common to all machine learning applications, recent empirical and theoretical observations suggest that deep learning models may have uniquely advantageous generalization properties [@tag:Zhang2017_generalization; @tag:Lin2017_why_dl_works]. -Nevertheless, additional advances will be needed to establish a coherent theoretical foundation that enables practitioners to better reason about their models from first principles. +Although the bias-variance tradeoff is is important to take into account in many machine learning tasks, recent empirical and theoretical observations suggest that deep neural networks have uniquely advantageous generalization properties and do not obey the tradeoff as expected [@tag:Belkin2019_PNAS; @tag:Zhang2017_generalization; @tag:Lin2017_why_dl_works]. According to the bias-variance theory, many of the most successful deep neural networks have so many free parameters they should overfit.[@tag:Belkin2019_PNAS] It has been shown that deep neural networks operate in a regime where they can exactly interpolate their training data yet are still able to generalize.[@tag:Belkin2019_PNAS] Thus, poor generalizability can often be remedied by adding more layers and increasing the number of free parameters, in conflict with the classic bias-variance theory. 
Additional advances will be needed to establish a coherent theoretical foundation that enables practitioners to better reason about their models from first principles. #### Evaluation metrics for imbalanced classification @@ -106,18 +104,22 @@ As a result, several opportunities for innovation arise: understanding the cause Unfortunately, uncertainty quantification techniques are underutilized in the computational biology communities and largely ignored in the current deep learning for biomedicine literature. Thus, the practical value of uncertainty quantification in biomedical domains is yet to be appreciated. -### Interpretation +### Interpretability -As deep learning models achieve state-of-the-art performance in a variety of domains, there is a growing need to make the models more interpretable. -Interpretability matters for two main reasons. -First, a model that achieves breakthrough performance may have identified patterns in the data that practitioners in the field would like to understand. -However, this would not be possible if the model is a black box. -Second, interpretability is important for trust. -If a model is making medical diagnoses, it is important to ensure the model is making decisions for reliable reasons and is not focusing on an artifact of the data. -A motivating example of this can be found in Caruana et al. [@tag:Caruana2015_intelligible], where a model trained to predict the likelihood of death from pneumonia assigned lower risk to patients with asthma, but only because such patients were treated as higher priority by the hospital. -In the context of deep learning, understanding the basis of a model's output is particularly important as deep learning models are unusually susceptible to adversarial examples [@tag:Nguyen2014_adversarial] and can output confidence scores over 99.99% for samples that resemble pure noise. 
+As deep learning models achieve state-of-the-art performance in a variety of domains, there is a growing need to make the models more interpretable. There are several important reasons to care about interpretability. -As the concept of interpretability is quite broad, many methods described as improving the interpretability of deep learning models take disparate and often complementary approaches. +Firstly, a model that achieves breakthrough performance may have identified patterns in the data that practitioners in the field would like to understand. +For instance, interpreting a model for predicting chemical properties from molecular graphs may illuminate previously unknown structure-property relations. +It is also useful to see if a model is using known relationships - if not, this may suggest a way to improve the model. +Finally, there is a chance that the model may have learned relationships that are known to be wrong. This can be due to improper training data or due to overfitting on spurious correlations in the training data. + +This is particularly important if a model is making medical diagnoses. A motivating example of this can be found in Caruana et al. [@tag:Caruana2015_intelligible], where a model trained to predict the likelihood of death from pneumonia assigned lower risk to patients with asthma, but only because such patients were treated as higher priority by the hospital. + +It has been shown that deep learning models are unusually susceptible to carefully crafted adversarial examples [@tag:Nguyen2014_adversarial] and can output confidence scores over 99.99% for samples that resemble pure noise. While this is largely still an unsolved problem, the interpretation of deep learning models can help understand these failure modes and how to prevent them. + +Several different levels of interpretability can be distinguished. Consider a prototypical CNN used for image classification. 
At a high level, one can perform an occulusion or sensitivity analysis to determine what sections of an image are most important for making a classification, generating a "saliency" heatmap. Then, if one wishes to understand what is going on in the layers of the model, several tools have been developed for visualizing the learned feature maps, such as the deconvnet[@tag:Zeiler2013_visualizing]. Finally, if one wishes to analyze the flow of information through a deep neural network layer-wise relevance propagation can be performed to see how each layer contributes to different classifications.[@tag:Montavon2018_visualization] + +A starting point for many discussions of interpretability is the interpretability-accuracy trade-off. The trade-off assumes that only simple models are interpretable and often a delineation is made between “white box" models (linear regression, decision trees) that are assumed to be not very accurate and “black box" models (neural networks, kernel SVMs) which are assumed to be more accurate. This view is becoming outmoded, however with the development of sophisticated tools for interrogating and understanding deep neural networks.[@tag:Montavon2018_visualization; @tag:Zeiler2013_visualizing] Still, this trade-off motivates a common practice whereby a easy to interpret model is trained next to a hard to interpret one. For instance, in the example discussed by Caruana et al. mentioned earlier, a rule-based model was trained next to a neural network using the same training data to understand the types of relations were learned by the neural network. 
More recently, a method for "distilling" a neural network into a decision tree has been developed.[@tag:Frosst2017_distilling] #### Assigning example-specific importance scores diff --git a/content/citation-tags.tsv b/content/citation-tags.tsv index b4eb9efe..ee40470f 100644 --- a/content/citation-tags.tsv +++ b/content/citation-tags.tsv @@ -20,6 +20,7 @@ Bar2015_nonmed_tl doi:10.1117/12.2083124 Barash2010_splicing_code doi:10.1038/nature09000 Baxt1991_myocardial doi:10.7326/0003-4819-115-11-843 BeaulieuJones2016_ehr_encode doi:10.1016/j.jbi.2016.10.007 +Belkin2019_PNAS doi:10.1073/pnas.1903070116 Bengio2015_prec arxiv:1412.7024 Berezikov2011_mirna doi:10.1038/nrg3079 Bergstra2011_hyper url:https://papers.nips.cc/paper/4443-algorithms-for-hyper-parameter-optimization.pdf @@ -66,6 +67,7 @@ Duvenaud2015_graph_conv url:http://papers.nips.cc/paper/5954-convolutional-netwo Edwards2015_growing_pains doi:10.1145/2771283 Ehran2009_visualizing url:http://www.iro.umontreal.ca/~lisa/publications2/index.php/publications/show/247 Elephas url:https://github.com/maxpumperla/elephas +Elton_molecular_design_review doi:10.1039/C9ME00039A Errington2014_reproducibility doi:10.7554/eLife.04333 Eser2016_fiddle doi:10.1101/081380 Esfahani2016_melanoma doi:10.1109/EMBC.2016.7590963 @@ -76,6 +78,7 @@ Feinberg2018 doi:10.1056/NEJMra1402513 Finnegan2017_maximum doi:10.1101/105957 Fong2017_perturb doi:10.1109/ICCV.2017.371 Fraga2005 doi:10.1073/pnas.0500398102 +Frosst2017_distilling arxiv:1711.09784 Fu2019 doi:10.1109/TCBB.2019.2909237 Gal2015_dropout arxiv:1506.02142 Gaublomme2015_th17 doi:10.1016/j.cell.2015.11.009 @@ -184,6 +187,7 @@ Meissner2008 doi:10.1038/nature07107 Metaphlan doi:10.1038/nmeth.2066 Meng2016_mllib arxiv:1505.06807 Min2016_deepenhancer doi:10.1109/BIBM.2016.7822593 +Montavon2018_visualization doi:10.1016/j.dsp.2017.10.011 Momeni2018 doi:10.1101/438341 Moritz2015_sparknet arxiv:1511.06051 Mordvintsev2015_inceptionism 
url:http://googleresearch.blogspot.co.uk/2015/06/inceptionism-going-deeper-into-neural.html @@ -310,7 +314,8 @@ Yoon2016_cancer_reports doi:10.1007/978-3-319-47898-2_21 Yosinski2014 url:https://papers.nips.cc/paper/5347-how-transferable-are-features-in-deep-neural-networks Yosinksi2015_understanding arxiv:1506.06579 Yu2016_melanoma_resnet doi:10.1109/TMI.2016.2642839 -Zeiler2013_visualizing arxiv:1311.2901 +Zhavoronkov2019_drugs doi:10.1038/s41587-019-0224-x +Zeiler2013_visualizing doi:10.1007/978-3-319-10590-1_53 Zeng2015 doi:10.1186/s12859-015-0553-9 Zeng2016_convolutional doi:10.1093/bioinformatics/btw255 Zhang2015_multitask_tl doi:10.1145/2783258.2783304 From e7f6ca6ee794d700f8cdc8d1bdca2f8c3982583b Mon Sep 17 00:00:00 2001 From: Casey Greene Date: Mon, 10 Feb 2020 15:00:15 -0500 Subject: [PATCH 2/6] remove build files --- build/ci/cache/requests-cache.sqlite | Bin 20480 -> 0 bytes build/output/citations.tsv | 1 - build/output/manuscript.md | 38 --------------------------- build/output/references.json | 1 - build/output/variables.json | 17 ------------ build/webpage/v/freeze/index.html | 19 -------------- build/webpage/v/latest | 1 - 7 files changed, 77 deletions(-) delete mode 100644 build/ci/cache/requests-cache.sqlite delete mode 100644 build/output/citations.tsv delete mode 100644 build/output/manuscript.md delete mode 100644 build/output/references.json delete mode 100644 build/output/variables.json delete mode 100644 build/webpage/v/freeze/index.html delete mode 120000 build/webpage/v/latest diff --git a/build/ci/cache/requests-cache.sqlite b/build/ci/cache/requests-cache.sqlite deleted file mode 100644 index 5f5d7c8e968a7214f2eb1395eb7369bddc7dc5be..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20480 zcmeI%K}*9h7=YoV>xvUbcPTwCHx)!E9(UQ49%K`B?y^%?F~YE}y0)S}a6ibKN1MWm z9(wdr-ayhXA?-(=^b&G^H(TdgJ*C^1I9KOlAfyyyrGyYxS-WN37t^=*o$@Jv#Z93l z&PHE-_gmO*Pq?|eb9?TzX)@yoAb1$Bir#uo)qiNqyEU=lJZ;~ 
z#YLW0{m8r<1*YxZNXH)zWm=`$>g+Y$WIC$@>u=I}yg74VIes{7%3OaS3hJ;^dL9_l zzG()gp&)<&0tg_000IagfB*srAb>zy1rE)^IRCeGds!C(2q1s}0tg_000IagfB*uO z0Qdj=2nZm600IagfB*srAb0R#|0009ILKmY**5J2Du^0!$7 diff --git a/build/output/citations.tsv b/build/output/citations.tsv deleted file mode 100644 index e4349893..00000000 --- a/build/output/citations.tsv +++ /dev/null @@ -1 +0,0 @@ -manuscript_citekey detagged_citekey standard_citekey short_citekey diff --git a/build/output/manuscript.md b/build/output/manuscript.md deleted file mode 100644 index df0d610a..00000000 --- a/build/output/manuscript.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -author-meta: [] -date-meta: '2019-12-19' -header-includes: ' - - - - - - - - - - - - - - - - - - - - - - - - - - ' -... - diff --git a/build/output/references.json b/build/output/references.json deleted file mode 100644 index fe51488c..00000000 --- a/build/output/references.json +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/build/output/variables.json b/build/output/variables.json deleted file mode 100644 index 88840dd9..00000000 --- a/build/output/variables.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "pandoc": { - "date-meta": "2019-12-19", - "author-meta": [], - "header-includes": "\n\n\n\n\n\n\n\n\n\n\n\n\n" - }, - "manubot": { - "date": "December 19, 2019", - "authors": [], - "manuscript_stats": { - "reference_counts": { - "total": 0 - }, - "word_count": 0 - } - } -} diff --git a/build/webpage/v/freeze/index.html b/build/webpage/v/freeze/index.html deleted file mode 100644 index bff3da63..00000000 --- a/build/webpage/v/freeze/index.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - - Page Redirection - - - If you are not redirected automatically, follow this link. 
- - diff --git a/build/webpage/v/latest b/build/webpage/v/latest deleted file mode 120000 index c2c027fe..00000000 --- a/build/webpage/v/latest +++ /dev/null @@ -1 +0,0 @@ -local \ No newline at end of file From 4e53c22c4566fb4dd8160a80cedea8806aa35450 Mon Sep 17 00:00:00 2001 From: Daniel Elton Date: Fri, 14 Feb 2020 18:11:32 -0500 Subject: [PATCH 3/6] rehash/update my previous commit - single lines and other fixes --- content/05.treat.md | 34 +++++++++++++++++++-------- content/06.discussion.md | 48 +++++++++++++++++++++++++++------------ content/citation-tags.tsv | 7 ++++++ 3 files changed, 65 insertions(+), 24 deletions(-) diff --git a/content/05.treat.md b/content/05.treat.md index 3c71ba95..217b6c71 100644 --- a/content/05.treat.md +++ b/content/05.treat.md @@ -182,22 +182,36 @@ However, in the long term, atomic convolutions may ultimately overtake grid-base *De novo* drug design attempts to model the typical design-synthesize-test cycle of drug discovery in-silico [@doi:10.1002/wcms.49; @doi:10.1021/acs.jmedchem.5b01849]. It explores an estimated 1060 synthesizable organic molecules with drug-like properties without explicit enumeration [@doi:10.1002/wcms.1104]. -To test or score structures, physics-based simulation could be used, or machine learning models based on techniques discussed may be used, as they are much more computationally efficient. +To score molecules after generation or during optimization, physics-based simulation could be used [@tag:Sumita2018], but machine learning models based on techniques discussed earlier may be preferable [@tag:Gomezb2016_automatic], as they are much more computationally expedient. Computationally efficiency is particularly important during optimization as the "scoring function" may need to be called thousands of times. + To "design" and "synthesize", traditional *de novo* design software relied on classical optimizers such as genetic algorithms. 
+These approaches can lead to overfit, "weird" molecules, which are difficult to synthesize in the lab. +A popular approach which may help ensure synthesizability is to use rule-based virtual chemical reactions to generate molecular structures [@doi:10.1021/acs.jmedchem.5b01849]. +Deep learning models that generate realistic, synthesizable molecules have been proposed as an alternative. +In contrast to the classical, symbolic approaches, generative models learned from data would not depend on laboriously encoded expert knowledge. -In the past few years a large number of techniques for the generative modeling and optimization of molecules with deep learning have been explored, including recursive neural networks, variational autoencoders, generative adversarial networks, and reinforcement learning -- for a review see Elton, et al.[@tag:Elton_molecular_design_review] +In the past few years a large number of techniques for the generative modeling and optimization of molecules with deep learning have been explored, including recursive neural networks, variational autoencoders, generative adversarial networks, and reinforcement learning -- for a review see Elton, et al.[@tag:Elton_molecular_design_review] or Vamathevan et al.[@tagVamathevan2019]. Building off the large amount of work that has already gone into text generation,[@arxiv:1308.0850] many generative neural networks for drug design represent chemicals with the simplified molecular-input line-entry system (SMILES), a standard string-based representation with characters that represent atoms, bonds, and rings [@tag:Segler2017_drug_design]. -The first successful demonstration of a deep learning based approach for molecular optimization occured in 2016 with the development of a SMILES-to-SMILES autoencoder capable of learning a continuous latent feature space for molecules[@tag:Gomezb2016_automatic]. 
-In this learned continuous space it is possible to interpolate between molecular structures in a manner that is not possible with discrete -(e.g. bit vector or string) features or in symbolic, molecular graph space. Even more interesting is that one can perform gradient-based or Bayesian optimization of molecules within this latent space. The strategy of constructing simple, continuous features before applying supervised learning techniques is reminiscent of autoencoders trained on high-dimensional EHR data [@tag:BeaulieuJones2016_ehr_encode]. +The first successful demonstration of a deep learning based approach for molecular optimization occurred in 2016 with the development of a SMILES-to-SMILES autoencoder capable of learning a continuous latent feature space for molecules[@tag:Gomezb2016_automatic]. +In this learned continuous space it is possible to interpolate between molecular structures in a manner that is not possible with discrete (e.g. bit vector or string) features or in symbolic, molecular graph space. +Even more interesting is that one can perform gradient-based or Bayesian optimization of molecules within this latent space. +The strategy of constructing simple, continuous features before applying supervised learning techniques is reminiscent of autoencoders trained on high-dimensional EHR data [@tag:BeaulieuJones2016_ehr_encode]. A drawback of the SMILES-to-SMILES autoencoder is that not all SMILES strings produced by the autoencoder's decoder correspond to valid chemical structures. The Grammar Variational Autoencoder, which takes the SMILES grammar into account and is guaranteed to produce syntactically valid SMILES, helps alleviate this issue to some extent [@arxiv:1703.01925]. Another approach to *de novo* design is to train character-based RNNs on large collections of molecules, for example, ChEMBL [@doi:10.1093/nar/gkr777], to first obtain a generic generative model for drug-like compounds [@tag:Segler2017_drug_design]. 
-These generative models successfully learn the grammar of compound representations, with 94% [@tag:Olivecrona2017_drug_design] or nearly 98% [@tag:Segler2017_drug_design] of generated SMILES corresponding to valid molecular structures. The initial RNN is then fine-tuned to generate molecules that are likely to be active against a specific target by either continuing training on a small set of positive examples [@tag:Segler2017_drug_design] or adopting reinforcement learning strategies [@tag:Olivecrona2017_drug_design; @arxiv:1611.02796]. Both the fine-tuning and reinforcement learning approaches can rediscover known, held-out active molecules. - -Reinforcement learning approaches where operations are performed directly on the molecular graph bypass the need to learn the details of SMILES syntax, allowing the model to focus purely on chemistry. Additionally, they seem to require less training data and generate more valid molecules since they are constrained by design only to graph operations which satisfy chemical valiance rules.[@tag:Elton_molecular_design_review] A reinforcement learning agent developed by Zhou et al. demonstrated superior molecular optimization performance on certain easy to compute metrics when compared with other deep learning based approaches such as the Junction Tree VAE, Objective Reinforced Generative Adversarial Network, and Graph Convolutional Policy Network.[@doi:10.1038/s41598-019-47148-x] As another example, Zhavoronkov et al. 
used generative tensorial reinforcement learning to discover potent inhibitors of discoidin domain receptor 1 (DDR1).[@tag:Zhavoronkov2019_drugs] Their work is unique in that six lead candidates discovered using their approach were synthesized and tested in the lab, with 4/6 achieving some degree of binding to DDR1.[@tag:Zhavoronkov2019_drugs] - -It is worth pointing out that it has been shown that classical genetic algorithms can compete with many of the most advanced deep learning methods for molecular optimization.[@doi:10.1246/cl.180665; @doi:10.1039/C8SC05372C] Such genetic algorithms use hard coded rules based possible chemical reactions to generate molecular structures [@doi:10.1021/acs.jmedchem.5b01849]. Still, there are many avenues for improving current deep learning systems and the future of the field looks bright. +These generative models successfully learn the grammar of compound representations, with 94% [@tag:Olivecrona2017_drug_design] or nearly 98% [@tag:Segler2017_drug_design] of generated SMILES corresponding to valid molecular structures. +The initial RNN is then fine-tuned to generate molecules that are likely to be active against a specific target by either continuing training on a small set of positive examples [@tag:Segler2017_drug_design] or adopting reinforcement learning strategies [@tag:Olivecrona2017_drug_design; @arxiv:1611.02796]. +Both the fine-tuning and reinforcement learning approaches can rediscover known, held-out active molecules. + +Reinforcement learning approaches where operations are performed directly on the molecular graph bypass the need to learn the details of SMILES syntax, allowing the model to focus purely on chemistry. +Additionally, they seem to require less training data and generate more valid molecules since they are constrained by design only to graph operations which satisfy chemical valiance rules.[@tag:Elton_molecular_design_review] +A reinforcement learning agent developed by Zhou et al. 
demonstrated superior molecular optimization performance on certain easy to compute metrics when compared with other deep learning based approaches such as the Junction Tree VAE, Objective Reinforced Generative Adversarial Network, and Graph Convolutional Policy Network [@doi:10.1038/s41598-019-47148-x]. +As another example, Zhavoronkov et al. used generative tensorial reinforcement learning to discover potent inhibitors of discoidin domain receptor 1 (DDR1) [@tag:Zhavoronkov2019_drugs]. +Their work is unique in that six lead candidates discovered using their approach were synthesized and tested in the lab, with 4/6 achieving some degree of binding to DDR1 [@tag:Zhavoronkov2019_drugs]. + +In concluding this section, it is worth pointing out that it has been shown that classical genetic algorithms can compete with some of the most advanced deep learning methods for molecular optimization [@doi:10.1246/cl.180665; @doi:10.1039/C8SC05372C]. +Such genetic algorithms use hard coded rules based possible chemical reactions to generate molecular structures [@doi:10.1021/acs.jmedchem.5b01849]. +Still, there are many avenues for improving current deep learning systems and the future of the field looks bright. diff --git a/content/06.discussion.md b/content/06.discussion.md index 910a02c7..65159052 100644 --- a/content/06.discussion.md +++ b/content/06.discussion.md @@ -3,15 +3,21 @@ Despite the disparate types of data and scientific goals in the learning tasks covered above, several challenges are broadly important for deep learning in the biomedical domain. Here we examine these factors that may impede further progress, ask what steps have already been taken to overcome them, and suggest future research directions. -### Customizing deep learning models reflects a tradeoff between bias and variance +### Preventing overfitting via hyperparameter tuning -Some of the challenges in applying deep learning are shared with other machine learning methods. 
-In particular, many problem-specific optimizations described in this review reflect a recurring universal tradeoff---controlling the flexibility of a model in order to maximize generalizability and prevent overfitting. -Methods for for preventing overfitting in deep learning models include adding regularization terms to the loss, dropout, using reduced data projections, and early stopping. -The need for balance between model expressiveness and overfitting is formally described as a tradeoff between "bias and variance" -[@url:http://www.deeplearningbook.org/]. +Overfitting is one of the most common problems in machine learning which all practioners must learn to grapple with. +In the classical theory of statistical learning and model fitting, there is a trade-off between "bias and variance" [@url:http://www.deeplearningbook.org/]. +Increasing the capacity of the model (by adding more layers) reduces bias but can increase variance, indicating overfitting. +According to this theory, one way of reducing overfitting and increasing generalization performance is to reduce the capacity of the model. +Other methods for for preventing overfitting include adding regularization terms to the loss, using dropout, using reduced data projections, and early stopping. Each of these methods involves hyperparameters which must be tuned. -Although the bias-variance tradeoff is is important to take into account in many machine learning tasks, recent empirical and theoretical observations suggest that deep neural networks have uniquely advantageous generalization properties and do not obey the tradeoff as expected [@tag:Belkin2019_PNAS; @tag:Zhang2017_generalization; @tag:Lin2017_why_dl_works]. 
According to the bias-variance theory, many of the most successful deep neural networks have so many free parameters they should overfit.[@tag:Belkin2019_PNAS] It has been shown that deep neural networks operate in a regime where they can exactly interpolate their training data yet are still able to generalize.[@tag:Belkin2019_PNAS] Thus, poor generalizability can often be remedied by adding more layers and increasing the number of free parameters, in conflict with the classic bias-variance theory. Additional advances will be needed to establish a coherent theoretical foundation that enables practitioners to better reason about their models from first principles. +Although the bias-variance trade-off is is important to take into account with many classical machine learning models, recent empirical and theoretical observations suggest that deep neural networks in particular do not the tradeoff as expected [@tag:Belkin2019_PNAS; @tag:Zhang2017_generalization; @tag:Lin2017_why_dl_works]. +It has been demonstrated that poor generalizability (test error) can often be remedied by adding more layers and increasing the number of free parameters, in conflict with the classic bias-variance theory. +This phenomena, known as "double descent" indicates that deep neural networks achieve their best performance when they smoothly interpolate training data - resulting in near zero training error [@tag:Belkin2019_PNAS]. + +To optimize neural networks, hyperparaters must be tuned to yield the network with the best test error. +This is computationally expensive and often not done, however it is important to do when making claims about the superiority of one machine learning method vs. another. +Several examples have now been uncovered where a new method said to be superior to a baseline method (like an LSTM) after sufficient hyperparameter tuning [@tag:Sculley2018]. 
#### Evaluation metrics for imbalanced classification @@ -106,20 +112,33 @@ Thus, the practical value of uncertainty quantification in biomedical domains is ### Interpretability -As deep learning models achieve state-of-the-art performance in a variety of domains, there is a growing need to make the models more interpretable. There are several important reasons to care about interpretability. +As deep learning models achieve state-of-the-art performance in a variety of domains, there is a growing need to develop methods for interpreting how they function. +There are several important reasons one might be interested in interpretability, which is also called "explainability". Firstly, a model that achieves breakthrough performance may have identified patterns in the data that practitioners in the field would like to understand. For instance, interpreting a model for predicting chemical properties from molecular graphs may illuminate previously unknown structure-property relations. It is also useful to see if a model is using known relationships - if not, this may suggest a way to improve the model. -Finally, there is a chance that the model may have learned relationships that are known to be wrong. This can be due to improper training data or due to overfitting on spurious correlations in the training data. +Finally, there is a chance that the model may have learned relationships that are known to be wrong. +This can be due to improper training data or due to overfitting on spurious correlations in the training data. -This is particularly important if a model is making medical diagnoses. A motivating example of this can be found in Caruana et al. [@tag:Caruana2015_intelligible], where a model trained to predict the likelihood of death from pneumonia assigned lower risk to patients with asthma, but only because such patients were treated as higher priority by the hospital. +This is particularly important if a model is making medical diagnoses. 
+A motivating example of this can be found in Caruana et al. [@tag:Caruana2015_intelligible], where a model trained to predict the likelihood of death from pneumonia assigned lower risk to patients with asthma, but only because such patients were treated as higher priority by the hospital. -It has been shown that deep learning models are unusually susceptible to carefully crafted adversarial examples [@tag:Nguyen2014_adversarial] and can output confidence scores over 99.99% for samples that resemble pure noise. While this is largely still an unsolved problem, the interpretation of deep learning models can help understand these failure modes and how to prevent them. +It has been shown that deep learning models are unusually susceptible to carefully crafted adversarial examples [@tag:Nguyen2014_adversarial] and can output confidence scores over 99.99% for samples that resemble pure noise. +While this is largely still an unsolved problem, the interpretation of deep learning models may help understand these failure modes and how to prevent them. -Several different levels of interpretability can be distinguished. Consider a prototypical CNN used for image classification. At a high level, one can perform an occulusion or sensitivity analysis to determine what sections of an image are most important for making a classification, generating a "saliency" heatmap. Then, if one wishes to understand what is going on in the layers of the model, several tools have been developed for visualizing the learned feature maps, such as the deconvnet[@tag:Zeiler2013_visualizing]. Finally, if one wishes to analyze the flow of information through a deep neural network layer-wise relevance propagation can be performed to see how each layer contributes to different classifications.[@tag:Montavon2018_visualization] +Several different levels of interpretability can be distinguished. +Consider a prototypical CNN used for image classification. 
+At a high level, one can perform an occlusion or sensitivity analysis to determine what sections of an image are most important for making a classification, generating a "saliency" heatmap. +Then, if one wishes to understand what is going on in the layers of the model, several tools have been developed for visualizing the learned feature maps, such as the deconvnet[@tag:Zeiler2013_visualizing]. +Finally, if one wishes to analyze the flow of information through a deep neural network layer-wise relevance propagation can be performed to see how each layer contributes to different classifications.[@tag:Montavon2018_visualization] -A starting point for many discussions of interpretability is the interpretability-accuracy trade-off. The trade-off assumes that only simple models are interpretable and often a delineation is made between “white box" models (linear regression, decision trees) that are assumed to be not very accurate and “black box" models (neural networks, kernel SVMs) which are assumed to be more accurate. This view is becoming outmoded, however with the development of sophisticated tools for interrogating and understanding deep neural networks.[@tag:Montavon2018_visualization; @tag:Zeiler2013_visualizing] Still, this trade-off motivates a common practice whereby a easy to interpret model is trained next to a hard to interpret one. For instance, in the example discussed by Caruana et al. mentioned earlier, a rule-based model was trained next to a neural network using the same training data to understand the types of relations were learned by the neural network. More recently, a method for "distilling" a neural network into a decision tree has been developed.[@tag:Frosst2017_distilling] +A starting point for many discussions of interpretability is the interpretability-accuracy trade-off. 
+The trade-off assumes that only simple models are interpretable and often a delineation is made between “white box" models (linear regression, decision trees) that are assumed to be not very accurate and “black box" models (neural networks, kernel SVMs) which are assumed to be more accurate.
+This view is becoming outmoded, however, with the development of sophisticated tools for interrogating and understanding deep neural networks [@tag:Montavon2018_visualization; @tag:Zeiler2013_visualizing] and new methods for creating highly accurate interpretable models [@tag:Rudin2019].
+Still, this trade-off motivates a common practice whereby an easy to interpret model is trained next to a hard to interpret one, which is sometimes called "post-hoc interpretation".
+These are both simple examples of an emerging approach towards engendering trust in AI systems which Elton calls "self-explaining AI" [@tag:Elton2020]. #### Future outlook diff --git a/content/citation-tags.tsv b/content/citation-tags.tsv index ee40470f..74e7e90b 100644 --- a/content/citation-tags.tsv +++ b/content/citation-tags.tsv @@ -68,6 +68,7 @@ Edwards2015_growing_pains doi:10.1145/2771283 Ehran2009_visualizing url:http://www.iro.umontreal.ca/~lisa/publications2/index.php/publications/show/247 Elephas url:https://github.com/maxpumperla/elephas Elton_molecular_design_review doi:10.1039/C9ME00039A +Elton2020 arxiv:2002.05149 Errington2014_reproducibility doi:10.7554/eLife.04333 Eser2016_fiddle doi:10.1101/081380 Esfahani2016_melanoma doi:10.1109/EMBC.2016.7590963 @@ -195,6 +196,7 @@ Mrzelj url:https://repozitorij.uni-lj.si/IzpisGradiva.php?id=85515 matis doi:10.1016/S0097-8485(96)80015-5 nbc doi:10.1093/bioinformatics/btq619 Murdoch2017_automatic arxiv:1702.02540 +Murdoch2019 doi:10.1073/pnas.1900654116 Nazor2012 doi:10.1016/j.stem.2012.02.013 Nemati2016_rl doi:10.1109/EMBC.2016.7591355 Ni2018 doi:10.1101/385849 @@ -237,6 +239,7 @@ Rogers2010_fingerprints doi:10.1021/ci100050t Roth2015_view_agg_cad doi:10.1109/TMI.2015.2482920 Romero2017_diet url:https://openreview.net/pdf?id=Sk-oDY9ge Rosenberg2015_synthetic_seqs doi:10.1016/j.cell.2015.09.054 +Rudin2019 doi:10.1038/s42256-019-0048-x Russakovsky2015_imagenet doi:10.1007/s11263-015-0816-y Sa2015_buckwild pmcid:PMC4907892 Salas2018_GR doi:10.1101/gr.233213.117 @@ -245,6 +248,7 @@ Salzberg doi:10.1186/1471-2105-11-544 Schatz2010_dna_cloud doi:10.1038/nbt0710-691 Schmidhuber2014_dnn_overview doi:10.1016/j.neunet.2014.09.003 Scotti2016_missplicing doi:10.1038/nrg.2015.3 +Sculley2018 url:https://openreview.net/pdf?id=rJWF0Fywf Segata doi:10.1371/journal.pcbi.1004977 Segler2017_drug_design arxiv:1701.01329 Seide2014_parallel doi:10.1109/ICASSP.2014.6853593 @@ -254,6 +258,7 @@ Serden doi:10.1016/S0168-8510(02)00208-7 
Shaham2016_batch_effects doi:10.1093/bioinformatics/btx196 Shapely doi:10.1515/9781400881970-018 Shen2017_medimg_review doi:10.1146/annurev-bioeng-071516-044442 +Shen2019 doi:10.1016/j.eswa.2019.01.048 Shin2016_cad_tl doi:10.1109/TMI.2016.2528162 Shrikumar2017_learning arxiv:1704.02685 Shrikumar2017_reversecomplement doi:10.1101/103663 @@ -276,6 +281,7 @@ Su2015_gpu arxiv:1507.01239 Subramanian2016_bace1 doi:10.1021/acs.jcim.6b00290 Sun2016_ensemble arxiv:1606.00575 Sundararajan2017_axiomatic arxiv:1703.01365 +Sumita2018 doi:10.1021/acscentsci.8b00213 Sutskever arxiv:1409.3215 Swamidass2009_irv doi:10.1021/ci8004379 Tan2014_psb doi:10.1142/9789814644730_0014 @@ -291,6 +297,7 @@ Torracinta2016_sim doi:10.1101/079087 Tu1996_anns doi:10.1016/S0895-4356(96)00002-9 Unterthiner2014_screening url:http://www.bioinf.at/publications/2014/NIPS2014a.pdf Vanhoucke2011_cpu url:https://research.google.com/pubs/pub37631.html +Vamathevan2019 doi:10.1038/s41573-019-0024-5 Vera2016_sc_analysis doi:10.1146/annurev-genet-120215-034854 Vervier doi:10.1093/bioinformatics/btv683 Wallach2015_atom_net arxiv:1510.02855 From 721829f15b2c0d26aa5787f46910d0d64e280ad6 Mon Sep 17 00:00:00 2001 From: Daniel Elton Date: Fri, 14 Feb 2020 18:16:14 -0500 Subject: [PATCH 4/6] rehash/update my previous commit - single lines and other fixes --- content/06.discussion.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/content/06.discussion.md b/content/06.discussion.md index 65159052..ec42462b 100644 --- a/content/06.discussion.md +++ b/content/06.discussion.md @@ -17,7 +17,8 @@ This phenomena, known as "double descent" indicates that deep neural networks ac To optimize neural networks, hyperparaters must be tuned to yield the network with the best test error. This is computationally expensive and often not done, however it is important to do when making claims about the superiority of one machine learning method vs. another. 
-Several examples have now been uncovered where a new method said to be superior to a baseline method (like an LSTM) after sufficient hyperparameter tuning [@tag:Sculley2018]. +Several examples have now been uncovered where a new method was said to be superior to a baseline method (like an LSTM or vanilla CNN) but later it was found that the difference went away after sufficient hyperparameter tuning [@tag:Sculley2018]. +A related practice which should be more widely adopted is to perform "ablation studies", where parts of a network are removed and the network is retrained, as this helps with understanding the importance of different components, including any novel ones [@tag:Sculley2018]. #### Evaluation metrics for imbalanced classification From 6f0e60949df2493ef965835b0a683b18b82b6ee1 Mon Sep 17 00:00:00 2001 From: Daniel Elton Date: Fri, 14 Feb 2020 18:17:29 -0500 Subject: [PATCH 5/6] rehash/update my previous commit - single lines and other fixes --- content/06.discussion.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/06.discussion.md b/content/06.discussion.md index ec42462b..76248872 100644 --- a/content/06.discussion.md +++ b/content/06.discussion.md @@ -3,7 +3,7 @@ Despite the disparate types of data and scientific goals in the learning tasks covered above, several challenges are broadly important for deep learning in the biomedical domain. Here we examine these factors that may impede further progress, ask what steps have already been taken to overcome them, and suggest future research directions. -### Preventing overfitting via hyperparameter tuning +### Preventing overfitting and hyperparameter tuning Overfitting is one of the most common problems in machine learning which all practioners must learn to grapple with. In the classical theory of statistical learning and model fitting, there is a trade-off between "bias and variance" [@url:http://www.deeplearningbook.org/]. 
@@ -18,7 +18,7 @@ This phenomena, known as "double descent" indicates that deep neural networks ac To optimize neural networks, hyperparaters must be tuned to yield the network with the best test error. This is computationally expensive and often not done, however it is important to do when making claims about the superiority of one machine learning method vs. another. Several examples have now been uncovered where a new method was said to be superior to a baseline method (like an LSTM or vanilla CNN) but later it was found that the difference went away after sufficient hyperparameter tuning [@tag:Sculley2018]. -A related practice which should be more widely adopted is to perform "ablation studies", where parts of a network are removed and the network is retrained, as this helps with understanding the importance of different components, including any novel ones [@tag:Sculley2018]. +A related practice which should be more widely adopted is to perform "ablation studies", where parts of a network are removed and the network is retrained, as this helps with understanding the importance of different components, including any novel ones [@tag:Sculley2018]. 
#### Evaluation metrics for imbalanced classification From e18d9397bcc069c1e3395a771f3fb971f729b58a Mon Sep 17 00:00:00 2001 From: Daniel Elton Date: Fri, 14 Feb 2020 18:37:55 -0500 Subject: [PATCH 6/6] rehash/update my previous commit - single lines and other fixes --- content/05.treat.md | 2 +- content/06.discussion.md | 5 +++-- content/citation-tags.tsv | 14 ++++++------ content/manual-references.json | 41 ++++++++++++++++++++++++++++++++++ 4 files changed, 52 insertions(+), 10 deletions(-) diff --git a/content/05.treat.md b/content/05.treat.md index 217b6c71..164c3c74 100644 --- a/content/05.treat.md +++ b/content/05.treat.md @@ -190,7 +190,7 @@ A popular approach which may help ensure synthesizability is to use rule-based v Deep learning models that generate realistic, synthesizable molecules have been proposed as an alternative. In contrast to the classical, symbolic approaches, generative models learned from data would not depend on laboriously encoded expert knowledge. -In the past few years a large number of techniques for the generative modeling and optimization of molecules with deep learning have been explored, including recursive neural networks, variational autoencoders, generative adversarial networks, and reinforcement learning -- for a review see Elton, et al.[@tag:Elton_molecular_design_review] or Vamathevan et al.[@tagVamathevan2019]. +In the past few years a large number of techniques for the generative modeling and optimization of molecules with deep learning have been explored, including recursive neural networks, variational autoencoders, generative adversarial networks, and reinforcement learning -- for a review see Elton, et al.[@tag:Elton_molecular_design_review] or Vamathevan et al.[@tag:Vamathevan2019]. 
Building off the large amount of work that has already gone into text generation,[@arxiv:1308.0850] many generative neural networks for drug design represent chemicals with the simplified molecular-input line-entry system (SMILES), a standard string-based representation with characters that represent atoms, bonds, and rings [@tag:Segler2017_drug_design]. diff --git a/content/06.discussion.md b/content/06.discussion.md index 65159052..76248872 100644 --- a/content/06.discussion.md +++ b/content/06.discussion.md @@ -3,7 +3,7 @@ Despite the disparate types of data and scientific goals in the learning tasks covered above, several challenges are broadly important for deep learning in the biomedical domain. Here we examine these factors that may impede further progress, ask what steps have already been taken to overcome them, and suggest future research directions. -### Preventing overfitting via hyperparameter tuning +### Preventing overfitting and hyperparameter tuning Overfitting is one of the most common problems in machine learning which all practioners must learn to grapple with. In the classical theory of statistical learning and model fitting, there is a trade-off between "bias and variance" [@url:http://www.deeplearningbook.org/]. @@ -17,7 +17,8 @@ This phenomena, known as "double descent" indicates that deep neural networks ac To optimize neural networks, hyperparaters must be tuned to yield the network with the best test error. This is computationally expensive and often not done, however it is important to do when making claims about the superiority of one machine learning method vs. another. -Several examples have now been uncovered where a new method said to be superior to a baseline method (like an LSTM) after sufficient hyperparameter tuning [@tag:Sculley2018]. 
+Several examples have now been uncovered where a new method was said to be superior to a baseline method (like an LSTM or vanilla CNN) but later it was found that the difference went away after sufficient hyperparameter tuning [@tag:Sculley2018]. +A related practice which should be more widely adopted is to perform "ablation studies", where parts of a network are removed and the network is retrained, as this helps with understanding the importance of different components, including any novel ones [@tag:Sculley2018]. #### Evaluation metrics for imbalanced classification diff --git a/content/citation-tags.tsv b/content/citation-tags.tsv index 74e7e90b..502511a6 100644 --- a/content/citation-tags.tsv +++ b/content/citation-tags.tsv @@ -68,7 +68,7 @@ Edwards2015_growing_pains doi:10.1145/2771283 Ehran2009_visualizing url:http://www.iro.umontreal.ca/~lisa/publications2/index.php/publications/show/247 Elephas url:https://github.com/maxpumperla/elephas Elton_molecular_design_review doi:10.1039/C9ME00039A -Elton2020 arxiv:2002.05149 +Elton2020 arxiv:2002.05149 Errington2014_reproducibility doi:10.7554/eLife.04333 Eser2016_fiddle doi:10.1101/081380 Esfahani2016_melanoma doi:10.1109/EMBC.2016.7590963 @@ -196,7 +196,7 @@ Mrzelj url:https://repozitorij.uni-lj.si/IzpisGradiva.php?id=85515 matis doi:10.1016/S0097-8485(96)80015-5 nbc doi:10.1093/bioinformatics/btq619 Murdoch2017_automatic arxiv:1702.02540 -Murdoch2019 doi:10.1073/pnas.1900654116 +Murdoch2019 doi:10.1073/pnas.1900654116 Nazor2012 doi:10.1016/j.stem.2012.02.013 Nemati2016_rl doi:10.1109/EMBC.2016.7591355 Ni2018 doi:10.1101/385849 @@ -239,7 +239,7 @@ Rogers2010_fingerprints doi:10.1021/ci100050t Roth2015_view_agg_cad doi:10.1109/TMI.2015.2482920 Romero2017_diet url:https://openreview.net/pdf?id=Sk-oDY9ge Rosenberg2015_synthetic_seqs doi:10.1016/j.cell.2015.09.054 -Rudin2019 doi:10.1038/s42256-019-0048-x +Rudin2019 doi:10.1038/s42256-019-0048-x Russakovsky2015_imagenet doi:10.1007/s11263-015-0816-y Sa2015_buckwild 
pmcid:PMC4907892 Salas2018_GR doi:10.1101/gr.233213.117 @@ -248,7 +248,7 @@ Salzberg doi:10.1186/1471-2105-11-544 Schatz2010_dna_cloud doi:10.1038/nbt0710-691 Schmidhuber2014_dnn_overview doi:10.1016/j.neunet.2014.09.003 Scotti2016_missplicing doi:10.1038/nrg.2015.3 -Sculley2018 url:https://openreview.net/pdf?id=rJWF0Fywf +Sculley2018 url:https://openreview.net/pdf?id=rJWF0Fywf Segata doi:10.1371/journal.pcbi.1004977 Segler2017_drug_design arxiv:1701.01329 Seide2014_parallel doi:10.1109/ICASSP.2014.6853593 @@ -258,7 +258,7 @@ Serden doi:10.1016/S0168-8510(02)00208-7 Shaham2016_batch_effects doi:10.1093/bioinformatics/btx196 Shapely doi:10.1515/9781400881970-018 Shen2017_medimg_review doi:10.1146/annurev-bioeng-071516-044442 -Shen2019 doi:10.1016/j.eswa.2019.01.048 +Shen2019 doi:10.1016/j.eswa.2019.01.048 Shin2016_cad_tl doi:10.1109/TMI.2016.2528162 Shrikumar2017_learning arxiv:1704.02685 Shrikumar2017_reversecomplement doi:10.1101/103663 @@ -281,7 +281,7 @@ Su2015_gpu arxiv:1507.01239 Subramanian2016_bace1 doi:10.1021/acs.jcim.6b00290 Sun2016_ensemble arxiv:1606.00575 Sundararajan2017_axiomatic arxiv:1703.01365 -Sumita2018 doi:10.1021/acscentsci.8b00213 +Sumita2018 doi:10.1021/acscentsci.8b00213 Sutskever arxiv:1409.3215 Swamidass2009_irv doi:10.1021/ci8004379 Tan2014_psb doi:10.1142/9789814644730_0014 @@ -297,7 +297,7 @@ Torracinta2016_sim doi:10.1101/079087 Tu1996_anns doi:10.1016/S0895-4356(96)00002-9 Unterthiner2014_screening url:http://www.bioinf.at/publications/2014/NIPS2014a.pdf Vanhoucke2011_cpu url:https://research.google.com/pubs/pub37631.html -Vamathevan2019 doi:10.1038/s41573-019-0024-5 +Vamathevan2019 doi:10.1038/s41573-019-0024-5 Vera2016_sc_analysis doi:10.1146/annurev-genet-120215-034854 Vervier doi:10.1093/bioinformatics/btv683 Wallach2015_atom_net arxiv:1510.02855 diff --git a/content/manual-references.json b/content/manual-references.json index 22d98717..bc7bb13b 100644 --- a/content/manual-references.json +++ b/content/manual-references.json @@ 
-52,6 +52,47 @@ ] } }, + { + "id": "url:https://openreview.net/pdf?id=rJWF0Fywf", + "type": "article-journal", + "title": "Winner's Curse? On Pace, Progress, and Empirical Rigor ...", + "container-title": "International Conference on Learning Representations 2018", + "URL": "https://openreview.net/pdf?id=rJWF0Fywf", + "author": [ + { + "family": "Sculley", + "given": "D." + }, + { + "family": "Snoek", + "given": "Jasper" + }, + { + "family": "Rahimi", + "given": "Ali" + }, + { + "family": "Wiltschko", + "given": "Alex" + } + ], + "issued": { + "date-parts": [ + [ + "2018" + ] + ] + }, + "accessed": { + "date-parts": [ + [ + "2020", + 2, + 14 + ] + ] + } + }, { "id": "url:https://repozitorij.uni-lj.si/IzpisGradiva.php?id=85515", "type": "report",