diff --git a/joss-paper/paper.bib b/joss-paper/paper.bib index 554a08e..1c157e7 100644 --- a/joss-paper/paper.bib +++ b/joss-paper/paper.bib @@ -33,7 +33,7 @@ @book{zheng2018feature title={Feature engineering for machine learning: principles and techniques for data scientists}, author={Zheng, Alice and Casari, Amanda}, year={2018}, - publisher={" O'Reilly Media, Inc."} + publisher={O'Reilly Media, Inc.} } @inproceedings{anderson2013brainwash, @@ -54,17 +54,18 @@ @inproceedings{kanter2015deep } @inproceedings{nargesian2017learning, - title={Learning Feature Engineering for Classification.}, - author={Nargesian, Fatemeh and Samulowitz, Horst and Khurana, Udayan and Khalil, Elias B and Turaga, Deepak S}, - booktitle={IJCAI}, - pages={2529--2535}, - year={2017}, - doi={10.24963/ijcai.2017/352} + doi = {10.24963/ijcai.2017/352}, + year = {2017}, + month = aug, + publisher = {International Joint Conferences on Artificial Intelligence Organization}, + author = {Nargesian, Fatemeh and Samulowitz, Horst and Khurana, Udayan and Khalil, Elias B. and Turaga, Deepak S.}, + title = {Learning Feature Engineering for Classification}, + booktitle = {Proceedings of the Twenty-Sixth International Joint Conference on Artificial Intelligence} } @article{ArchibaldRichardK2020Caas, issn = {1600-5767}, -journal = {Journal of applied crystallography}, +journal = {Journal of Applied Crystallography}, pages = {326--334}, volume = {53}, publisher = {International Union of Crystallography}, diff --git a/joss-paper/paper.md b/joss-paper/paper.md index 56a99f6..6c9d854 100644 --- a/joss-paper/paper.md +++ b/joss-paper/paper.md @@ -51,9 +51,9 @@ bibliography: paper.bib High Throughput Experimentation (HTE) and High Throughput Testing (HTT) have exponentially increased the volume of experimental data available to scientists. One of the major bottlenecks in their implementation is the data analysis. 
The need for autonomous binning and classification has seen an increase in the employment of machine learning approaches in discovery of catalysts, energy materials and process parameters for design of experiment [@williams2019enabling; @becker2019low]. However, these solutions rely on specific sets of hyperparameters for their machine learning models to achieve the desired purpose. Furthermore, numerical data from experimental characterization of materials carries diversity in both features and magnitude. These features are traditionally extracted using deterministic models based on empirical relationships between variables of the process under investigation. As an example, X-ray diffraction (XRD) data is easier to characterize in linear form as compared to small angle X-ray scattering data, which requires transformation of axis to log-log scale. -One of the most widely applied strategy to enhance the performance of machine learning model is Combined Automatic Machine Learning (AutoML) for CASH (Combined Alogrithm Selection and Hyperparameter Optimization) [@hutter2019automated]. However, these packages are only limited to hyper-parameter tuning and data features remain untouched. To improve the effectiveness of machine learning models, some of the popular feature engineering strategies used for simple numerical data include binning, binarization, normalization, Box-Cox Transformations and Quantile Sketch Array (QSA) [@zheng2018feature][@nargesian2017learning]. Moreover, Deep Feature Synthesis has also shown promising results. Here features are generated from relational databases by performing multi-layer mathematical transformation operations [@kanter2015deep]. +One of the most widely applied strategies to enhance the performance of machine learning models is Combined Automatic Machine Learning (AutoML) for CASH (Combined Algorithm Selection and Hyperparameter Optimization) [@hutter2019automated]. 
However, these packages are only limited to hyper-parameter tuning and data features remain untouched. To improve the effectiveness of machine learning models, some of the popular feature engineering strategies used for simple numerical data include binning, binarization, normalization, Box-Cox Transformations and Quantile Sketch Array (QSA) [@zheng2018feature; @nargesian2017learning]. Moreover, Deep Feature Synthesis has also shown promising results. Here features are generated from relational databases by performing multi-layer mathematical transformation operations [@kanter2015deep]. -`HARDy` presents an infrastructure which aids in the identification of the best combination of numerical and visual transformations to improve data classification through Convolutional Neural Networks (CNN). `HARDy` exploits the difference between human-readable images of experimental data (i.e. Cartesian representation) and computer-readable plots, which maximizes the data density presented to an algorithm and reduce superfluous information. `HARDy` uses configuration files, fed to the open-source package `KerasTuner`, removing the need for the user to manually generate unique parameters combinations for each neural network model to be investigated. +`HARDy` presents an infrastructure which aids in the identification of the best combination of numerical and visual transformations to improve data classification through Convolutional Neural Networks (CNN). `HARDy` exploits the difference between human-readable images of experimental data (i.e., Cartesian representation) and computer-readable plots, which maximizes the data density presented to an algorithm and reduces superfluous information. `HARDy` uses configuration files, fed to the open-source package `KerasTuner`, removing the need for the user to manually generate unique parameter combinations for each neural network model to be investigated.