phoible · ltxom · Apr 2, 2021 · Apr 3, 2021 · Apr 5, 2021 · Apr 7, 2021
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,4 @@
+.idea
+.DS_Store
 _site/
 .sass-cache/
diff --git a/README.md b/README.md
@@ -3,3 +3,8 @@ This is the development repository for the website http://phoible.github.io/, wh
 
 ## About PHOIBLE
 PHOIBLE is a database of phonological inventories and distinctive features, encompassing more than 1600 languages (and growing). PHOIBLE data is published in browsable form online at [PHOIBLE Online](http://phoible.org), which corresponds with the most recent year-numbered [release](https://github.com/phoible/phoible/releases) of the [development repository](https://github.com/phoible/phoible).
+
+### Convert Conventions and FAQ source files to HTML
+- Install the Beautiful Soup (BS4) package: `pip install bs4`.
+- Run `Sys.getenv("RSTUDIO_PANDOC")` in the RStudio console to find the RStudio pandoc path, then set `RSTUDIO_PANDOC = "YOUR_PATH"` near the top of `convertMdToHTML.py`.
+- Run `python convertMdToHTML.py`.
diff --git a/_faq.Rmd b/_faq.Rmd
@@ -9,6 +9,9 @@ output:
     preserve_yaml: true
     toc: true
     toc_depth: 2
+  html_document:
+    toc: true
+    theme: default
 csl: bib/phoible.csl
 ---
 

diff --git a/convertMdToHTML.py b/convertMdToHTML.py
@@ -0,0 +1,123 @@
+import os
+from os import path
+import bs4
+import re
+
+# RStudio pandoc path; edit it to match the local installation.
+# main() passes this value on the command line to knitRmdToHTML.R, which
+# exports it as the RSTUDIO_PANDOC environment variable before rendering.
+RSTUDIO_PANDOC = '/Applications/RStudio.app/Contents/MacOS/pandoc'
+
+
+def fix_FAQ(file_path, output_path):
+    div_content = None
+    with open(file_path) as f:
+        soup = bs4.BeautifulSoup(f.read(), 'html.parser')
+        # beautify tables
+        tables = soup.find_all('table')
+        ths = soup.find_all('th')
+        for table in tables:
+            table['cellpadding'] = '0'
+            table['cellspacing'] = '0'
+            table['border'] = '0'
+            table['class'] = 'table table-bordered order-column compact stripe dataTable no-footer table-nonfluid'
+            table['role'] = 'grid'
+        for th in ths:
+            th['role'] = 'row'
+
+        ps = soup.find_all('p')
+        # fix blockquotes
+        for p in ps:
+            if '&gt;' in str(p):
+                p_temp = str(p).split('&gt;')
+                p.clear()
+                p.string = p_temp[0].replace('<p>','')
+                blockquote = soup.new_tag('blockquote')
+                blockquote.append(bs4.BeautifulSoup(p_temp[1][0: len(p_temp) - 6], 'html.parser'))
+                p.append(blockquote)
+        # fix <em> Spacing
+        ems = soup.find_all('em')
+        for em in ems:
+            temp = str(em).replace('<em>', '').replace('</em>', '').strip()
+            em.clear()
+            em.append(bs4.BeautifulSoup(temp, 'html.parser'))
+        # fix references
+        references_div = soup.find('div', {'class':'references'})
+        if references_div is not None:
+            references_ps = references_div.find_all('p')
+            for p in references_ps:
+                # fix url
+                if 'Online: urlhttp' in str(p):
+                    p_temp = p.get_text().split('Online: urlhttp')
+                    p.string = p_temp[0]
+                    a = soup.new_tag('a')
+                    a.string = 'http' + p_temp[1]
+                    a['href'] = a.string
+                    p.append(a)
+                # fix spacing
+                if ' ,' in str(p):
+                    p.string = re.sub(r' +,', ',', p.get_text())
+                if ' .' in str(p):
+                    p.string = re.sub(r' +.', '.', p.get_text())
+                if p.get_text().endswith(':'):
+                    p.string = p.get_text()[0 : len(p.get_text()) - 1] + '.'
+        # fix titles size
+        for level in list(range(5, 0, -1)):
+            tags = soup.find_all(f'h{level}')
+            for tag in tags:
+                tag.name = f'h{level + 1}'
+
+        div_content = soup.find('div', {'class': 'container-fluid main-container'})
+        with open(output_path, 'w') as file:
+            file.write(str(div_content))
+            # write scripts
+            with open('scripts.js') as f2:
+                file.write('\n')
+                file.write(f2.read())
+
+
+def fix_conventions(file_path, output_path):
+    div_content = None
+    with open(file_path) as f:
+        soup = bs4.BeautifulSoup(f.read(), 'html.parser')
+        # beautify tables
+        tables = soup.find_all('table')
+        ths = soup.find_all('th')
+        for table in tables:
+            table['cellpadding'] = '0'
+            table['cellspacing'] = '0'
+            table['border'] = '0'
+            table['class'] = 'table table-bordered order-column compact stripe dataTable no-footer table-nonfluid'
+            table['role'] = 'grid'
+        for th in ths:
+            th['role'] = 'row'
+        tbodys = soup.find_all('tbody')
+        for tbody in tbodys:
+            counter = 1
+            for tr in tbody.find_all('tr'):
+                if counter % 2 == 0:
+                    tr['class'] = 'even'
+                else:
+                    tr['class'] = 'odd'
+                counter += 1
+        # fix titles size
+        for level in list(range(5, 0, -1)):
+            tags = soup.find_all(f'h{level}')
+            for tag in tags:
+                tag.name = f'h{level + 1}'
+
+        with open(output_path, 'w') as file:
+            file.write(str(soup))
+
+
+def main():
+    print('Start kniting Rmd to HTML...')
+    print('File: _faq.Rmd')
+    os.system('Rscript --vanilla knitRmdToHTML.R _faq.Rmd ' + RSTUDIO_PANDOC)
+    fix_FAQ('_faq.html', 'faq_with_indexes.html')
+    print('File: conventions.rst')
+    os.system('rst2html5 conventions.rst conventions.html')
+    fix_conventions('conventions.html', 'conventions.html')
+    print('Converted! Output files: \033[94m faq_with_indexes.html conventions.html')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/knitRmdToHTML.R b/knitRmdToHTML.R
@@ -0,0 +1,10 @@
+#!/usr/bin/env Rscript
+# Render an R Markdown file to an HTML document.
+#
+# Usage: Rscript knitRmdToHTML.R <input .Rmd file> <RStudio pandoc path>
+library(rmarkdown)
+
+cli_args <- commandArgs(trailingOnly = TRUE)
+
+# Both positional arguments are required.
+if (length(cli_args) < 2) {
+  stop("Please provide arguments of (1)input .Rmd file (2)RStudio pandoc path.", call.=FALSE)
+}
+
+# Export the pandoc path given as the second argument before rendering.
+Sys.setenv(RSTUDIO_PANDOC = cli_args[2])
+
+render(cli_args[1], 'html_document')
diff --git a/scripts.js b/scripts.js