diff --git a/.gitattributes b/.gitattributes index a236965b7f4a..a6aaa9577cb4 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,3 @@ test/fb2/reader/* -text +pandoc-lua-engine/test/*.custom -text +pandoc-lua-engine/test/*.txt -text diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 6162c2e57bba..8eb77bc46568 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -6,20 +6,18 @@ labels: 'bug' assignees: '' --- - **Explain the problem.** -Include the exact command line you used and all inputs necessary to reproduce the issue. Please create as minimal an example as possible, to help the maintainers isolate the problem. Explain the output you received and how it differs from what you expected. +Include the **exact command line** you used and **all inputs necessary to reproduce the issue**. Please create as minimal an example as possible, to help the maintainers isolate the problem. Explain the output you received and how it differs from what you expected. **Pandoc version?** -What version of pandoc are you using, on what OS? - +What version of pandoc are you using, on what OS? (If it's not the latest release, please try with the latest release before reporting the issue.) 
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 000000000000..8b01d406f41e --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,58 @@ +name: benchmarks + +on: workflow_dispatch + +permissions: + contents: read + +jobs: + benchmark: + + runs-on: ubuntu-latest + strategy: + fail-fast: true + matrix: + versions: + - ghc: '8.10.7' + cabal: '3.2' + - ghc: '9.2.2' + cabal: '3.6' + steps: + - uses: actions/checkout@v3 + + - name: Install cabal/ghc + run: | + ghcup install ghc --set ${{ matrix.versions.ghc }} + ghcup install cabal ${{ matrix.versions.cabal }} + + # declare/restore cached things + + - name: Cache cabal global package db + id: cabal-global + uses: actions/cache@v3 + with: + path: | + ~/.cabal + key: benchmark-${{ runner.os }}-${{ matrix.versions.ghc }}-${{ matrix.versions.cabal }}-cabal-global-${{ secrets.CACHE_VERSION }} + + - name: Cache cabal work + id: cabal-local + uses: actions/cache@v3 + with: + path: | + dist-newstyle + key: benchmark-${{ runner.os }}-${{ matrix.versions.ghc }}-${{ matrix.versions.cabal }}-cabal-local-${{ secrets.CACHE_VERSION }} + + - name: Install dependencies + run: | + v2=$([ "${{ matrix.versions.cabal }}" = "2.2" ] && printf 'new' || printf 'v2') + cabal $v2-update + cabal $v2-build --dependencies-only --enable-optimization=1 --enable-benchmarks --disable-tests + + - name: Build and test + run: | + v2=$([ "${{ matrix.versions.cabal }}" = "2.2" ] && printf 'new' || printf 'v2') + cabal $v2-build --enable-optimization=1 --enable-benchmarks --disable-tests 2>&1 | tee build.log + # fail if warnings in local build + ! 
grep -q ": *[Ww]arning:" build.log || exit 1 + cabal $v2-bench --enable-optimization=1 --benchmark-options='--timeout=6 +RTS -T -RTS' diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fe65baa6e2fc..31a33f7e5c58 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,14 +6,14 @@ on: - '*' - '!rc/*' paths-ignore: - - 'doc/**' + - 'doc/*.md' - 'MANUAL.txt' - '*.md' - 'RELEASE_CHECKLIST' - 'BUGS' - - 'changelog' - 'README.template' - - 'appveyor.yml' + - 'hie.yaml' + - '*.nix' - 'tools/**' - 'linux/**' - 'macos/**' @@ -21,14 +21,14 @@ on: - 'man/**' pull_request: paths-ignore: - - 'doc/**' + - 'doc/*.md' - 'MANUAL.txt' - '*.md' - 'RELEASE_CHECKLIST' - 'BUGS' - - 'changelog' - 'README.template' - - 'appveyor.yml' + - 'hie.yaml' + - '*.nix' - 'tools/**' - 'linux/**' - 'macos/**' @@ -52,7 +52,7 @@ jobs: testopts: '--test-option=--hide-successes --test-option=--ansi-tricks=false' - ghc: '8.8.4' cabal: '3.2' - cabalopts: '' + cabalopts: '-f-lua -f-server --enable-benchmarks' testopts: '--test-option=--hide-successes --test-option=--ansi-tricks=false' - ghc: '8.10.7' cabal: '3.2' @@ -66,6 +66,15 @@ jobs: cabal: '3.6' cabalopts: '' testopts: '--test-option=--hide-successes --test-option=--ansi-tricks=false' + - ghc: '9.4.2' + cabal: '3.8' + cabalopts: '' + testopts: '--test-option=--hide-successes --test-option=--ansi-tricks=false' + + - ghc: '9.4.2' + cabal: '3.8' + cabalopts: '--allow-newer --constrain "mtl >= 2.3.1"' + testopts: '--test-option=--hide-successes --test-option=--ansi-tricks=false' steps: - uses: actions/checkout@v3 @@ -79,8 +88,6 @@ jobs: ghcup install cabal --set ${{ matrix.versions.cabal }} # declare/restore cached things - # caching doesn't work for scheduled runs yet - # https://github.com/actions/cache/issues/63 - name: Cache cabal global package db id: cabal-global @@ -88,7 +95,7 @@ jobs: with: path: | ~/.cabal - key: ${{ runner.os }}-${{ matrix.versions.ghc }}-${{ matrix.versions.cabal }}-cabal-global-${{ 
hashFiles('cabal.project') }} + key: ${{ runner.os }}-${{ matrix.versions.ghc }}-${{ matrix.versions.cabal }}-cabal-global-${{ secrets.CACHE_VERSION }} - name: Cache cabal work id: cabal-local @@ -96,19 +103,19 @@ jobs: with: path: | dist-newstyle - key: ${{ runner.os }}-${{ matrix.versions.ghc }}-${{ matrix.versions.cabal }}-cabal-local + key: ${{ runner.os }}-${{ matrix.versions.ghc }}-${{ matrix.versions.cabal }}-cabal-local-${{ secrets.CACHE_VERSION }} - name: Install dependencies run: | cabal update - cabal build ${{ matrix.versions.cabalopts }} --dependencies-only --enable-tests --disable-optimization + cabal build all ${{ matrix.versions.cabalopts }} --dependencies-only --enable-tests --disable-optimization - name: Build and test run: | cabal build ${{ matrix.versions.cabalopts }} --enable-tests --disable-optimization 2>&1 | tee build.log # fail if warnings in local build - ! grep -q ": *[Ww]arning:" build.log || exit 1 - cabal test ${{ matrix.versions.cabalopts }} --disable-optimization ${{ matrix.versions.testopts }} + ! 
grep -q ": *[Ww]arning:\|: *error:" build.log || exit 1 + cabal test all ${{ matrix.versions.cabalopts }} --disable-optimization ${{ matrix.versions.testopts }} windows: @@ -118,8 +125,6 @@ jobs: - uses: actions/checkout@v3 # declare/restore cached things - # caching doesn't work for scheduled runs yet - # https://github.com/actions/cache/issues/63 - name: Cache stack global package db id: stack-global-package-db @@ -130,12 +135,13 @@ jobs: key: ${{ runner.os }}-appdata-roaming-stack-${{ hashFiles('stack.yaml') }} # stack's local package dbs for the project and each package - # - name: Cache .stack-work - # uses: actions/cache@v1 - # with: - # path: .stack-work - # key: ${{ runner.os }}-stack-work-${{ hashFiles('stack.yaml') }} - # restore-keys: ${{ runner.os }}-stack-work + - name: Cache .stack-work + uses: actions/cache@v1 + with: + path: | + .stack-work + key: ${{ runner.os }}-stack-work-${{ hashFiles('stack.yaml') }}-${{ secrets.CACHE_VERSION }} + restore-keys: ${{ runner.os }}-stack-work-${{ secrets.CACHE_VERSION }} - name: Install dependencies run: | @@ -147,104 +153,50 @@ jobs: run: | stack test --fast --test-arguments="--hide-successes --ansi-tricks=false" - macos: - - runs-on: macos-11 - strategy: - fail-fast: true - matrix: - versions: - - ghc: '8.8.4' - cabal: '3.2' - - steps: - - uses: actions/checkout@v3 - - - name: Install cabal/ghc - run: | - ghcup install ghc --set ${{ matrix.versions.ghc }} - ghcup install cabal ${{ matrix.versions.cabal }} - - # declare/restore cached things - # caching doesn't work for scheduled runs yet - # https://github.com/actions/cache/issues/63 - - - name: Cache cabal global package db - id: cabal-global - uses: actions/cache@v3 - with: - path: | - ~/.cabal - key: ${{ runner.os }}-${{ matrix.versions.ghc }}-${{ matrix.versions.cabal }}-cabal-global-${{ hashFiles('cabal.project') }} - - - name: Cache cabal work - id: cabal-local - uses: actions/cache@v3 - with: - path: | - dist-newstyle - key: ${{ runner.os }}-${{ 
matrix.versions.ghc }}-${{ matrix.versions.cabal }}-cabal-local - - - name: Install dependencies - run: | - cabal v2-update - cabal v2-build --dependencies-only --enable-tests --disable-optimization - - name: Build and test - run: | - cabal v2-build --enable-tests --disable-optimization 2>&1 | tee build.log - # fail if warnings in local build - ! grep -q ": *[Ww]arning:" build.log || exit 1 - cabal v2-test --disable-optimization --test-option=--hide-successes --test-option=--ansi-tricks=false - - benchmark: - - runs-on: ubuntu-18.04 - strategy: - fail-fast: true - matrix: - versions: - - ghc: '8.10.7' - cabal: '3.2' - - ghc: '9.2.2' - cabal: '3.6' - steps: - - uses: actions/checkout@v3 - - - name: Install cabal/ghc - run: | - ghcup install ghc --set ${{ matrix.versions.ghc }} - ghcup install cabal ${{ matrix.versions.cabal }} - - # declare/restore cached things - # caching doesn't work for scheduled runs yet - # https://github.com/actions/cache/issues/63 - - - name: Cache cabal global package db - id: cabal-global - uses: actions/cache@v3 - with: - path: | - ~/.cabal - key: benchmark-${{ runner.os }}-${{ matrix.versions.ghc }}-${{ matrix.versions.cabal }}-cabal-global-${{ hashFiles('cabal.project') }} - - - name: Cache cabal work - id: cabal-local - uses: actions/cache@v3 - with: - path: | - dist-newstyle - key: benchmark-${{ runner.os }}-${{ matrix.versions.ghc }}-${{ matrix.versions.cabal }}-cabal-local - - - name: Install dependencies - run: | - v2=$([ "${{ matrix.versions.cabal }}" = "2.2" ] && printf 'new' || printf 'v2') - cabal $v2-update - cabal $v2-build --dependencies-only --enable-optimization=1 --enable-benchmarks --disable-tests - - - name: Build and test - run: | - v2=$([ "${{ matrix.versions.cabal }}" = "2.2" ] && printf 'new' || printf 'v2') - cabal $v2-build --enable-optimization=1 --enable-benchmarks --disable-tests 2>&1 | tee build.log - # fail if warnings in local build - ! 
grep -q ": *[Ww]arning:" build.log || exit 1 - cabal $v2-bench --enable-optimization=1 --benchmark-options='--timeout=6 +RTS -T -RTS' +# We no longer run the macos tests, to make CI faster. +# macos: + +# runs-on: macos-11 +# strategy: +# fail-fast: true +# matrix: +# versions: +# - ghc: '8.8.4' +# cabal: '3.2' + +# steps: +# - uses: actions/checkout@v3 + +# - name: Install cabal/ghc +# run: | +# ghcup install ghc --set ${{ matrix.versions.ghc }} +# ghcup install cabal ${{ matrix.versions.cabal }} + +# # declare/restore cached things + +# - name: Cache cabal global package db +# id: cabal-global +# uses: actions/cache@v3 +# with: +# path: | +# ~/.cabal +# key: ${{ runner.os }}-${{ matrix.versions.ghc }}-${{ matrix.versions.cabal }}-cabal-global-${{ secrets.CACHE_VERSION }} + +# - name: Cache cabal work +# id: cabal-local +# uses: actions/cache@v3 +# with: +# path: | +# dist-newstyle +# key: ${{ runner.os }}-${{ matrix.versions.ghc }}-${{ matrix.versions.cabal }}-cabal-local-${{ secrets.CACHE_VERSION }} + +# - name: Install dependencies +# run: | +# cabal v2-update +# cabal v2-build --dependencies-only --enable-tests --disable-optimization +# - name: Build and test +# run: | +# cabal v2-build --enable-tests --disable-optimization 2>&1 | tee build.log +# # fail if warnings in local build +# ! 
grep -q ": *[Ww]arning:" build.log || exit 1 +# cabal v2-test --disable-optimization --test-option=--hide-successes --test-option=--ansi-tricks=false diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index cff15fc69b40..7a4906107cdc 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -1,6 +1,7 @@ name: Nightly on: + workflow_dispatch: schedule: - cron: '53 7 * * *' @@ -14,29 +15,21 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Install cabal/ghc - run: | - ghcup install ghc --set 8.10.7 - ghcup install cabal --set 3.2 - - name: Install dependencies run: | ghc --version cabal --version - cabal v2-update - cabal v2-build --dependencies-only - - name: Build - run: | - cabal v2-install -fnightly - strip $HOME/.cabal/bin/pandoc - - name: Install artifact + cabal update + cabal build --dependencies-only -fembed_data_files -fnightly pandoc-cli + - name: Build and install artifact run: | export ARTIFACTS=nightly-linux/pandoc-nightly-linux-$(date +%Y-%m-%d) mkdir -p ${ARTIFACTS} - cp $HOME/.cabal/bin/pandoc ${ARTIFACTS}/ + cabal install --installdir="${ARTIFACTS}" --install-method=copy -fembed_data_files -fnightly pandoc-cli + strip "${ARTIFACTS}/pandoc" cp COPYRIGHT ${ARTIFACTS}/ echo "Built from ${GITHUB_SHA}" > ${ARTIFACTS}/README.nightly.txt - - uses: actions/upload-artifact@master + - uses: actions/upload-artifact@v3 with: name: nightly-linux path: nightly-linux @@ -49,20 +42,20 @@ jobs: - uses: actions/checkout@v3 - name: Install dependencies run: | - stack update - stack build --dependencies-only pandoc + cabal update + cabal build --dependencies-only -fembed_data_files -fnightly pandoc-cli - name: Build artifacts shell: cmd run: | for /f %%a in ('powershell -Command "Get-Date -format yyyy-MM-dd"') do set THEDATE=%%a set ARTIFACTS=%CD%\nightly-windows\pandoc-nightly-windows-%THEDATE% mkdir %ARTIFACTS% - stack build --flag pandoc:nightly --flag pandoc:embed_data_files pandoc - forfiles /P 
.\.stack-work\install /M pandoc*.exe /S /C "cmd /C copy @path %%ARTIFACTS%%" + cabal install --install-method=copy --installdir=%ARTIFACTS% -fnightly -fembed_data_files pandoc-cli + strip %ARTIFACTS%/pandoc copy COPYRIGHT %ARTIFACTS% ren %ARTIFACTS%\COPYRIGHT COPYRIGHT.txt echo Built from %GITHUB_SHA% > %ARTIFACTS%\README.nightly.txt - - uses: actions/upload-artifact@master + - uses: actions/upload-artifact@v3 with: name: nightly-windows path: nightly-windows @@ -75,19 +68,17 @@ jobs: - uses: actions/checkout@v3 - name: Install dependencies run: | - stack update - stack build --dependencies-only pandoc + cabal update + cabal build --dependencies-only -fnightly -fembed_data_files pandoc-cli - name: Build artifacts run: | export ARTIFACTS=nightly-macos/pandoc-nightly-macos-$(date +%Y-%m-%d) mkdir -p ${ARTIFACTS} - stack build --flag pandoc:nightly --flag pandoc:embed_data_files pandoc - for f in $(find .stack-work/install -name 'pandoc*' -perm +001 -type f); do cp $f ${ARTIFACTS}/; done - mv ${ARTIFACTS}/pandoc ${ARTIFACTS}/pandoc + cabal install --install-method=copy --installdir="${ARTIFACTS}" -fnightly -fembed_data_files pandoc-cli + strip "${ARTIFACTS}/pandoc" cp COPYRIGHT ${ARTIFACTS}/ echo "Built from ${GITHUB_SHA}" > ${ARTIFACTS}/README.nightly.txt - - uses: actions/upload-artifact@master + - uses: actions/upload-artifact@v3 with: name: nightly-macos path: nightly-macos - diff --git a/.github/workflows/release-candidate.yml b/.github/workflows/release-candidate.yml index 859c937b07c0..26e297fe2011 100644 --- a/.github/workflows/release-candidate.yml +++ b/.github/workflows/release-candidate.yml @@ -1,9 +1,6 @@ name: Release candidate -on: - push: - branches: - - 'rc/**' +on: workflow_dispatch permissions: contents: read @@ -20,7 +17,7 @@ jobs: mkdir linux-release-candidate cp linux/artifacts/*.deb linux-release-candidate/ cp linux/artifacts/*.tar.gz linux-release-candidate/ - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 with: name: 
linux-release-candidate path: linux-release-candidate @@ -34,7 +31,7 @@ jobs: versions: - osarch: "windows-x86_64" arch: "x64" - stack_opts: '-j4 +RTS -A256m -RTS' + stack_opts: '-j4 +RTS -A256m -RTS --flag pandoc:embed_data_files --flag pandoc-cli:lua --flag pandoc-cli:server' steps: - uses: actions/checkout@v3 @@ -46,7 +43,7 @@ jobs: shell: cmd run: | for /f %%a in ('powershell -Command "Get-Date -format yyyy-MM-dd"') do set THEDATE=%%a - stack build ${{ matrix.versions.stack_opts }} pandoc + stack build ${{ matrix.versions.stack_opts }} pandoc-cli for /f "tokens=1-2 delims= " %%a in ('stack exec pandoc -- --version') do ( @set VERSION=%%b goto :next @@ -81,7 +78,7 @@ jobs: copy windows\pandoc-%VERSION%-${{ matrix.versions.osarch }}-UNSIGNED.msi windows-release-candidate copy windows\pandoc-%VERSION%-${{ matrix.versions.osarch }}.zip windows-release-candidate copy windows\Makefile windows-release-candidate - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 with: name: windows-release-candidate path: windows-release-candidate @@ -116,13 +113,14 @@ jobs: strip ${DEST}/bin/pandoc cp man/pandoc.1 ${DEST}/share/man/man1/pandoc.1 cp man/pandoc-server.1 ${DEST}/share/man/man1/pandoc-server.1 + cp man/pandoc-lua.1 ${DEST}/share/man/man1/pandoc-lua.1 ~/.local/bin/pandoc -s COPYING.md -Vpagetitle=License -o ${RESOURCES}/license.html chown -R $ME:staff ${ROOT} sed -e "s/PANDOCVERSION/${VERSION}/" macos/distribution.xml.in > ${ARTIFACTS}/distribution.xml cp macos/Makefile ${ARTIFACTS}/ echo ${VERSION} > ${ARTIFACTS}/version.txt - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 with: name: macos-release-candidate path: macos-release-candidate diff --git a/.gitignore b/.gitignore index 5bdcd2d5f86b..93a281a63ba0 100644 --- a/.gitignore +++ b/.gitignore @@ -32,9 +32,11 @@ !changelog.md !default.nix !pandoc.cabal +!hie.yaml !release.nix !shell.nix !stack.yaml +!weeder.dhall !app/** !benchmark/** !citeproc/** @@ -43,10 +45,14 @@ 
!linux/** !macos/** !man/** +!pandoc-lua-engine/** +!pandoc-server/** +!pandoc-cli/** !src/** !test/** !tools/** !trypandoc/** +!xml-light/** !windows/** *.bkp *.orig diff --git a/.hlint.yaml b/.hlint.yaml index 177aa2e32cea..afc8c6ff1910 100644 --- a/.hlint.yaml +++ b/.hlint.yaml @@ -19,7 +19,7 @@ - ignore: {name: "Use camelCase"} - ignore: {name: "Use fmap"} # specific for GHC 7.8 compat - ignore: {name: "Use isDigit"} - +- ignore: {name: "Use <&>"} - ignore: name: "Monad law, left identity" within: Text.Pandoc.App.OutputSettings diff --git a/INSTALL.md b/INSTALL.md index 88687f034476..6d1f7efbe260 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -215,8 +215,6 @@ The easiest way to build pandoc from source is to use [stack][stack]: `pandoc` executable into `~/.local/bin`, which you should add to your `PATH`. This process will take a while, and will consume a considerable amount of disk space. - If you also want the `pandoc-server` executable, add - `--flag pandoc:server` to the above command. ### Quick cabal method @@ -238,9 +236,6 @@ The easiest way to build pandoc from source is to use [stack][stack]: on linux/unix/macOS and in `%APPDATA%\cabal\bin` on Windows. Make sure this directory is in your path. - If you also want the `pandoc-server` executable, add - `-fserver` to the above command. - If you want to install a modified or development version of pandoc instead, switch to the source directory and do as above, but without the 'pandoc': @@ -285,14 +280,19 @@ You will need cabal version 2.0 or higher. `FLAGSPEC` is a list of Cabal configuration flags, optionally preceded by a `-` (to force the flag to `false`), and separated - by spaces. Pandoc's flags include: + by spaces. `pandoc`'s flags include: - `embed_data_files`: embed all data files into the binary (default no). This is helpful if you want to create a relocatable binary. - - `lua53`: embed support for Lua 5.3 instead of 5.4. 
+ `pandoc-cli`'s flags include: + + - `lua`: compile in support for Lua filters and custom + writers. - - `server`: build the `pandoc-server` executable. + - `server`: compile in support for running in HTTP server + mode when the executable is renamed (or symlinked as) + `pandoc-server`. 3. Build: diff --git a/MANUAL.txt b/MANUAL.txt index f4e91e6c6d2b..41a12df13bfc 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -536,13 +536,6 @@ header when requesting a document from a URL: where X = NUMBER - 1.* Specify the base level for headings (defaults to 1). -`--strip-empty-paragraphs` - -: *Deprecated. Use the `+empty_paragraphs` extension instead.* - Ignore paragraphs with no content. This option is useful - for converting word processing documents where users have - used empty paragraphs to create inter-paragraph space. - `--indented-code-classes=`*CLASSES* : Specify classes to use for indented code blocks--for example, @@ -564,6 +557,17 @@ header when requesting a document from a URL: footnotes and links will not work across files. Reading binary files (docx, odt, epub) implies `--file-scope`. + If two or more files are processed using `--file-scope`, + prefixes based on the filenames will be added to identifiers + in order to disambiguate them, and internal links will + be adjusted accordingly. For example, a header with + identifier `foo` in `subdir/file1.txt` will have its + identifier changed to `subdir__file1.txt__foo`. + + In addition, a Div with an identifier based on the filename + will be added around the file's content, so that internal + links to the filename will point to this Div's identifier. + `-F` *PROGRAM*, `--filter=`*PROGRAM* : Specify an executable to be used as a filter transforming the @@ -970,7 +974,8 @@ header when requesting a document from a URL: `data-external="1"` will be left alone; the documents they link to will not be incorporated in the document. 
Limitation: resources that are loaded dynamically through - JavaScript cannot be incorporated; as a result, some + JavaScript cannot be incorporated; as a result, fonts may + be missing when `--mathjax` is used, and some advanced features (e.g. zoom or speaker notes) may not work in an offline "self-contained" `reveal.js` slide show. @@ -1013,9 +1018,9 @@ header when requesting a document from a URL: ATX-style headings are always used for levels 3+. This option also affects Markdown cells in `ipynb` output. -`--atx-headers` +`--list-tables` -: *Deprecated synonym for `--markdown-headings=atx`.* +: Render tables as list tables in RST output. `--top-level-division=default`|`section`|`chapter`|`part` @@ -1107,6 +1112,10 @@ header when requesting a document from a URL: : Link to a CSS style sheet. This option can be used repeatedly to include multiple files. They will be included in the order specified. + This option only affects HTML (including HTML slide shows) + and EPUB output. It should be used together with + `-s/--standalone`, because the link to the stylesheet goes + in the document header. A stylesheet is required for generating EPUB. 
If none is provided using this option (or the `css` or `stylesheet` @@ -1530,11 +1539,11 @@ Nonzero exit codes have the following meanings: 62 PandocShouldNeverHappenError 63 PandocSomeError 64 PandocParseError - 65 PandocParsecError 66 PandocMakePDFError 67 PandocSyntaxMapError 83 PandocFilterError 84 PandocLuaError + 89 PandocNoScriptingEngine 91 PandocMacroLoop 92 PandocUTF8DecodingError 93 PandocIpynbDecodingError @@ -1889,6 +1898,10 @@ To include the built-in citeproc filter, use either `citeproc` or | ``` | ``` | +----------------------------------+-----------------------------------+ | ``` | ``` yaml | +| --list-tables | list-tables: true | +| ``` | ``` | ++----------------------------------+-----------------------------------+ +| ``` | ``` yaml | | --top-level-division chapter | top-level-division: chapter | | ``` | ``` | +----------------------------------+-----------------------------------+ @@ -3075,7 +3088,14 @@ The `--css` option also affects the output. ### Variables for ms `fontfamily` -: font family (e.g. `T` or `P`) +: `A` (Avant Garde), `B` (Bookman), `C` (Helvetica), `HN` (Helvetica + Narrow), `P` (Palatino), or `T` (Times New Roman). This setting does not + affect source code, which is always displayed using monospace Courier. + These built-in fonts are limited in their coverage of characters. + Additional fonts may be installed using the script + [`install-fonts.sh`](https://www.schaffter.ca/mom/bin/install-font.sh) + provided by Peter Schaffter and documented in detail on + [his web site](https://www.schaffter.ca/mom/momdoc/appendices.html#steps). `indent` : paragraph indent (e.g. `2m`) @@ -3145,6 +3165,9 @@ on the output format, and include the following: `curdir` : working directory from which pandoc is run. +`pandoc-version` +: pandoc version. + `toc` : non-null value if `--toc/--table-of-contents` was specified @@ -3173,7 +3196,7 @@ for other formats are covered. 
Note that markdown extensions added to the `ipynb` format affect Markdown cells in Jupyter notebooks (as do command-line -options like `--atx-headers`). +options like `--markdown-headings`). ## Typography @@ -3756,7 +3779,7 @@ unless the `markdown_strict` format is used, the following does not produce a nested block quote in pandoc: > This is a block quote. - >> Nested. + >> Not nested, since `blank_before_blockquote` is enabled by default ## Verbatim (code) blocks @@ -3857,6 +3880,18 @@ This is equivalent to: qsort [] = [] ``` +This shortcut form may be combined with attributes: + + ```haskell {.numberLines} + qsort [] = [] + ``` + +Which is equivalent to: + + ``` {.haskell .numberLines} + qsort [] = [] + ``` + If the `fenced_code_attributes` extension is disabled, but input contains class attribute(s) for the code block, the first class attribute will be printed after the opening fence as a bare @@ -4231,8 +4266,8 @@ proportionally spaced fonts, as it does not require lining up columns. A caption may optionally be provided with all 4 kinds of tables (as illustrated in the examples below). A caption is a paragraph beginning -with the string `Table:` (or just `:`), which will be stripped off. -It may appear either before or after the table. +with the string `Table:` (or `table:` or just `:`), which will be stripped +off. It may appear either before or after the table. #### Extension: `simple_tables` #### @@ -4344,12 +4379,35 @@ Grid tables look like this: | | | - tasty | +---------------+---------------+--------------------+ -The row of `=`s separates the header from the table body, and can be -omitted for a headerless table. The cells of grid tables may contain -arbitrary block elements (multiple paragraphs, code blocks, lists, -etc.). Cells that span multiple columns or rows are not -supported. Grid tables can be created easily using Emacs' table-mode -(`M-x table-insert`). 
+The row of `=`s separates the header from the table body, +and can be omitted for a headerless table. The cells of grid +tables may contain arbitrary block elements (multiple +paragraphs, code blocks, lists, etc.). + +Cells can span multiple columns or rows: + + +---------------------+----------+ + | Property | Earth | + +=============+=======+==========+ + | | min | -89.2 °C | + | Temperature +-------+----------+ + | 1961-1990 | mean | 14 °C | + | +-------+----------+ + | | min | 56.7 °C | + +-------------+-------+----------+ + +A table header may contain more than one row: + + +---------------------+-----------------------+ + | Location | Temperature 1961-1990 | + | | in degree Celsius | + | +-------+-------+-------+ + | | min | mean | max | + +=====================+=======+=======+=======+ + | Antarctica | -89.2 | N/A | 19.8 | + +---------------------+-------+-------+-------+ + | Earth | -89.2 | 14 | 56.7 | + +---------------------+-------+-------+-------+ Alignments can be specified as with pipe tables, by putting colons at the boundaries of the separator line after the @@ -4367,18 +4425,23 @@ For headerless tables, the colons go on the top line instead: | Right | Left | Centered | +---------------+---------------+--------------------+ -##### Grid Table Limitations ##### +A table foot can be defined by enclosing it with separator lines +that use `=` instead of `-`: -Pandoc does not support grid tables with row spans or column spans. -This means that neither variable numbers of columns across rows nor -variable numbers of rows across columns are supported by Pandoc. -All grid tables must have the same number of columns in each row, -and the same number of rows in each column. For example, the -Docutils [sample grid tables] will not render as expected with -Pandoc. 
+ +---------------+---------------+ + | Fruit | Price | + +===============+===============+ + | Bananas | $1.34 | + +---------------+---------------+ + | Oranges | $2.10 | + +===============+===============+ + | Sum | $3.44 | + +===============+===============+ -[sample grid tables]: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#grid-tables +The foot must always be placed at the very bottom of the table. +Grid tables can be created easily using Emacs' table-mode +(`M-x table-insert`). #### Extension: `pipe_tables` #### @@ -6466,8 +6529,10 @@ beamer slide shows, and pptx slide shows. ### On all slides (beamer, reveal.js, pptx) With beamer and reveal.js, the configuration option `background-image` can be -used either in the YAML metadata block or as a command-line variable to get the -same image on every slide. +used either in the YAML metadata block or as a command-line variable to get the same image on every slide. + +Note that for reveal.js, the `background-image` will be used as +a `parallaxBackgroundImage` (see below). For pptx, you can use a [reference doc](#option--reference-doc) in which background images have been set on the [relevant @@ -6476,11 +6541,12 @@ layouts](#powerpoint-layout-choice). #### `parallaxBackgroundImage` (reveal.js) For reveal.js, there is also the reveal.js-native option -`parallaxBackgroundImage`, which can be used instead of `background-image` to -produce a parallax scrolling background. You must also set -`parallaxBackgroundSize`, and can optionally set `parallaxBackgroundHorizontal` -and `parallaxBackgroundVertical` to configure the scrolling behaviour. See the -[reveal.js documentation](https://revealjs.com/backgrounds/#parallax-background) +`parallaxBackgroundImage`, which produces a parallax scrolling background. +You must also set `parallaxBackgroundSize`, and can optionally set +`parallaxBackgroundHorizontal` +and `parallaxBackgroundVertical` to configure the scrolling behaviour. 
+See the [reveal.js +documentation](https://revealjs.com/backgrounds/#parallax-background) for more details about the meaning of these options. In reveal.js's overview mode, the parallaxBackgroundImage will show up @@ -6852,8 +6918,8 @@ translating between Markdown and ipynb notebooks. Note that options and extensions that affect reading and writing of Markdown will also affect Markdown cells in ipynb notebooks. For example, `--wrap=preserve` will preserve -soft line breaks in Markdown cells; `--atx-headers` will -cause ATX-style headings to be used; and `--preserve-tabs` will +soft line breaks in Markdown cells; `--markdown-headings=setext` will +cause Setext-style headings to be used; and `--preserve-tabs` will prevent tabs from being turned to spaces. # Syntax highlighting @@ -7021,10 +7087,7 @@ second `options` parameter. A custom writer is a Lua script that defines a function that specifies how to render each element in a Pandoc AST. -To see a documented example which you can modify according -to your needs: - - pandoc --print-default-data-file sample.lua +See the [djot-writer.lua] for a full-featured example. Note that custom writers have no default template. If you want to use `--standalone` with a custom writer, you will need to @@ -7035,6 +7098,7 @@ subdirectory of your user data directory (see [Templates]). [Lua]: https://www.lua.org [lpeg]: http://www.inf.puc-rio.br/~roberto/lpeg/ +[djot-writer.lua]: https://github.com/jgm/djot/blob/main/djot-writer.lua # Reproducible builds @@ -7054,10 +7118,10 @@ metadata field (see [EPUB Metadata], above). # Running pandoc as a web server If you rename (or symlink) the pandoc executable to -`pandoc-server`, it will start up a web server with a JSON -API. This server exposes most of the conversion functionality -of pandoc. For full documentation, see the [pandoc-server] -man page. +`pandoc-server`, or if you call pandoc with `server` as the first +argument, it will start up a web server with a JSON API. 
This +server exposes most of the conversion functionality of pandoc. For +full documentation, see the [pandoc-server] man page. If you rename (or symlink) the pandoc executable to `pandoc-server.cgi`, it will function as a CGI program @@ -7069,6 +7133,17 @@ will be performed on the server during pandoc conversions. [pandoc-server]: https://github.com/jgm/pandoc/blob/master/doc/pandoc-server.md +# Running pandoc as a Lua interpreter + +Calling the pandoc executable under the name `pandoc-lua` or with +`lua` as the first argument will make it function as a standalone +Lua interpreter. The behavior is mostly identical to that of the +[standalone `lua` executable][lua standalone], version 5.4. +However, there is no REPL yet, and the `-i` option has no effect. +For full documentation, see the [pandoc-lua] man page. + +[lua standalone]: https://www.lua.org/manual/5.4/manual.html#7 +[pandoc-lua]: https://github.com/jgm/pandoc/blob/master/doc/pandoc-lua.md # A note on security diff --git a/Makefile b/Makefile index f5beb0a44e9f..44afe926f601 100644 --- a/Makefile +++ b/Makefile @@ -1,86 +1,147 @@ version?=$(shell grep '^[Vv]ersion:' pandoc.cabal | awk '{print $$2;}') pandoc=$(shell find dist -name pandoc -type f -exec ls -t {} \; | head -1) -SOURCEFILES?=$(shell git ls-tree -r master --name-only | grep "\.hs$$") -BRANCH?=master -ARCH=$(shell uname -m) -DOCKERIMAGE=registry.gitlab.b-data.ch/ghc/ghc4pandoc:9.2.3 -COMMIT=$(shell git rev-parse --short HEAD) +SOURCEFILES?=$(shell git ls-tree -r master --name-only src pandoc-cli pandoc-server pandoc-lua-engine | grep "\.hs$$") +PANDOCSOURCEFILES?=$(shell git ls-tree -r master --name-only src | grep "\.hs$$") +DOCKERIMAGE=registry.gitlab.b-data.ch/ghc/ghc4pandoc:9.4.3 TIMESTAMP=$(shell date "+%Y%m%d_%H%M") LATESTBENCH=$(word 1,$(shell ls -t bench_*.csv 2>/dev/null)) BASELINE?=$(LATESTBENCH) +ROOT?=Text.Pandoc ifeq ($(BASELINE),) BASELINECMD= else BASELINECMD=--baseline $(BASELINE) endif -GHCOPTS=-fdiagnostics-color=always -j4 
+RTS -A8m -RTS +GHCOPTS=-fwrite-ide-info -fdiagnostics-color=always -j4 +RTS -A8m -RTS +CABALOPTS?=--disable-optimization WEBSITE=../../web/pandoc.org REVISION?=1 BENCHARGS?=--csv bench_$(TIMESTAMP).csv $(BASELINECMD) --timeout=6 +RTS -T --nonmoving-gc -RTS $(if $(PATTERN),--pattern "$(PATTERN)",) -quick-cabal: ## build & test with stack, no optimizations - cabal v2-test --ghc-options='$(GHCOPTS)' --disable-optimization --test-options="--hide-successes --ansi-tricks=false $(TESTARGS)" && cabal build --ghc-options='$(GHCOPTS)' --disable-optimization exe:pandoc +all: test build ## build executable and run tests +.PHONY: all + +build: ## build executable + cabal build \ + --ghc-options='$(GHCOPTS)' \ + $(CABALOPTS) pandoc-cli + @cabal list-bin $(CABALOPTS) --ghc-options='$(GHCOPTS)' pandoc-cli +.PHONY: build + +binpath: ## print path of built pandoc executable + @cabal list-bin $(CABALOPTS) pandoc-cli +.PHONY: binpath + +ghcid: ## run ghcid + ghcid -c 'cabal repl pandoc' +.PHONY: ghcid + +repl: ## run cabal repl + cabal repl $(CABALOPTS) pandoc +.PHONY: repl + +linecounts: ## print line counts for each module + @wc -l $(SOURCEFILES) | sort -n +.PHONY: linecounts # Note: to accept current results of golden tests, # make test TESTARGS='--accept' -quick: ## build & test with stack, no optimizations - stack install --ghc-options='$(GHCOPTS)' --system-ghc --flag 'pandoc:embed_data_files' --fast --test --test-arguments='-j4 --hide-successes --ansi-tricks=false $(TESTARGS)' +test: ## unoptimized build and run tests with cabal + cabal test \ + --ghc-options='$(GHCOPTS)' \ + $(CABALOPTS) \ + --test-options="--hide-successes --ansi-tricks=false $(TESTARGS)" all +.PHONY: test + +quick-stack: ## unoptimized build and tests with stack + stack install \ + --ghc-options='$(GHCOPTS)' \ + --system-ghc --flag 'pandoc:embed_data_files' \ + --fast \ + --test \ + --test-arguments='-j4 --hide-successes --ansi-tricks=false $(TESTARGS)' +.PHONY: quick-stack + +prerelease: README.md 
fix_spacing check-cabal check-stack checkdocs man uncommitted_changes ## prerelease checks +.PHONY: prerelease -full: ## build with stack, including benchmarks - stack install --flag 'pandoc:embed_data_files' --bench --no-run-benchmarks --test --test-arguments='-j4 --hide-successes--ansi-tricks-false' --ghc-options '-Wall -Werror -fno-warn-unused-do-bind -O0 $(GHCOPTS)' +uncommitted_changes: + ! git diff | grep '.' +.PHONY: uncommitted_changes -ghci: ## start ghci session - stack ghci --flag 'pandoc:embed_data_files' +authors: ## prints unique authors since LASTRELEASE (version) + git log --pretty=format:"%an" $(LASTRELEASE)..HEAD | sort | uniq -haddock: ## build haddocks - stack haddock -check: check-cabal checkdocs ## prerelease checks - cabal check # check cabal file - cabal outdated # check cabal dependencies +check-stack: stack-lint-extra-deps # check that stack.yaml dependencies are up to date ! grep 'git:' stack.yaml # use only released versions - ! grep 'git:' cabal.project # use only released versions +.PHONY: check-stack check-cabal: git-files.txt sdist-files.txt @echo "Checking to see if all committed test/data files are in sdist." diff -u $^ + @for pkg in . pandoc-lua-engine pandoc-server pandoc-cli; \ + do \ + pushd $$pkg ; \ + cabal check ; \ + cabal outdated ; \ + popd ; \ + done + ! grep 'git:' cabal.project # use only released versions + +.PHONY: check-cabal checkdocs: @echo "Checking for tabs in manual." - ! grep -q -n -e "\t" MANUAL.txt changelog.md - -ghcid: ## run ghcid/stack - ghcid -c "stack repl --flag 'pandoc:embed_data_files'" - -ghcid-test: ## run ghcid/stack with tests - ghcid -c "stack repl --ghc-options=-XNoImplicitPrelude --flag 'pandoc:embed_data_files' --ghci-options=-fobject-code pandoc:lib pandoc:test-pandoc" + ! 
grep -q -n -e "\t" \ + MANUAL.txt changelog.md doc/pandoc-server.md doc/pandoc-lua.md +.PHONY: checkdocs bench: ## build and run benchmarks cabal bench --benchmark-options='$(BENCHARGS)' 2>&1 | tee "bench_$(TIMESTAMP).txt" -# stack bench \ -# --ghc-options '$(GHCOPTS)' \ -# --benchmark-arguments='$(BENCHARGS)' 2>&1 | \ -# tee "bench_latest.txt" +.PHONY: bench reformat: ## reformat with stylish-haskell for f in $(SOURCEFILES); do echo $$f; stylish-haskell -i $$f ; done +.PHONY: reformat -lint: hlint fix_spacing ## run linters - -hlint: ## run hlint - for f in $(SOURCEFILES); do echo $$f; hlint --verbose --refactor --refactor-options='-s -o -' $$f; done +lint: ## run hlint + hlint --report=hlint.html $(SOURCEFILES) || open hlint.html +.PHONY: lint -fix_spacing: ## Fix trailing newlines and spaces - for f in $(SOURCEFILES); do printf '%s\n' "`cat $$f`" | sed -e 's/ *$$//' > $$f.tmp; mv $$f.tmp $$f; done +fix_spacing: ## fix trailing newlines and spaces + @ERRORS=0; echo "Checking for spacing errors..." 
&& for f in $(SOURCEFILES); do printf '%s\n' "`cat $$f`" | sed -e 's/ *$$//' > $$f.tmp; diff -u $$f $$f.tmp || ERRORS=1; mv $$f.tmp $$f; done; [ $$ERRORS -eq 0 ] || echo "Spacing errors have been fixed; please commit the changes."; exit $$ERRORS +.PHONY: fix_spacing changes_github: ## copy this release's changes in gfm pandoc --lua-filter tools/extract-changes.lua changelog.md -t gfm --wrap=none --template tools/changes_template.html | sed -e 's/\\#/#/g' | pbcopy +.PHONY: changes_github -man: man/pandoc.1 man/pandoc-server.1 - +man: man/pandoc.1 man/pandoc-server.1 man/pandoc-lua.1 ## build man pages .PHONY: man +latex-package-dependencies: ## print packages used by default latex template + pandoc lua tools=latex-package-dependencies.lua +.PHONY: latex-package-dependencies + +coverage: ## code coverage information + cabal test \ + --ghc-options='-fhpc $(GHCOPTS)' \ + $(CABALOPTS) \ + --test-options="--hide-successes --ansi-tricks=false $(TESTARGS)" + hpc markup --destdir=coverage test/test-pandoc.tix + open coverage/hpc_index.html +.PHONY: coverage + +weeder: ## run weeder to find dead code + weeder +.PHONY: weeder + +transitive-deps: ## print transitive dependencies + cabal-plan topo | sort | sed -e 's/-[0-9]\..*//' +.PHONY: transitive-deps + debpkg: ## create linux package docker run -v `pwd`:/mnt \ -v `pwd`/linux/artifacts:/artifacts \ @@ -92,6 +153,7 @@ debpkg: ## create linux package $(DOCKERIMAGE) \ bash \ /mnt/linux/make_artifacts.sh +.PHONY: debpkg man/pandoc.1: MANUAL.txt man/pandoc.1.before man/pandoc.1.after pandoc $< -f markdown -t man -s \ @@ -102,28 +164,29 @@ man/pandoc.1: MANUAL.txt man/pandoc.1.before man/pandoc.1.after --variable footer="pandoc $(version)" \ -o $@ -man/pandoc-server.1: doc/pandoc-server.md +man/pandoc-%.1: doc/pandoc-%.md pandoc $< -f markdown -t man -s \ --lua-filter man/manfilter.lua \ - --variable footer="pandoc-server $(version)" \ + --variable footer="pandoc-$* $(version)" \ -o $@ README.md: README.template MANUAL.txt 
tools/update-readme.lua pandoc --lua-filter tools/update-readme.lua \ --reference-location=section -t gfm $< -o $@ -.PHONY: doc/lua-filters.md -doc/lua-filters.md: tools/update-lua-module-docs.lua +doc/lua-filters.md: tools/update-lua-module-docs.lua ## update lua-filters.md module docs cabal run pandoc -- --standalone \ --reference-links \ --lua-filter=$< \ --columns=66 \ --output=$@ \ $@ +.PHONY: doc/lua-filters.md download_stats: ## print download stats from GitHub releases curl https://api.github.com/repos/jgm/pandoc/releases | \ jq -r '.[] | .assets | .[] | "\(.download_count)\t\(.name)"' +.PHONY: download_stats pandoc-templates: ## update pandoc-templates repo rm ../pandoc-templates/default.* ; \ @@ -132,14 +195,46 @@ pandoc-templates: ## update pandoc-templates repo git add * && \ git commit -m "Updated templates for pandoc $(version)" && \ popd +.PHONY: pandoc-templates update-website: ## update website and upload make -C $(WEBSITE) update make -C $(WEBSITE) make -C $(WEBSITE) upload +.PHONY: update-website + +modules.csv: $(PANDOCSOURCEFILES) + @rg '^import.*Text\.Pandoc\.' 
--with-filename $^ \ + | rg -v 'Text\.Pandoc\.(Definition|Builder|Walk|Generic)' \ + | sort \ + | uniq \ + | sed -e 's/src\///' \ + | sed -e 's/\//\./g' \ + | sed -e 's/\.hs:import *\(qualified *\)*\([^ ]*\).*/,\2/' \ + > $@ + +modules.dot: modules.csv + @echo "digraph G {" > $@ + @echo "overlap=\"scale\"" >> $@ + @sed -e 's/\([^,]*\),\(.*\)/ "\1" -> "\2";/' $< >> $@ + @echo "}" >> $@ + +# To get the module dependencies of Text.Pandoc.Parsing: +# make modules.pdf ROOT=Text.Pandoc.Parsing +modules.pdf: modules.dot + gvpr -f tools/cliptree.gvpr -a '"$(ROOT)"' $< | dot -Tpdf > $@ + +# make moduledeps ROOT=Text.Pandoc.Parsing +moduledeps: modules.csv ## Print transitive dependencies of a module ROOT + @echo "$(ROOT)" + @lua tools/moduledeps.lua transitive $(ROOT) | sort +.PHONY: moduledeps clean: ## clean up - stack clean + cabal clean +.PHONY: clean + +.PHONY: .FORCE sdist-files.txt: .FORCE cabal sdist --list-only | sed 's/\.\///' | grep '^\(test\|data\)\/' | sort > $@ @@ -147,8 +242,18 @@ sdist-files.txt: .FORCE git-files.txt: .FORCE git ls-tree -r --name-only HEAD | grep '^\(test\|data\)\/' | sort > $@ +help: ## display this help + @echo "Targets:" + @grep -E '^[ a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "%-16s %s\n", $$1, $$2}' + @echo + @echo "Environment variables with default values:" + @printf "%-16s%s\n" "CABALOPTS" "$(CABALOPTS)" + @printf "%-16s%s\n" "GHCOPTS" "$(GHCOPTS)" + @printf "%-16s%s\n" "TESTARGS" "$(TESTARGS)" + @printf "%-16s%s\n" "BASELINE" "$(BASELINE)" + @printf "%-16s%s\n" "REVISION" "$(REVISION)" .PHONY: help -help: ## Display this help - @grep -E '^[ a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "%-30s %s\n", $$1, $$2}' -.PHONY: .FORCE deps quick haddock install clean test bench changes_github download_stats reformat lint weigh pandoc-templates update-website debpkg checkdocs ghcid ghci fix_spacing hlint check check-cabal check +hie.yaml: ## regenerate 
hie.yaml + gen-hie > $@ +.PHONY: hie.yaml diff --git a/README.template b/README.template index cb7060953e6a..e931a9fdec3f 100644 --- a/README.template +++ b/README.template @@ -9,7 +9,7 @@ Pandoc [![github release](https://img.shields.io/github/release/jgm/pandoc.svg?label=current+release)](https://github.com/jgm/pandoc/releases) [![hackage release](https://img.shields.io/hackage/v/pandoc.svg?label=hackage)](https://hackage.haskell.org/package/pandoc) [![homebrew](https://img.shields.io/homebrew/v/pandoc.svg)](https://formulae.brew.sh/formula/pandoc) -[![stackage LTS package](https://stackage.org/package/pandoc/badge/lts)](https://www.stackage.org/lts/package/pandoc-types) +[![stackage LTS package](https://stackage.org/package/pandoc/badge/lts)](https://www.stackage.org/lts/package/pandoc) [![CI tests](https://github.com/jgm/pandoc/workflows/CI%20tests/badge.svg)](https://github.com/jgm/pandoc/actions) [![license](https://img.shields.io/badge/license-GPLv2+-lightgray.svg)](https://www.gnu.org/licenses/gpl.html) [![pandoc-discuss on google groups](https://img.shields.io/badge/pandoc-discuss-red.svg?style=social)](https://groups.google.com/forum/#!forum/pandoc-discuss) diff --git a/RELEASE-CHECKLIST b/RELEASE-CHECKLIST index 7b05731dd459..2002bdc5d5aa 100644 --- a/RELEASE-CHECKLIST +++ b/RELEASE-CHECKLIST @@ -1,23 +1,29 @@ -[ ] make check -[ ] make README.md and commit if needed -[ ] make man +[ ] update MANUAL.txt date +[ ] make prerelease [ ] Finalize changelog -[ ] Update AUTHORS -[ ] push release candidate branch rc/VERSION and (if it builds - successfully) download artifacts +[ ] Update AUTHORS (make authors LASTRELEASE=x.y.z) +[ ] Run release candidate workflow manually on GitHub +[ ] if it builds successfully, download artifacts [ ] run tools/build-arm.sh to create and download arm64 linux package [ ] Use 'make' in macos and windows artifacts to sign code [ ] make update-website -[ ] Tag release in git +[ ] Tag release in git: + - use X.Y for pandoc + - if 
needed: pandoc-cli-X.Y + - if needed: pandoc-server-X.Y + - if needed: pandoc-lua-engine-X.Y +[ ] Upload packages to hackage if changed: + - pandoc + - pandoc-cli + - pandoc-server + - pandoc-lua-engine [ ] make pandoc-templates cd ../pandoc-templates git tag REL git push git push --tags [ ] Add release on github (use 'make changes_github' and upload files) -[ ] stack upload . [ ] copy deb to server, install it -[ ] cd trypandoc; make upload [ ] if needed, sh tools/build-and-upload-api-docs.sh [ ] Announce on pandoc-announce, pandoc-discuss diff --git a/Setup.hs b/Setup.hs deleted file mode 100644 index 9a994af677b0..000000000000 --- a/Setup.hs +++ /dev/null @@ -1,2 +0,0 @@ -import Distribution.Simple -main = defaultMain diff --git a/app/pandoc.hs b/app/pandoc.hs deleted file mode 100644 index 305fc405e3f1..000000000000 --- a/app/pandoc.hs +++ /dev/null @@ -1,33 +0,0 @@ -{- | - Module : Main - Copyright : Copyright (C) 2006-2022 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Parses command-line options and calls the appropriate readers and -writers. --} -module Main where -import qualified Control.Exception as E -import Text.Pandoc.App (convertWithOpts, defaultOpts, options, parseOptions) -import Text.Pandoc.Error (handleError) -import Text.Pandoc.Server (ServerOpts(..), parseServerOpts, app) -import Safe (readDef) -import System.Environment (getProgName, lookupEnv) -import qualified Network.Wai.Handler.CGI as CGI -import qualified Network.Wai.Handler.Warp as Warp -import Network.Wai.Middleware.Timeout (timeout) - -main :: IO () -main = E.handle (handleError . 
Left) $ do - prg <- getProgName - cgiTimeout <- maybe 2 (readDef 2) <$> lookupEnv "PANDOC_SERVER_TIMEOUT" - case prg of - "pandoc-server.cgi" -> CGI.run (timeout cgiTimeout app) - "pandoc-server" -> do - sopts <- parseServerOpts - Warp.run (serverPort sopts) (timeout (serverTimeout sopts) app) - _ -> parseOptions options defaultOpts >>= convertWithOpts diff --git a/benchmark/benchmark-pandoc.hs b/benchmark/benchmark-pandoc.hs index a006a43091c8..3b3c1fa8f162 100644 --- a/benchmark/benchmark-pandoc.hs +++ b/benchmark/benchmark-pandoc.hs @@ -28,6 +28,7 @@ import Test.Tasty.Bench import qualified Data.ByteString.Lazy as BL import Data.Maybe (mapMaybe) import Data.List (sortOn) +import Text.Pandoc.Format (FlavoredFormat(..)) readerBench :: Pandoc -> T.Text @@ -36,8 +37,8 @@ readerBench _ name | name `elem` ["bibtex", "biblatex", "csljson"] = Nothing readerBench doc name = either (const Nothing) Just $ runPure $ do - (rdr, rexts) <- getReader name - (wtr, wexts) <- getWriter name + (rdr, rexts) <- getReader $ FlavoredFormat name mempty + (wtr, wexts) <- getWriter $ FlavoredFormat name mempty case (rdr, wtr) of (TextReader r, TextWriter w) -> do inp <- w def{ writerWrapText = WrapAuto @@ -70,7 +71,7 @@ writerBench _ _ name | name `elem` ["bibtex", "biblatex", "csljson"] = Nothing writerBench imgs doc name = either (const Nothing) Just $ runPure $ do - (wtr, wexts) <- getWriter name + (wtr, wexts) <- getWriter $ FlavoredFormat name mempty case wtr of TextWriter writerFun -> return $ bench (T.unpack name) diff --git a/cabal.project b/cabal.project index 63601007d60c..43969d5fe0aa 100644 --- a/cabal.project +++ b/cabal.project @@ -1,13 +1,15 @@ -packages: pandoc.cabal +packages: . 
+ pandoc-lua-engine + pandoc-server + pandoc-cli tests: True flags: +embed_data_files -constraints: aeson >= 2.0.1.0 +constraints: skylighting-format-blaze-html >= 0.1.1 source-repository-package type: git - location: https://github.com/jgm/commonmark-hs - tag: 4f4fbe277044de0724dd0828a45311ab1413e30b - subdir: commonmark-extensions + location: https://github.com/jgm/citeproc + tag: cb54223919ecd327250f1b167e4e0c61473f402e source-repository-package type: git diff --git a/changelog.md b/changelog.md index bb69953e279b..87b6c3219019 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,552 @@ # Revision history for pandoc +## pandoc 3.0 (PROVISIONAL YYYY-MM-DD) + + * Split pandoc-server, pandoc-cli, and pandoc-lua-engine + into separate packages (#8309). + + * Pandoc now behaves like a Lua interpreter when called as + `pandoc-lua` or when `pandoc lua` is used (#8311, Albert Krewinkel). + The Lua API that is available in filters is automatically + available to the interpreter. (See the `pandoc-lua` man page.) + + * Pandoc behaves like a server when called as `pandoc-server` + or when `pandoc server` is used. (See the `pandoc-server` man page.) + + * A new command-line option `--list-tables`, causes tables to be + formatted as list tables in RST (#4564, with Francesco Occhipinti). + + * `--version` output no longer contains version info for dependent + packages. Instead, it contains a "Features" line that indicates + whether the binary was compiled with support for acting as a server, + and for using Lua filters and Custom writers. + + * Produce error if `--csl` is used more than once (#8195, Prat). + + * Remove deprecated `--atx-headers` option. + + * Remove deprecated option `--strip-empty-paragraphs`. + + * Add prefixes to identifiers with `--file-scope` (#6384). + This change only affects the case where `--file-scope` is used + and more than one file is specified on the command line. 
+ In this case, identifiers will be prefixed with a string + derived from the file path, to disambiguate them. For example, + an identifier `foo` in `contents/file1.txt` will become + `contents__file1.txt__foo`. Links will be adjusted accordingly: + if `file2.txt` links to `file1.txt#foo`, then the link will + be changed to point to `#file1.txt__foo`. Similarly, a link + to `file1.txt` will point to `#file1.txt`. A Div with an + identifier derived from the file path will be added around + each file's content, so that links to files will still work. + + * Docx reader: + + + Mark unnumbered headings with class `unnumbered` (#8148, + Albert Krewinkel). This change ensures good conversion + results when converting with `--number-sections`. + + * JATS reader: + + + Handle uri element in references (#8270). + + * LaTeX reader: + + + Skip parenthesized args of toprule, midrule, etc (#8242). + + Handle `##` macro arguments properly (#8243). + + Remove unused function `toksToString` in Parsing module. + + * Mediawiki reader: + + + Parse table cell with attributes, to support rowspan, colspan (#8231, + Ruqi). + + * HTML reader: + + + Fix regression for `` (#8330). It was no longer being parsed as Code + (Justin Wood). + + * Markdown reader: + + + Allow fenced code block "bare" language to be combined + with attributes (#8174, Siphalor), e.g. + ```` + ```haskell {.class #id} + ``` + ```` + + Allow table caption labels to start with lowercase `t` (#8259). + + Grid tables: allow specifying a table foot by enclosing it with + part separator lines, i.e., row separator lines consisting only + of `+` and `=` characters (#8257, Albert Krewinkel). E.g.: + ``` + +------+-------+ + | Item | Price | + +======+=======+ + | Eggs | 5£ | + +------+-------+ + | Spam | 3£ | + +======+=======+ + | Sum | 8£ | + +======+=======+ + ``` + + Fix `implicit_header_references` with duplicate headings (#8300). 
+ Documentation says that when more than one heading has the same text, + an implicit reference `[Heading text][]` refers to the first one. + Previously pandoc linked to the last one instead. This patch + makes pandoc conform to the documented behavior. + + * Org reader: + + + Allow org-ref v2 citations with `&` prefix (#8302). + + Make `#+pandoc-emphasis-pre` work as expected (#8360, Amir Dekel). + + * BibTeX reader: + + + Fix handling of `%` in `url` field (#7678). + `%` does not function as a comment character inside `url` + (where URL-encoding is common). + + Allow `url` field in `bibtex` as well as `biblatex` (#8287). + This field is not officially supported for BibTeX, but many styles + can handle it (), + and others will ignore it. + + * BibTeX writer: + + + Pass through `url` even for `bibtex` (#8287). + + * Org writer: + + + Pass through unknown languages in code blocks (#8278), instead + of producing `begin_example`. + + * LaTeX writer: + + + Do not repeat caption on headless tables (Albert Krewinkel). + The caption of headless tables was repeated on each page that contained + part of the table. It is now made part of the "first head", i.e. the + table head that is printed only once. + + Add separator line between table's body and its foot (Albert Krewinkel). + + Ignore languages with no babel equivalent, instead of generating an + invalid command in the preamble (#8325). + + Use `\includesvg` for SVGs and include the `svg` package (#8334). + + * JATS writer: + + + Use `` for LineBreak in the limited contexts that accept it + (#8344). + + Officially deprecate `writeJATS` in favor of `writeJatsArchiving`. + + * RTF writer: + + + Add space after unicode escape commands (#8264). This fixes + a bug that caused characters to disappear after unicode escapes. + + * RST writer: + + + Render tables as list tables when the `--list-tables` option is + specified (`writerListTables`) (#4564, Francesco Occhipinti). 
+ + * Commonmark writer: + + + Ensure that we don't have blank lines in raw HTML (#8307). + + * HTML writer: + + + Only add role attribute in HTML5 (#8241). It is not valid in HTML4. + + Avoid aria-hidden in code blocks for HTML4 (#8241). + + Only treat `. . .` as a slide pause in slides, and not in regular + HTML output (#8281). + + Properly merge classes for headings of level > 6 (#8363). + + Prevent `` inside `` (#7585). If a link text contains a link, + we replace it with a span. + + Replace deprecated aria roles for bibliography entries (#8354). + `doc-biblioentry` -> `listitem`, `doc-bibliography` -> `list`. + + * HTML, Markdown writers: filter out empty class attributes (#8251). + These should not be generated by any pandoc readers, but they + might be produced programmatically. + + * Docx writer: + + + Better handling of tables in lists (#5947). Previously the content + of each list cell was indented when the table belonged to a list + item. + + Indent tables in list items (#5947). + + Adjust correct attribute on `lang` element (#7022). For East Asian + languages, we need to adjust `w:eastAsia` rather than `w:val`. + This allows normal fonts to be used for any Latin-font text. + Similarly, for bidi languages, we need to adjust `w:bidi` + rather than `w:val`. We treat `he` and `ar` as bidi languages, + `zh`, `ja`, `ko` as East Asian languages. + + * Ms writer: + + + Properly format display equations (#8308). + + * XWiki writer: + + + Use template if it is specified (#8296). Previously + templates were ignored. + + * LaTeX template: set fonts after Beamer theme (Jeremie Knuesel). + Beamer themes such as metropolis and saintpetersburg change the default + fonts. This change gives precedence to the user font settings by moving + them after the loading of the Beamer theme. + + * reference.pptx: Remove unsupported element (#8342, #6338, Link Swanson). 
+ The default template contained text above the header, which can mislead + users into thinking there is a way to put text there using pandoc. + + * Text.Pandoc.App: + + + Parameterize `convertWithOpts` over scripting engine [API Change] + (Albert Krewinkel). + + Move initial input-to-Pandoc code to internal submodule (Albert + Krewinkel). + + * Text.Pandoc.Citeproc: + + + Check both extension and mime type to determine bibliography type + when the bibliography is fetched remotely (#7151). + + CslJson: allow an object with `items` property in addition to + an array of references. This is what is returned by e.g. + `https://api.zotero.org/groups/904125/items?v=...&format=csljson` + + Require a digit for an implicit "page" locator inside explicit locator + syntax `{...}` (#8288). Previously a locator specified as `{}` would + be rendered as `p.` with nothing after it. + + Update `sub verbo` to `sub-verbo` (#8315). This is a change in + the term's canonical name in citeproc. As a result of this change, + `sub verbo` locators have not worked in pandoc since citeproc 0.7. + + Text.Pandoc.Citeproc.MetaValue: remove unused function `metaValueToPath`. + + Add internal module Text.Pandoc.Citeproc.Name (#8345). This exports + `toName`, which previously had been part of T.P.Citeproc.BibTeX, + and allows for cleaner module dependencies. + + * Add new module Text.Pandoc.Format [API change] (Albert Krewinkel). + The module provides functions and types for format spec parsing and + processing. The function `parseFormatSpec` was moved from + Text.Pandoc.Extensions to the new module and renamed to + `parseFlavoredFormat`. It now operates in + a PandocMonad and is based on the updated types. + + * Text.Pandoc.Extensions: + + + Fix JSON decoding of Extensions (#8352, Albert Krewinkel). + + Add new exported function `readExtension` [API change]. + + Remove `parseFormatSpec` [API change]. 
This has been moved + to Text.Pandoc.Format and renamed as `parseFlavoredFormat` + (Albert Krewinkel). + + Simpler implementation of Extensions based on Set + (benchmarks show no performance penalty). + + * Text.Pandoc.MIME: + + + Base module on package `mime-types`, which + is already a transitive dependency (#8277, Albert Krewinkel). + + Remove deprecated overrides (#8292). + + * Text.Pandoc.Shared: + + + Export `textToIdentifier` [API change]. + + Remove deprecated `crFilter`. [API change] + + Remove deprecated `deLink`. [API change] + + Deprecate `notElemText`. + + Deprecate `makeMeta`. + + Rename `pandocVersion` to (exported) `pandocVersionText` and add a new + `pandocVersion` that returns `Version` instead of `Text` [API change]. + This is consistent with the type used for `pandocTypesVersion` + and allows to use the value where a Version type is required. + + * Rename Text.Pandoc.Network.HTTP -> Text.Pandoc.URI. + This is still an unexported internal module. + Export `urlEncode`, `escapeURI`, `isURI`, `schemes`, `uriPathToPath`. + Drop exports of `schemes` and `uriPathToPath`. + + * Rename Text.Pandoc.Readers.LaTeX.Types -> Text.Pandoc.TeX (internal + module). + + * Text.Pandoc.Options: + + + WriterOptions now has a field `writerListTables`, + specifying that list tables be used in RST output [API change]. + + * Text.Pandoc.App: + + + Export `IpynbOutput(..)` [API change]. + + * Text.Pandoc.Filter: + + + Export `applyFilters` [API change]. + + Export `applyJSONFilter` [API Change] (Albert Krewinkel). + + Parameterize `applyFilters` over scripting engine [API change] (Albert + Krewinkel). + + * Text.Pandoc.Readers: + + + Change argument type of `getReader`, so it takes a `FlavoredFormat` + instead of a `Text` [API change] (Albert Krewinkel). + + * Text.Pandoc.Writers: + + + Change argument type of `getWriter`, so it takes a `FlavoredFormat` + instead of a `Text` [API change] (Albert Krewinkel). 
+ + * Text.Pandoc.Templates: + + + Do not try to normalize input to `getDefaultTemplate` (Albert + Krewinkel). The function `getDefaultTemplate` no longer splits off + extension modifiers from the given format, as that conflicts with + using custom writers as formats. Haskell library users should use + `getDefaultTemplate <=< (fmap formatName . parseFlavoredFormat)` + if the input format can still contain extensions. The same is true + for `compileDefaultTemplate`, which calls `getDefaultTemplate` + internally. + + * New exported module Text.Pandoc.Scripting (Albert Krewinkel). + The module contains the central data structure for scripting engines + (e.g., Lua) [API change]. + + * Text.Pandoc.Error: + + + Add new PandocError constructor `PandocNoScriptingEngine` [API change] + (Albert Krewinkel). + + Add new PandocError constructor `PandocFormatError` [API change] + (Albert Krewinkel). The new error is used to report problems with + input or output format specifications. + + * Separate out Text.Pandoc.Data and Text.Pandoc.Translations from + Text.Pandoc.Class (#8348). This makes Text.Pandoc.Class + more self-contained. + + + Text.Pandoc.Data is now an exported module, providing `readDataFile` + and `readDefaultDataFile` (both formerly provided by Text.Pandoc.Class), + and also `getDataFileNames` (formerly unexported in + Text.Pandoc.App.CommandLineOptions). [API change] + + Text.Pandoc.Translations is now an exported module (along with + Text.Pandoc.Translations.Types), providing `readTranslations`, + `getTranslations`, `setTranslations`, `translateTerm`, + `lookupTerm`, `readTranslations`, `Term(..)`, and `Translations` [API + change]. + + Text.Pandoc.Class no longer exports `readDataFile`, + `readDefaultDataFile`, `setTranslations`, and `translateTerm` + [API change]. + + Text.Pandoc.Class now exports `checkUserDataDir` [API change]. + + * Text.Pandoc now exports Text.Pandoc.Data and `setTranslations` + and `translateTerm` [API change]. 
+ + * Export module Text.Pandoc.Class.IO [API change]. + The module is useful when defining instances of class PandocMonad for + types that are also instances of MonadIO. + + * Remove modules Text.Pandoc.Writers.Custom and Text.Pandoc.Readers.Custom + [API Change] (Albert Krewinkel). The functions `writeCustom` and + `readCustom` are available from module Text.Pandoc.Lua. + + * Text.Pandoc.Server: + + + Split this module into a separate package, `pandoc-server`, + allowing the `pandoc` library to be compiled without server support. + + Return object if JSON is accepted. Previously we just returned + a JSON-encoded string. Now we return something like: + ``` + { + "output": "

hello

" + "base64": false, + "messages": [ + { + "message": "Not rendering RawInline (Format \"tex\") \"\\\\noe\"", + "verbosity": "INFO" + } + ], + } + ``` + This is a change in the pandoc-server JSON API. + + Set translations in the writer based on `lang` metadata. + + Return error in JSON object if response is JSON. + + Remove `parseServerOpts`. [API change] + + * Text.Pandoc.Lua: + + + This module has been moved to a separate package, + `pandoc-lua-engine`. + + Export `applyFilter`, `readCustom`, and `writeCustom`. + No longer export the lower-level function `runFilterFile` [API change]. + + Change type of `applyFilter` [API Change] (Albert Krewinkel). + The module Text.Pandoc.Filter.Lua has been merged into + Text.Pandoc.Lua. The function `applyFilter` now has type + ``` haskell + applyFilter :: (PandocMonad m, MonadIO m) + => Environment-> [String]-> FilePath-> Pandoc-> m Pandoc + ``` + where `Environment` is defined in Text.Pandoc.Filter.Environment. + + Export new function `getEngine` [API Change]. + The function returns the Lua scripting engine. + + Add unexported modules T.P.Lua.Reader, T.P.Lua.Writer. + These contain the definitions of `readCustom` and `writeCustom` + that were previously in T.P.Readers.Custom and T.P.Writers.Custom. + + Cleanup module dependencies, for a cleaner module dependency graph. + + The `writeCustom` function has changed to return a Writer and + an ExtensionsConfig [API change]. This allows ByteString writers to be + defined. + + The `readCustom` function has changed to return a Reader and an + ExtensionsConfig [API change]. + + * Lua subsystem (Albert Krewinkel): + + + The whole Lua subsystem has been moved to a separate package, + `pandoc-lua-engine`. `pandoc` does not depend on it. + `convertWithOpts` has a new parameter that can be used to + pass in the scripting engine defined in `pandoc-lua-engine` + (or a different one, in theory). + + Fix the behavior of Lua "Version" objects under equality + comparisons (#8267). 
+ + Support running Lua with a GC-collected Lua state. + + Ensure that extensions marshaling is consistent. + + Produce more informative error messages for pandoc errors. + Errors are reported in Lua in the same words in which they would be + reported in the terminal. + + Add new module `pandoc.format`. The module provides functions to + query the set of extensions supported by formats and the set + of extension enabled per default. + + Add function `pandoc.template.apply`. + + Add function `pandoc.template.meta_to_context`. + The functions converts Meta values to template contexts; the intended + use is in combination with `pandoc.template.apply`. + + Allow Doc values in `WriterOptions.variables`. + The specialized peeker and pusher function for `Context Text` values + does not go via JSON, and thus keeps Doc values unchanged during + round-tripping. + + * Custom writers: + + + The global variables `PANDOC_DOCUMENT` and `PANDOC_WRITER_OPTIONS` + are no longer set when the writer script is loaded. Both variables + are still set in classic writers before the conversion is started, + so they can be used when they are wrapped in functions. + + Deprecate classic custom writers. + + Add function `pandoc.write_classic`. The function can be used to + convert a classic writer into a new-style writer by setting it as + the value of `Writer`: + ``` lua + Writer = pandoc.write_classic + ``` + or to fully restore the old behavior: + ``` lua + function Writer (doc, opts) + PANDOC_DOCUMENT = doc + PANDOC_WRITER_OPTIONS = opts + load(PANDOC_SCRIPT_FILE)() + return pandoc.write_classic(doc, opts) + end + ``` + + Support extensions in custom writers. Custom writers can define the + extensions that they support via the global `writer_extensions`. + The variable's value must be a table with all supported extensions + as keys, and their default status as values. 
For example, + the below specifies that the writer supports the extensions `smart` + and `sourcepos`, but only the `smart` extension is enabled by default: + ``` lua + writer_extensions = { + smart = true, + sourcepos = false, + } + ``` + + Custom writers can define a default template via a global `Template` + function; the data directory is no longer searched for a default + template. Writer authors can restore the old lookup behavior with + ``` lua + Template = function () + local template + return template.compile(template.default(PANDOC_SCRIPT_FILE)) + end + ``` + + * Custom readers: + + + Support extensions in custom readers. + Custom readers, like writers, can define the set of supported + extensions by setting a global. E.g.: + ``` lua + reader_extensions = { + smart = true, + citations = false, + } + ``` + + * Use latest versions of `commonmark-extensions`, `texmath`, + `citeproc`, `gridtables`, and `skylighting`. + + * Require aeson >= 2.0. + + * Remove `lua53` flag. We now only support Lua 5.4. + + * Add hie.yaml for haskell language server. + + * Documentation: + + + Deprecate `PANDOC_WRITER_OPTIONS` in custom writers (Albert Krewinkel). + + Document `pandoc.write_classic` (Albert Krewinkel). + + Document new table features (Albert Krewinkel). + + Clarify what background-image does in reveal.js (#6450). + + Documentation improvements for `blank_before_blockquote` + (#8324, Pranesh Prakash). + + Update grid table documentation (#8346). + + Add note about MathJax fonts to `--embed-resources`. + + Use cabal's --package-env more (#8317, Artem Pelenitsyn). + + Modify Zerobrane instructions to use Lua 5.4 (#8353, Ian Max Andolina). + + Fix documentation for highlight-style in `pandoc-server.md`. + + Fix link to fedora package site (#8246, Akos Marton). + + Rephrase paragraph on format extensions (#8375, Ilona + Silverwood). + + * Tests.Command: remove unused `runTest`. + + * Add pandoc-lua.1 man page. + + * Improve `shell.nix`. 
+ + * Add `tools/moduledeps.lua` for inspecting the internal module + dependency tree. + + * Fix macOS zip so pandoc-server is a symlink. This cuts its size by 2x. + + * CI: Improve CI speed by caching more, eliminating macos builds, + and splitting benchmarks into a separate action, run by + manual dispatch. (We still test that benchmarks build in + the regular CI.) The cache can be expired manually by + modifying the secret `CACHE_VERSION`. + + * Remove the unnecessary Setup.hs from pandoc. + Cabal does not need this with build-type 'simple'. + + * Add pandoc-lua and pandoc-server (symlinks) and their man pages + to releases. + + * Use hslua-cli package for pandoc-lua interface (Albert Krewinkel). + + * Add `server` flag to pandoc-cli, allowing it to be compiled without + server support. + + * pandoc-cli: Allow building a binary without Lua support (Albert Krewinkel). + Disabling the `lua` cabal flag will result in a binary without Lua. + + * Move `--version` handling to pandoc-cli. We need it here in order + to print information about whether server and Lua support have been + compiled in. + + * Move `nightly` flag from pandoc to pandoc-cli (#8339). + + * Rewrite Makefile and add a number of convenient targets, for + coverage, weeder, module dependencies, prerelease. + `make help` will print the targets. + + * Factor out xml-light into an internal library. + + * Move trypandoc to a separate repository, jgm/trypandoc. + + + ## pandoc 2.19.2 (2022-08-22) * Fix regression with data uris in 2.19.1 (#8239). @@ -888,7 +1435,7 @@ extends beyond the `--columns` width, we need to adjust the widths of the pipe separators to encode this width information. - * Docx writer: Separate tables even with RawBlocks between (#7224, + * Docx writer: Separate tables even with RawBlocks between (#7724, Michael Hoffmann). Adjacent docx tables need to be separated by an empty paragraph.
If there's a RawBlock between tables which renders to nothing, be sure to still insert the empty paragraph so that diff --git a/data/epub.css b/data/epub.css index f7d4ab14ee51..3f9bfa5bfaff 100644 --- a/data/epub.css +++ b/data/epub.css @@ -1,12 +1,183 @@ /* This defines styles and classes used in the book */ -body { margin: 5%; text-align: justify; font-size: medium; } -code { font-family: monospace; } -h1 { text-align: left; } -h2 { text-align: left; } -h3 { text-align: left; } -h4 { text-align: left; } -h5 { text-align: left; } -h6 { text-align: left; } +@page { + margin: 10px; +} +html, body, div, span, applet, object, iframe, h1, h2, h3, h4, h5, h6, p, blockquote, pre, a, abbr, acronym, address, big, cite, code, del, dfn, em, img, ins, kbd, q, s, samp, small, strike, strong, sub, sup, tt, var, b, u, i, center, fieldset, form, label, legend, table, caption, tbody, tfoot, thead, tr, th, td, article, aside, canvas, details, embed, figure, figcaption, footer, header, hgroup, menu, nav, output, ruby, section, summary, time, mark, audio, video { + margin: 0; + padding: 0; + border: 0; + font-size: 100%; + vertical-align: baseline; +} +ol, ul, li, dl, dt, dd { + margin: 0; + padding: 0; + border: 0; + font-size: 100%; + vertical-align: baseline; +} +html { + line-height: 1.2; + font-family: Georgia, serif; + color: #1a1a1a; + background-color: #fdfdfd; +} +p { + text-indent: 0; + margin: 1em 0; + widows: 2; + orphans: 2; +} +a { + color: #1a1a1a; +} +a:visited { + color: #1a1a1a; +} +img { + max-width: 100%; +} +h1 { + text-indent: 0; + text-align: left; + margin: 3em 0 0 0; + font-size: 2em; + font-weight: bold; + page-break-before: always; + line-height: 150%; +} +h2 { + text-indent: 0; + text-align: left; + margin: 1.5em 0 0 0; + font-size: 1.5em; + font-weight: bold; + line-height: 135%; +} +h3 { + text-indent: 0; + text-align: left; + margin: 1.3em 0 0 0; + font-size: 1.3em; + font-weight: bold; +} +h4 { + text-indent: 0; + text-align: left; + margin: 1.2em 0 0 
0; + font-size: 1.2em; + font-weight: bold; +} +h5 { + text-indent: 0; + text-align: left; + margin: 1.1em 0 0 0; + font-size: 1.1em; + font-weight: bold; +} +h6 { + text-indent: 0; + text-align: left; + font-size: 1em; + font-weight: bold; +} +h1, h2, h3, h4, h5, h6 { + page-break-after: avoid; + page-break-inside: avoid; +} + +ol, ul { + margin: 1em 0 0 1.7em; +} +li > ol, li > ul { + margin-top: 0; +} +blockquote { + margin: 1em 0 1em 1.7em; +} +code { + font-family: Menlo, Monaco, 'Lucida Console', Consolas, monospace; + font-size: 85%; + margin: 0; +} +pre { + margin: 1em 0; + overflow: auto; +} +pre code { + padding: 0; + overflow: visible; + overflow-wrap: normal; +} +.sourceCode { + background-color: transparent; + overflow: visible; +} +hr { + background-color: #1a1a1a; + border: none; + height: 1px; + margin: 1em 0; +} +table { + margin: 1em 0; + border-collapse: collapse; + width: 100%; + overflow-x: auto; + display: block; +} +table caption { + margin-bottom: 0.75em; +} +tbody { + margin-top: 0.5em; + border-top: 1px solid #1a1a1a; + border-bottom: 1px solid #1a1a1a; +} +th { + border-top: 1px solid #1a1a1a; + padding: 0.25em 0.5em 0.25em 0.5em; +} +td { + padding: 0.125em 0.5em 0.25em 0.5em; +} +header { + margin-bottom: 4em; + text-align: center; +} +#TOC li { + list-style: none; +} +#TOC ul { + padding-left: 1.3em; +} +#TOC > ul { + padding-left: 0; +} +#TOC a:not(:hover) { + text-decoration: none; +} +code{white-space: pre-wrap;} +span.smallcaps{font-variant: small-caps;} + +/* This is the most compatible CSS, but it only allows two columns: */ +div.column{ display: inline-block; vertical-align: top; width: 50%; } +/* If you can rely on CSS3 support, use this instead: +div.columns{display: flex; gap: min(4vw, 1.5em);} +div.column{flex: auto; overflow-x: auto;} */ + +div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;} +ul.task-list{list-style: none;} +ul.task-list li input[type="checkbox"] { + width: 0.8em; + margin: 0 0.8em 0.2em -1.6em; 
+ vertical-align: middle; +} +.display.math{ + display: block; + text-align: center; + margin: 0.5rem auto; +} /* For title, author, and date on the cover page */ h1.title { } p.author { } @@ -18,12 +189,7 @@ nav#landmarks ol li { list-style-type: none; margin: 0; padding: 0; } a.footnote-ref { vertical-align: super; } em, em em em, em em em em em { font-style: italic;} em em, em em em em { font-style: normal; } -code{ white-space: pre-wrap; } -span.smallcaps{ font-variant: small-caps; } -span.underline{ text-decoration: underline; } q { quotes: "“" "”" "‘" "’"; } -div.column{ display: inline-block; vertical-align: top; width: 50%; } -div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;} @media screen { /* Workaround for iBooks issue; see #6242 */ .sourceCode { overflow: visible !important; diff --git a/data/pptx/ppt/slides/slide3.xml b/data/pptx/ppt/slides/slide3.xml index f9ce81d90cc5..6f6f0eca6735 100644 --- a/data/pptx/ppt/slides/slide3.xml +++ b/data/pptx/ppt/slides/slide3.xml @@ -1,2 +1,2 @@ -Section headerSome explanatory text \ No newline at end of file +Section header \ No newline at end of file diff --git a/data/templates/default.epub2 b/data/templates/default.epub2 index 28de9c11b32a..b10705119966 100644 --- a/data/templates/default.epub2 +++ b/data/templates/default.epub2 @@ -7,8 +7,11 @@ $pagetitle$ diff --git a/data/templates/default.epub3 b/data/templates/default.epub3 index 19961ab133a7..0fac2e11ae3f 100644 --- a/data/templates/default.epub3 +++ b/data/templates/default.epub3 @@ -6,8 +6,11 @@ $pagetitle$ diff --git a/data/templates/default.latex b/data/templates/default.latex index e3b54ec4a43d..1e76be34bffc 100644 --- a/data/templates/default.latex +++ b/data/templates/default.latex @@ -82,11 +82,6 @@ $if(beamerarticle)$ \usepackage{beamerarticle} % needs to be loaded first $endif$ \usepackage{amsmath,amssymb} -$if(fontfamily)$ -\usepackage[$for(fontfamilyoptions)$$fontfamilyoptions$$sep$,$endfor$]{$fontfamily$} -$else$ -\usepackage{lmodern} 
-$endif$ $if(linestretch)$ \usepackage{setspace} $endif$ @@ -98,15 +93,48 @@ $endif$ \else % if luatex or xetex $if(mathspec)$ \ifXeTeX - \usepackage{mathspec} + \usepackage{mathspec} % this also loads fontspec \else - \usepackage{unicode-math} + \usepackage{unicode-math} % this also loads fontspec \fi $else$ - \usepackage{unicode-math} + \usepackage{unicode-math} % this also loads fontspec $endif$ - \defaultfontfeatures{Scale=MatchLowercase} + \defaultfontfeatures{Scale=MatchLowercase}$-- must come before Beamer theme \defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1} +\fi +$if(fontfamily)$ +$else$ +$-- Set default font before Beamer theme so the theme can override it +\usepackage{lmodern} +$endif$ +$-- Set Beamer theme before user font settings so they can override theme +$if(beamer)$ +$if(theme)$ +\usetheme[$for(themeoptions)$$themeoptions$$sep$,$endfor$]{$theme$} +$endif$ +$if(colortheme)$ +\usecolortheme{$colortheme$} +$endif$ +$if(fonttheme)$ +\usefonttheme{$fonttheme$} +$endif$ +$if(mainfont)$ +\usefonttheme{serif} % use mainfont rather than sansfont for slide text +$endif$ +$if(innertheme)$ +\useinnertheme{$innertheme$} +$endif$ +$if(outertheme)$ +\useoutertheme{$outertheme$} +$endif$ +$endif$ +$-- User font settings (must come after default font and Beamer theme) +$if(fontfamily)$ +\usepackage[$for(fontfamilyoptions)$$fontfamilyoptions$$sep$,$endfor$]{$fontfamily$} +$endif$ +\ifPDFTeX\else + % xetex/luatex font selection $if(mainfont)$ \setmainfont[$for(mainfontoptions)$$mainfontoptions$$sep$,$endfor$]{$mainfont$} $endif$ @@ -167,26 +195,6 @@ $if(zero-width-non-joiner)$ \fi %% End of ZWNJ support $endif$ -$if(beamer)$ -$if(theme)$ -\usetheme[$for(themeoptions)$$themeoptions$$sep$,$endfor$]{$theme$} -$endif$ -$if(colortheme)$ -\usecolortheme{$colortheme$} -$endif$ -$if(fonttheme)$ -\usefonttheme{$fonttheme$} -$endif$ -$if(mainfont)$ -\usefonttheme{serif} % use mainfont rather than sansfont for slide text -$endif$ -$if(innertheme)$ 
-\useinnertheme{$innertheme$} -$endif$ -$if(outertheme)$ -\useoutertheme{$outertheme$} -$endif$ -$endif$ % Use upquote if available, for straight quotes in verbatim environments \IfFileExists{upquote.sty}{\usepackage{upquote}}{} \IfFileExists{microtype.sty}{% use microtype if available @@ -270,6 +278,9 @@ $if(graphics)$ \def\fps@figure{htbp} \makeatother $endif$ +$if(svg)$ +\usepackage{svg} +$endif$ $if(strikeout)$ $-- also used for underline \usepackage[normalem]{ulem} diff --git a/data/templates/styles.citations.html b/data/templates/styles.citations.html new file mode 100644 index 000000000000..029755f310ee --- /dev/null +++ b/data/templates/styles.citations.html @@ -0,0 +1,23 @@ +/* CSS for citations */ +div.csl-bib-body { } +div.csl-entry { + clear: both; +$if(csl-entry-spacing)$ + margin-bottom: $csl-entry-spacing$; +$endif$ +} +.hanging div.csl-entry { + margin-left:2em; + text-indent:-2em; +} +div.csl-left-margin { + min-width:2em; + float:left; +} +div.csl-right-inline { + margin-left:2em; + padding-left:1em; +} +div.csl-indent { + margin-left: 2em; +} diff --git a/data/templates/styles.html b/data/templates/styles.html index 776d6a872d1b..c21f1acab761 100644 --- a/data/templates/styles.html +++ b/data/templates/styles.html @@ -28,6 +28,9 @@ } } @media print { + html { + background-color: $if(backgroundcolor)$$backgroundcolor$$else$white$endif$; + } body { background-color: transparent; color: black; @@ -175,33 +178,13 @@ $if(quotes)$ q { quotes: "“" "”" "‘" "’"; } $endif$ -$if(highlighting-css)$ -$highlighting-css$ -$endif$ $if(displaymath-css)$ .display.math{display: block; text-align: center; margin: 0.5rem auto;} $endif$ -$if(csl-css)$ -div.csl-bib-body { } -div.csl-entry { - clear: both; -$if(csl-entry-spacing)$ - margin-bottom: $csl-entry-spacing$; +$if(highlighting-css)$ +/* CSS for syntax highlighting */ +$highlighting-css$ $endif$ -} -.hanging div.csl-entry { - margin-left:2em; - text-indent:-2em; -} -div.csl-left-margin { - min-width:2em; - 
float:left; -} -div.csl-right-inline { - margin-left:2em; - padding-left:1em; -} -div.csl-indent { - margin-left: 2em; -} +$if(csl-css)$ +$styles.citations.html()$ $endif$ diff --git a/doc/custom-readers.md b/doc/custom-readers.md index add2317a265d..2d99381211f7 100644 --- a/doc/custom-readers.md +++ b/doc/custom-readers.md @@ -76,6 +76,65 @@ ensuring backwards compatibility. [patterns]: http://lua-users.org/wiki/PatternsTutorial [lpeg]: http://www.inf.puc-rio.br/~roberto/lpeg/ +# Bytestring readers + +Pandoc expects text input to be UTF-8 encoded. However, formats +like docx, odt, epub, etc. are not text but binary formats. To +read them, pandoc supports `ByteStringReader` functions. These +functions work just like the `Reader` function that processes text +input, but instead of a list of sources, `ByteStringReader` +functions are passed a bytestring, i.e., a string that contains +the binary input. + +``` lua +-- read input as epub +function ByteStringReader (input) + return pandoc.read(input, 'epub') +end +``` + +# Format extensions + +Custom readers can be built such that their behavior is +controllable through format extensions, such as `smart`, +`citations`, or `hard-line-breaks`. Supported extensions are those +that are present as a key in the global `Extensions` table. +Fields of extensions that are enabled by default have the value +`true`, while those that are supported but disabled have value +`false`. + +Example: A reader with the following global table supports the +extensions `smart`, `citations`, and `foobar`, with `smart` enabled and +the other two disabled by default: + +``` lua +Extensions = { + smart = true, + citations = false, + foobar = false +} +``` + +The users control extensions as usual, e.g., `pandoc -f +my-reader.lua+citations`.
The extensions are accessible through +the reader options' `extensions` field, e.g.: + +``` lua +function Reader (input, opts) + print( + 'The citations extension is', + opts.extensions:includes 'citations' and 'enabled' or 'disabled' + ) + -- ... +end +``` + +Extensions that are neither enabled nor disabled in the +`Extensions` field are treated as unsupported by the +reader. Trying to modify such an extension via the command line +will lead to an error. + + # Example: plain text reader This is a simple example using [lpeg] to parse the input diff --git a/doc/custom-writers.md b/doc/custom-writers.md index fc839c8a0bdc..55c64eb8a181 100644 --- a/doc/custom-writers.md +++ b/doc/custom-writers.md @@ -16,79 +16,85 @@ install any additional software to do this. [Lua]: https://www.lua.org A custom writer is a Lua file that defines how to render the -document. Two styles of custom writers are supported: classic -custom writers must define rendering functions for each AST -element. New style writers, available since pandoc 2.17.2, must -define just a single function `Writer`, which gets passed the -document and writer options, and then does all rendering. +document. Writers must define just a single function, named either +`Writer` or `ByteStringWriter`, which gets passed the document and +writer options, and then handles the conversion of the document, +rendering it into a string. This interface was introduced in +pandoc 2.17.2, with ByteString writers becoming available in +pandoc 3.0. -# Classic style +Pandoc also supports "classic" custom writers, where a Lua +function must be defined for each AST element type. Classic style +writers are *deprecated* and should be replaced with new-style +writers if possible. -A writer using the classic style defines rendering functions for -each element of the pandoc AST. +# Writers -For example, +Custom writers using the new style must contain a global function +named `Writer` or `ByteStringWriter`. 
Pandoc calls this function +with the document and writer options as arguments, and expects the +function to return a UTF-8 encoded string. ``` lua -function Para(s) - return "" .. s .. "" +function Writer (doc, opts) + -- ... end ``` -The best way to go about creating a classic custom writer is to -modify the example that comes with pandoc. To get the example, -you can do +Writers that do not return text but binary data should define a +function with name `ByteStringWriter` instead. The function must +still return a string, but it does not have to be UTF-8 encoded +and can contain arbitrary binary data. -``` -pandoc --print-default-data-file sample.lua > sample.lua -``` +If both `Writer` and `ByteStringWriter` functions are defined, +then only the `Writer` function will be used. -## A custom HTML writer +## Format extensions -`sample.lua` is a full-features HTML writer, with explanatory -comments. To use it, just use the path to the custom writer as -the writer name: +Writers can be customized through format extensions, such as +`smart`, `citations`, or `hard_line_breaks`. The global +`Extensions` table indicates supported extensions with a +key. Extensions enabled by default are assigned a true value, +while those that are supported but disabled are assigned a false +value. -``` -pandoc -t sample.lua myfile.md -``` - -`sample.lua` defines all the functions needed by any custom -writer, so you can design your own custom writer by modifying -the functions in `sample.lua` according to your needs. - -``` {.lua include="sample.lua"} -``` - -## Template variables - -New template variables can be added, or existing ones -modified, by returning a second value from function `Doc`. 
- -For example, the following will add the current date in -variable `date`, unless `date` is already defined as either a -metadata value or a variable: +Example: A writer with the following global table supports the +extensions `smart`, `citations`, and `foobar`, with `smart` enabled and +the others disabled by default: ``` lua -function Doc (body, meta, vars) - vars.date = vars.date or meta.data or os.date '%B %e, %Y' - return body, vars -end +Extensions = { + smart = true, + citations = false, + foobar = false +} ``` -# New style - -Custom writers using the new style must contain a global function -named `Writer`. Pandoc calls this function with the document and -writer options as arguments, and expects the function to return a -string. +The users control extensions as usual, e.g., `pandoc -t +my-writer.lua+citations`. The extensions are accessible through +the writer options' `extensions` field, e.g.: ``` lua function Writer (doc, opts) + print( + 'The citations extension is', + opts.extensions:includes 'citations' and 'enabled' or 'disabled' + ) -- ... end ``` +## Default template + +The default template of a custom writer is defined by the return +value of the global function `Template`. Pandoc uses the default +template for rendering when the user has not specified a template, +but invoked with the `-s`/`--standalone` flag. + +The `Template` global can be left undefined, in which case pandoc +will throw an error when it would otherwise use the default +template. 
+ ## Example: modified Markdown writer Writers have access to all modules described in the [Lua filters @@ -112,6 +118,115 @@ function Writer (doc, opts) } return pandoc.write(doc:walk(filter), 'gfm', opts) end + +function Template () + local template = pandoc.template + return template.compile(template.default 'gfm') +end ``` [Lua filters documentation]: https://pandoc.org/lua-filters.html + +## Reducing boilerplate with `pandoc.scaffolding.Writer` + +The `pandoc.scaffolding.Writer` structure is a custom writer scaffold +that serves to avoid common boilerplate code when defining a custom +writer. The object can be used as a function and allows to skip details +like metadata and template handling, requiring only the render functions +for each AST element type. + +The value of `pandoc.scaffolding.Writer` is a function that should +usually be assigned to the global `Writer`: + +``` lua +Writer = pandoc.scaffolding.Writer +``` + +The render functions for Block and Inline values can then be added +to `Writer.Block` and `Writer.Inline`, respectively. The functions +are passed the element and the WriterOptions. + +``` lua +Writer.Inline.Str = function (str) + return str.text +end +Writer.Inline.SoftBreak = function (_, opts) + return opts.wrap_text == "wrap-preserve" + and cr + or space +end +Writer.Inline.LineBreak = cr + +Writer.Block.Para = function (para) + return {Writer.Inlines(para.content), pandoc.layout.blankline} +end +``` + +The render functions must return a string, a pandoc.layout *Doc* +element, or a list of such elements. In the latter case, the +values are concatenated as if they were passed to +`pandoc.layout.concat`. If the value does not depend on the input, +a constant can be used as well. + +The tables `Writer.Block` and `Writer.Inline` can be used as +functions; they apply the right render function for an element of +the respective type. 
E.g., `Writer.Block(pandoc.Para 'x')` will +delegate to the `Writer.Para` render function and will return the +result of that call. + +Similarly, the functions `Writer.Blocks` and `Writer.Inlines` can +be used to render lists of elements, and `Writer.Pandoc` renders +the document's blocks. + +All predefined functions can be overwritten when needed. + +The resulting Writer uses the render functions to handle metadata +values and converts them to template variables. The template is +applied automatically if one is given. + +# Classic style + +A writer using the classic style defines rendering functions for +each element of the pandoc AST. Note that this style is +*deprecated* and may be removed in later versions. + +For example, + +``` lua +function Para(s) + return "" .. s .. "" +end +``` + +## Template variables + +New template variables can be added, or existing ones +modified, by returning a second value from function `Doc`. + +For example, the following will add the current date in +variable `date`, unless `date` is already defined as either a +metadata value or a variable: + +``` lua +function Doc (body, meta, vars) + vars.date = vars.date or meta.data or os.date '%B %e, %Y' + return body, vars +end +``` + +## Changes in pandoc 3.0 + +Custom writers were reworked in pandoc 3.0. For technical reasons, +the global variables `PANDOC_DOCUMENT` and `PANDOC_WRITER_OPTIONS` +are set to the empty document and default values, respectively. +The old behavior can be restored by adding the following snippet, +which turns a classic into a new style writer. + +``` lua +function Writer (doc, opts) + PANDOC_DOCUMENT = doc + PANDOC_WRITER_OPTIONS = opts + loadfile(PANDOC_SCRIPT_FILE)() + return pandoc.write_classic(doc, opts) +end +``` diff --git a/doc/filters.md b/doc/filters.md index 3be905c98038..87bb3ca4d962 100644 --- a/doc/filters.md +++ b/doc/filters.md @@ -172,7 +172,7 @@ and then (It is also necessary that `pandoc-types` be installed in the local package repository. 
To do this using cabal-install, -`cabal v2-update && cabal v2-install --lib pandoc-types`.) +`cabal v2-update && cabal v2-install --lib pandoc-types --package-env .`.) Alternatively, we could compile the filter: diff --git a/doc/libraries.md b/doc/libraries.md index 4e799d18b702..2fbcf94b7f17 100644 --- a/doc/libraries.md +++ b/doc/libraries.md @@ -53,9 +53,16 @@ pandoc: : Bindings, wrappers, and helper functions to access Haskell data types from Lua via an object-oriented interface. +[hslua-module-path], [-system], [-text], and [-version] +: Lua modules that expose functionality of basic Haskell + libraries to Lua. + [hslua-aeson] : Converter from aeson data types to Lua objects. +[hslua-cli] +: Command-line interface mimicking the default `lua` executable. + [skylighting]: https://hackage.haskell.org/package/skylighting [skylighting-core]: https://hackage.haskell.org/package/skylighting-core [citeproc]: https://hackage.haskell.org/package/citeproc @@ -75,4 +82,8 @@ pandoc: [hslua-objectorientation]: https://hackage.haskell.org/package/hslua-objectorientation [hslua-packaging]: https://hackage.haskell.org/package/hslua-packaging [hslua-aeson]: https://hackage.haskell.org/package/hslua-aeson - +[hslua-cli]: https://hackage.haskell.org/package/hslua-cli +[hslua-module-path]: https://hackage.haskell.org/package/hslua-module-path +[-system]: https://hackage.haskell.org/package/hslua-module-system +[-text]: https://hackage.haskell.org/package/hslua-module-text +[-version]: https://hackage.haskell.org/package/hslua-module-version diff --git a/doc/lua-filters.md b/doc/lua-filters.md index 6c772ae6a7a9..6435280e66bf 100644 --- a/doc/lua-filters.md +++ b/doc/lua-filters.md @@ -30,7 +30,7 @@ executable. Starting with version 2.0, pandoc makes it possible to write filters in Lua without any external dependencies at all. 
A Lua -interpreter (version 5.3) and a Lua library for creating pandoc +interpreter (version 5.4) and a Lua library for creating pandoc filters is built into the pandoc executable. Pandoc data types are marshaled to Lua directly, avoiding the overhead of writing JSON to stdout and reading it from stdin. @@ -269,7 +269,12 @@ variables. be picked up by pandoc. ([WriterOptions](#type-writeroptions)) - This variable is also set in custom writers. + Accessing this variable in **custom writers** is + **deprecated**. Starting with pandoc 3.0, it is set to a + placeholder value (the default options) in custom writers. + Access to the actual writer options is provided via the + `Writer` or `ByteStringWriter` function, to which the options + are passed as the second function argument. *Since: pandoc 2.17* @@ -406,21 +411,24 @@ step through a Lua filter line by line as it is run inside Pandoc. This is accomplished using the remote-debugging interface of the package [`mobdebug`](https://github.com/pkulchenko/MobDebug). Although mobdebug can be run from the terminal, it is more useful -run within the donation-ware Lua editor and IDE, -[ZeroBrane](https://studio.zerobrane.com/). ZeroBrane offers a -REPL console and UI to step-through and view all variables and -state. - -If you already have Lua 5.3 installed, you can add -[`mobdebug`](https://luarocks.org/modules/paulclinger/mobdebug) -and its dependency -[`luasocket`](https://luarocks.org/modules/luasocket/luasocket) -using [`luarocks`](https://luarocks.org), which should then be -available on the path. ZeroBrane also includes both of these in -its package, so if you don't want to install Lua separately, you -should add/modify your `LUA_PATH` and `LUA_CPATH` to include the -correct locations; [see detailed instructions -here](https://studio.zerobrane.com/doc-remote-debugging). +run within the donation-ware Lua editor and IDE, [ZeroBrane +Studio](https://studio.zerobrane.com/). 
ZeroBrane offers a REPL +console and UI to step-through and view all variables and state. + +ZeroBrane doesn't come with Lua 5.4 bundled, but it can debug it, so +you should install Lua 5.4, and then add +[`mobdebug`](https://luarocks.org/modules/paulclinger/mobdebug) and +its dependency +[`luasocket`](https://luarocks.org/modules/luasocket/luasocket) using +[`luarocks`](https://luarocks.org). ZeroBrane can use your Lua 5.4 +install by adding `path.lua = "/path/to/your/lua"` in your ZeroBrane +settings file. Next, open your Lua filter in ZeroBrane, and add +`require('mobdebug').start()` at the line where you want your +breakpoint. Then make sure the Project > Lua Interpreter is set to the +"Lua" you added in settings and enable "Start Debugger Server" [see +detailed instructions +here](https://studio.zerobrane.com/doc-remote-debugging). Run Pandoc +as you normally would, and ZeroBrane should break at the correct line. @@ -3543,6 +3551,36 @@ Usage: local html = pandoc.write(doc, 'html') assert(html == "

Tea

") +### `write_classic (doc[, writer_options])` {#pandoc.write_custom} + +Runs a classic custom Lua writer, using the functions defined +in the current environment. + +Parameters: + +`doc` +: document to convert ([Pandoc](#type-pandoc)) + +`writer_options` +: options passed to the writer; may be a [WriterOptions] object + or a table with a subset of the keys and values of a + WriterOptions object; defaults to the default values + documented in the manual. ([WriterOptions]|table) + +Returns: +- converted document (string) + +Usage: + + -- Adding this function converts a classic writer into a + -- new-style custom writer. + function Writer (doc, opts) + PANDOC_DOCUMENT = doc + PANDOC_WRITER_OPTIONS = opts + loadfile(PANDOC_SCRIPT_FILE)() + return pandoc.write_classic(doc, opts) + end + [WriterOptions]: #type-writeroptions # Module pandoc.utils @@ -4157,7 +4195,7 @@ Inserts element `value` at position `pos` in list, shifting elements to the next-greater index if necessary. This function is identical to -[`table.insert`](https://www.lua.org/manual/5.3/manual.html#6.6). +[`table.insert`](https://www.lua.org/manual/5.4/manual.html#6.6). Parameters: @@ -4196,7 +4234,7 @@ Removes the element at position `pos`, returning the value of the removed element. This function is identical to -[`table.remove`](https://www.lua.org/manual/5.3/manual.html#6.6). +[`table.remove`](https://www.lua.org/manual/5.4/manual.html#6.6). Parameters: @@ -4226,7 +4264,7 @@ by the given order may have their relative positions changed by the sort. This function is identical to -[`table.sort`](https://www.lua.org/manual/5.3/manual.html#6.6). +[`table.sort`](https://www.lua.org/manual/5.4/manual.html#6.6). Parameters: @@ -5129,10 +5167,45 @@ Returns [Doc]: #type-doc +# Module pandoc.scaffolding + +Scaffolding for custom writers. 
+ +## Writer {#pandoc.scaffolding.writer} + +A structure to be used as a `Writer` function; the construct +handles most of the boilerplate, expecting only render functions +for all AST elements. See the documentation for custom writers for +details. + + + # Module pandoc.template Handle pandoc templates. +### apply {#pandoc.template.apply} + +`apply (template, context)` + +Applies a context with variable assignments to a template, +returning the rendered template. The `context` parameter must be a +table with variable names as keys and [Doc], string, boolean, or +table as values, where the table can be either be a list of the +aforementioned types, or a nested context. + +Parameters: + +`template` +: template to apply ([Template]{#type-template}) + +`context` +: variable values (table) + +Returns: + +- rendered template ([Doc]) + ### compile {#pandoc.template.compile} `compile (template[, templates_path])` @@ -5178,6 +5251,29 @@ Returns: - raw template (string) +### meta_to_context {#pandoc.template.meta_to_context} + +`meta_to_context (meta, blocks_writer, inlines_writer)` + +Creates template context from the document's [Meta]{#type-meta} +data, using the given functions to convert [Blocks] and [Inlines] +to [Doc] values. + +Parameters: + +`meta` +: document metadata ([Meta]) + +`blocks_writer` +: converter from [Blocks] to [Doc] values (function) + +`inlines_writer` +: converter from [Inlines] to [Doc] values (function) + +Returns: + +- template context (table) + # Module pandoc.types Constructors for types which are not part of the pandoc AST. @@ -5198,3 +5294,148 @@ Parameters: Returns: - A new [Version] object. + +# Module pandoc.zip + +Functions to create, modify, and extract files from zip archives. + +The module can be called as a function, in which case it behaves +like the `zip` function described below. 
+ +Zip options are optional; when defined, they must be a table with +any of the following keys: + + - `recursive`: recurse directories when set to `true`; + - `verbose`: print info messages to stdout; + - `destination`: the value specifies the directory in which to + extract; + - `location`: value is used as path name, defining where files + are placed. + - `preserve_symlinks`: Boolean value, controlling whether + symbolic links are preserved as such. This option is ignored + on Windows. + +## Functions + +### Archive {#pandoc.zip.Archive} + +`Archive (bytestring_or_entries)` + +Reads an *Archive* structure from a raw zip archive or a list of +Entry items; throws an error if the given string cannot be decoded +into an archive. + +*Since: 1.0.0* + +Parameters: + +bytestring_or_entries +: (string|{ZipEntry,...}) + +Returns: + + - (ZipArchive) + +### Entry {#pandoc.zip.Entry} + +`Entry (path, contents[, modtime])` + +Generates a zip Entry from a filepath, the file's uncompressed +content, and the file's modification time. + +*Since: 1.0.0* + +Parameters: + +path +: file path in archive (string) + +contents +: uncompressed contents (string) + +modtime +: modification time (integer) + +### read_entry {#pandoc.zip.read_entry} + +`read_entry (filepath, opts)` + +Generates a ZipEntry from a file or directory. + +*Since: 1.0.0* + +Parameters: + +filepath +: (string) + +opts +: zip options (table) + +Returns: + + - a new zip archive entry (ZipEntry) + +### zip {#pandoc.zip.zip} + +`zip (filepaths[, options])` + +Package and compress the given files into a new Archive. + +*Since: 1.0.0* + +Parameters: + +filepaths +: list of files from which the archive is created. ({string,...}) + +options +: zip options (table) + +Returns: + + - a new archive (ZipArchive) + +## Types + +### Archive {#type-pandoc.zip.Archive} + +A zip archive with file entries. 
+ +#### Fields + +`entries` +: files in this zip archive ({Entry,...}) + +#### Methods + +`extract([opts])` +: Extract all files from this archive, creating directories as + needed. Note that the last-modified time is set correctly only + in POSIX, not in Windows. This function fails if encrypted + entries are present. + + Use `archive:extract{destination = 'dir'}` to extract to + subdirectory `dir`. + +`bytestring()` +: Returns the raw binary string representation of the archive. + +### Entry {#type-pandoc.zip.Entry} + +File or directory entry in a zip archive. + +#### Fields + +`path` +: relative path, using `/` as separator + +`modtime` +: modification time (seconds since unix epoch) + +#### Methods + +`contents([password])` +: Get the uncompressed contents of a zip entry. If `password` is + given, then that password is used to decrypt the contents. An + error is throws if decrypting fails. diff --git a/doc/pandoc-lua.md b/doc/pandoc-lua.md new file mode 100644 index 000000000000..5badbfb3277d --- /dev/null +++ b/doc/pandoc-lua.md @@ -0,0 +1,64 @@ +--- +title: pandoc-lua +section: 1 +date: September 22, 2022 +--- + +# SYNOPSIS + +`pandoc-lua` [*options*] [*script* [*args*]] + +# DESCRIPTION + +`pandoc-lua` is a standalone Lua interpreter with behavior similar +to that of the standard `lua` executable, but exposing all of +pandoc's Lua libraries. All `pandoc.*` packages, as well as the +packages `re` and `lpeg`, are available via global variables. +Furthermore, the globals `PANDOC_VERSION`, `PANDOC_STATE`, and +`PANDOC_API_VERSION` are set at startup. + +If no script argument is given, then the script is assumed to be +passed in via *stdin*. Interactive mode is not supported at this +time. + +When called without the option `-E`, the interpreter checks for an +environment variable `LUA_INIT` before running any argument. If +the variable content has the format *`@filename`*, then +`pandoc-lua` executes the file. Otherwise, `pandoc-lua` executes +the string itself. 
+ +# OPTIONS + +`-e stat` +: Execute statement `stat`. + +`-l mod` +: If mod has the pattern `g=m`, then require library `m` into + global `g`; otherwise require library `mod` into global + `mod`. + +`-v` +: Show version information. + +`-i` +: Not supported yet; print a warning to that effect. + +`-E` +: Ignore environment variables. This is not fully implemented + yet and only ignores the `LUA_INIT` variable. Other variables + like `LUA_PATH` and `LUA_CPATH` are **not** ignored. + +`-W` +: Turn warnings on. + +# AUTHORS + +Copyright 2022 John MacFarlane (jgm@berkeley.edu) and +contributors. Released under the [GPL], version 2 or later. This +software carries no warranty of any kind. (See COPYRIGHT for full +copyright and warranty notices.) + +Lua: Copyright 1994-2022 Lua.org, PUC-Rio. + +[GPL]: https://www.gnu.org/copyleft/gpl.html "GNU General Public License" + diff --git a/doc/pandoc-server.md b/doc/pandoc-server.md index 9977fe70eeb7..108e9c1f641f 100644 --- a/doc/pandoc-server.md +++ b/doc/pandoc-server.md @@ -56,17 +56,44 @@ does, however, impose certain limitations: ## Root endpoint The root (`/`) endpoint accepts only POST requests. + +### Response + It returns a converted document in one of the following -formats, depending on Accept headers: +formats (in order of preference), depending on the `Accept` header: +- `application/octet-stream` - `text/plain` - `application/json` -- `application/octet-stream` If the result is a binary format (e.g., `epub` or `docx`) and the content is returned as plain text or JSON, the binary will be base64 encoded. +If a JSON response is given, it will have one of the +following formats. 
If the conversion is not successful: + +``` +{ "error": string with the error message } +``` + +If the conversion is successful: + +``` +{ "output": string with textual or base64-encoded binary output, + "base64": boolean (true means the "output" is base64-encoded), + "messages": array of message objects (see below) } +``` + +Each element of the "messages" array will have the format + +``` +{ "message": string, + "verbosity": string (either "WARNING" or "INFO") } +``` + +### Request + The body of the POST request should be a JSON object, with the following fields. Only the `text` field is required; all of the others can be omitted for default @@ -168,7 +195,7 @@ the first one given is the default. : Causes HTML comments to be stripped in Markdown or Textile source, instead of being passed through to the output format. -`highlight-style` (string, default `"pygments"`) +`highlight-style` (string, leave unset for no highlighting) : Specify the style to use for syntax highlighting of code. Standard styles are `"pygments"` (the default), `"kate"`, @@ -337,8 +364,7 @@ except for these two points: - It accepts a JSON array, each element of which is a JSON object like the one expected by the root endpoint. -- It returns a JSON array of results. (It will not return - plain text or octet-stream, like the root endpoint.) +- It returns a JSON array of JSON results. This endpoint can be used to convert a sequence of small snippets in one request. 
diff --git a/hie.yaml b/hie.yaml new file mode 100644 index 000000000000..76b143e591ca --- /dev/null +++ b/hie.yaml @@ -0,0 +1,31 @@ +cradle: + cabal: + - path: "./xml-light" + component: "pandoc:lib:xml-light" + + - path: "./src" + component: "lib:pandoc" + + - path: "./test" + component: "pandoc:test:test-pandoc" + + - path: "./benchmark/benchmark-pandoc.hs" + component: "pandoc:bench:benchmark-pandoc" + + - path: "pandoc-cli/no-lua/pandoc.hs" + component: "pandoc-cli:exe:pandoc" + + - path: "pandoc-cli/no-lua/PandocCLI/Lua.hs" + component: "pandoc-cli:exe:pandoc" + + - path: "pandoc-cli/no-lua/PandocCLI/Server.hs" + component: "pandoc-cli:exe:pandoc" + + - path: "pandoc-lua-engine/src" + component: "lib:pandoc-lua-engine" + + - path: "pandoc-lua-engine/test" + component: "pandoc-lua-engine:test:test-pandoc-lua-engine" + + - path: "pandoc-server/src" + component: "lib:pandoc-server" diff --git a/linux/make_artifacts.sh b/linux/make_artifacts.sh index 4653a0c19cbf..1acdf4a913b2 100644 --- a/linux/make_artifacts.sh +++ b/linux/make_artifacts.sh @@ -27,8 +27,7 @@ ghc --version cabal update cabal clean -cabal configure -f-export-dynamic -fembed_data_files --enable-executable-static --ghc-options '-j4 +RTS -A256m -RTS -split-sections -optc-Os -optl=-pthread' pandoc -cabal build -j4 +cabal build -f-export-dynamic -fembed_data_files --enable-executable-static --ghc-options '-j4 +RTS -A256m -RTS -split-sections -optc-Os -optl=-pthread' -j4 all for f in $(find dist-newstyle -name 'pandoc' -type f -perm /400); do cp $f $ARTIFACTS/; done # Confirm that we have static builds @@ -53,11 +52,14 @@ make_deb() { cd $DEST/bin strip pandoc ln -s pandoc pandoc-server + ln -s pandoc pandoc-lua cd /mnt cp /mnt/man/pandoc.1 $DEST/share/man/man1/pandoc.1 gzip -9 $DEST/share/man/man1/pandoc.1 cp /mnt/man/pandoc-server.1 $DEST/share/man/man1/pandoc-server.1 gzip -9 $DEST/share/man/man1/pandoc-server.1 + cp /mnt/man/pandoc-server.1 $DEST/share/man/man1/pandoc-lua.1 + gzip -9 
$DEST/share/man/man1/pandoc-lua.1 cp /mnt/COPYRIGHT $COPYRIGHT echo "" >> $COPYRIGHT @@ -84,13 +86,16 @@ make_tarball() { mkdir $TARGET/bin $TARGET/share $TARGET/share/man $TARGET/share/man/man1 cp /mnt/man/pandoc.1 $TARGET/share/man/man1 cp /mnt/man/pandoc-server.1 $TARGET/share/man/man1 + cp /mnt/man/pandoc-lua.1 $TARGET/share/man/man1 mv pandoc $TARGET/bin cd $TARGET/bin strip pandoc ln -s pandoc pandoc-server + ln -s pandoc pandoc-lua cd $ARTIFACTS gzip -9 $TARGET/share/man/man1/pandoc.1 gzip -9 $TARGET/share/man/man1/pandoc-server.1 + gzip -9 $TARGET/share/man/man1/pandoc-lua.1 tar cvzf $TARGET-linux-$ARCHITECTURE.tar.gz $TARGET rm -r $TARGET diff --git a/macos/Makefile b/macos/Makefile index b97b9965f6c9..dbec1e9824dd 100644 --- a/macos/Makefile +++ b/macos/Makefile @@ -13,13 +13,16 @@ signed.txt: $(DEST) $(DEST)/bin/pandoc-server: cd $(DEST)/bin && ln -s pandoc pandoc-server -pandoc.pkg: $(DEST)/bin/pandoc-server signed.txt +$(DEST)/bin/pandoc-lua: + cd $(DEST)/bin && ln -s pandoc pandoc-lua + +pandoc.pkg: $(DEST)/bin/pandoc-server $(DEST)/bin/pandoc-lua signed.txt pkgbuild --root pandoc --identifier net.johnmacfarlane.pandoc --version $(VERSION) --ownership recommended $@ $(BASE)-macOS.pkg: pandoc.pkg productbuild --distribution distribution.xml --resources Resources --package-path $< --version $(VERSION) --sign 'Developer ID Installer: John Macfarlane' $@ -$(BASE)-macOS.zip: $(DEST)/bin/pandoc-server signed.txt +$(BASE)-macOS.zip: $(DEST)/bin/pandoc-server $(DEST)/bin/pandoc-lua signed.txt mv $(DEST) $(BASE) zip --symlinks -r $@ $(BASE) diff --git a/man/pandoc-lua.1 b/man/pandoc-lua.1 new file mode 100644 index 000000000000..08d7fdfc51a6 --- /dev/null +++ b/man/pandoc-lua.1 @@ -0,0 +1,76 @@ +.\" Automatically generated by Pandoc 2.19.2 +.\" +.\" Define V font for inline verbatim, using C font in formats +.\" that render this, and otherwise B font. +.ie "\f[CB]x\f[]"x" \{\ +. ftr V B +. ftr VI BI +. ftr VB B +. ftr VBI BI +.\} +.el \{\ +. ftr V CR +. 
ftr VI CI +. ftr VB CB +. ftr VBI CBI +.\} +.TH "pandoc-lua" "1" "September 22, 2022" "pandoc-lua 3.0" "" +.hy +.SH SYNOPSIS +.PP +\f[V]pandoc-lua\f[R] [\f[I]options\f[R]] [\f[I]script\f[R] +[\f[I]args\f[R]]] +.SH DESCRIPTION +.PP +\f[V]pandoc-lua\f[R] is a standalone Lua interpreter with behavior +similar to that of the standard \f[V]lua\f[R] executable, but exposing +all of pandoc\[cq]s Lua libraries. +All \f[V]pandoc.*\f[R] packages, as well as the packages \f[V]re\f[R] +and \f[V]lpeg\f[R], are available via global variables. +Furthermore, the globals \f[V]PANDOC_VERSION\f[R], +\f[V]PANDOC_STATE\f[R], and \f[V]PANDOC_API_VERSION\f[R] are set at +startup. +.PP +If no script argument is given, then the script is assumed to be passed +in via \f[I]stdin\f[R]. +Interactive mode is not supported at this time. +.PP +When called without the option \f[V]-E\f[R], the interpreter checks for +an environment variable \f[V]LUA_INIT\f[R] before running any argument. +If the variable content has the format +\f[I]\f[VI]\[at]filename\f[I]\f[R], then \f[V]pandoc-lua\f[R] executes +the file. +Otherwise, \f[V]pandoc-lua\f[R] executes the string itself. +.SH OPTIONS +.TP +\f[V]-e stat\f[R] +Execute statement \f[V]stat\f[R]. +.TP +\f[V]-l mod\f[R] +If mod has the pattern \f[V]g=m\f[R], then require library \f[V]m\f[R] +into global \f[V]g\f[R]; otherwise require library \f[V]mod\f[R] into +global \f[V]mod\f[R]. +.TP +\f[V]-v\f[R] +Show version information. +.TP +\f[V]-i\f[R] +Not supported yet; print a warning to that effect. +.TP +\f[V]-E\f[R] +Ignore environment variables. +This is not fully implemented yet and only ignores the +\f[V]LUA_INIT\f[R] variable. +Other variables like \f[V]LUA_PATH\f[R] and \f[V]LUA_CPATH\f[R] are +\f[B]not\f[R] ignored. +.TP +\f[V]-W\f[R] +Turn warnings on. +.SH AUTHORS +.PP +Copyright 2022 John MacFarlane (jgm\[at]berkeley.edu) and contributors. +Released under the GPL, version 2 or later. +This software carries no warranty of any kind. 
+(See COPYRIGHT for full copyright and warranty notices.) +.PP +Lua: Copyright 1994-2022 Lua.org, PUC-Rio. diff --git a/man/pandoc-server.1 b/man/pandoc-server.1 index 15373d35f9a7..988695b9b4ec 100644 --- a/man/pandoc-server.1 +++ b/man/pandoc-server.1 @@ -1,4 +1,4 @@ -.\" Automatically generated by Pandoc 2.19 +.\" Automatically generated by Pandoc 2.19.2 .\" .\" Define V font for inline verbatim, using C font in formats .\" that render this, and otherwise B font. @@ -14,7 +14,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "pandoc-server" "1" "August 15, 2022" "pandoc-server 2.19.1" "" +.TH "pandoc-server" "1" "August 15, 2022" "pandoc-server 3.0" "" .hy .SH SYNOPSIS .PP @@ -69,19 +69,50 @@ Print version. .SS Root endpoint .PP The root (\f[V]/\f[R]) endpoint accepts only POST requests. -It returns a converted document in one of the following formats, -depending on Accept headers: +.SS Response +.PP +It returns a converted document in one of the following formats (in +order of preference), depending on the \f[V]Accept\f[R] header: +.IP \[bu] 2 +\f[V]application/octet-stream\f[R] .IP \[bu] 2 \f[V]text/plain\f[R] .IP \[bu] 2 \f[V]application/json\f[R] -.IP \[bu] 2 -\f[V]application/octet-stream\f[R] .PP If the result is a binary format (e.g., \f[V]epub\f[R] or \f[V]docx\f[R]) and the content is returned as plain text or JSON, the binary will be base64 encoded. .PP +If a JSON response is given, it will have one of the following formats. 
+If the conversion is not successful: +.IP +.nf +\f[C] +{ \[dq]error\[dq]: string with the error message } +\f[R] +.fi +.PP +If the conversion is successful: +.IP +.nf +\f[C] +{ \[dq]output\[dq]: string with textual or base64-encoded binary output, + \[dq]base64\[dq]: boolean (true means the \[dq]output\[dq] is base64-encoded), + \[dq]messages\[dq]: array of message objects (see below) } +\f[R] +.fi +.PP +Each element of the \[lq]messages\[rq] array will have the format +.IP +.nf +\f[C] +{ \[dq]message\[dq]: string, + \[dq]verbosity\[dq]: string (either \[dq]WARNING\[dq] or \[dq]INFO\[dq]) } +\f[R] +.fi +.SS Request +.PP The body of the POST request should be a JSON object, with the following fields. Only the \f[V]text\f[R] field is required; all of the others can be @@ -166,7 +197,7 @@ Depth of sections to include in the table of contents. Causes HTML comments to be stripped in Markdown or Textile source, instead of being passed through to the output format. .TP -\f[V]highlight-style\f[R] (string, default \f[V]\[dq]pygments\[dq]\f[R]) +\f[V]highlight-style\f[R] (string, leave unset for no highlighting) Specify the style to use for syntax highlighting of code. Standard styles are \f[V]\[dq]pygments\[dq]\f[R] (the default), \f[V]\[dq]kate\[dq]\f[R], \f[V]\[dq]monochrome\[dq]\f[R], @@ -308,8 +339,7 @@ these two points: It accepts a JSON array, each element of which is a JSON object like the one expected by the root endpoint. .IP \[bu] 2 -It returns a JSON array of results. -(It will not return plain text or octet-stream, like the root endpoint.) +It returns a JSON array of JSON results. .PP This endpoint can be used to convert a sequence of small snippets in one request. 
diff --git a/man/pandoc.1 b/man/pandoc.1 index 928463df8949..ec70a04a1de4 100644 --- a/man/pandoc.1 +++ b/man/pandoc.1 @@ -1,4 +1,4 @@ -.\" Automatically generated by Pandoc 2.19 +.\" Automatically generated by Pandoc 2.19.2 .\" .\" Define V font for inline verbatim, using C font in formats .\" that render this, and otherwise B font. @@ -14,7 +14,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "Pandoc User\[cq]s Guide" "" "August 22, 2022" "pandoc 2.19.2" "" +.TH "Pandoc User\[cq]s Guide" "" "August 22, 2022" "pandoc 3.0" "" .hy .SH NAME pandoc - general markup converter @@ -580,13 +580,6 @@ pandoc uses a level-1 heading to render the document title. Use \f[VI]--shift-heading-level-by\f[I]=X instead, where X = NUMBER - 1.\f[R] Specify the base level for headings (defaults to 1). .TP -\f[V]--strip-empty-paragraphs\f[R] -\f[I]Deprecated. -Use the \f[VI]+empty_paragraphs\f[I] extension instead.\f[R] Ignore -paragraphs with no content. -This option is useful for converting word processing documents where -users have used empty paragraphs to create inter-paragraph space. -.TP \f[V]--indented-code-classes=\f[R]\f[I]CLASSES\f[R] Specify classes to use for indented code blocks\[en]for example, \f[V]perl,numberLines\f[R] or \f[V]haskell\f[R]. @@ -605,6 +598,19 @@ This will allow footnotes in different files with the same identifiers to work as expected. If this option is set, footnotes and links will not work across files. Reading binary files (docx, odt, epub) implies \f[V]--file-scope\f[R]. +.RS +.PP +If two or more files are processed using \f[V]--file-scope\f[R], +prefixes based on the filenames will be added to identifiers in order to +disambiguate them, and internal links will be adjusted accordingly. +For example, a header with identifier \f[V]foo\f[R] in +\f[V]subdir/file1.txt\f[R] will have its identifier changed to +\f[V]subdir__file1.txt__foo\f[R]. 
+.PP +In addition, a Div with an identifier based on the filename will be +added around the file\[cq]s content, so that internal links to the +filename will point to this Div\[cq]s identifier. +.RE .TP \f[V]-F\f[R] \f[I]PROGRAM\f[R], \f[V]--filter=\f[R]\f[I]PROGRAM\f[R] Specify an executable to be used as a filter transforming the pandoc AST @@ -1059,8 +1065,8 @@ Specify whether to use ATX-style (\f[V]#\f[R]-prefixed) or Setext-style ATX-style headings are always used for levels 3+. This option also affects Markdown cells in \f[V]ipynb\f[R] output. .TP -\f[V]--atx-headers\f[R] -\f[I]Deprecated synonym for \f[VI]--markdown-headings=atx\f[I].\f[R] +\f[V]--list-tables\f[R] +Render tables as list tables in RST output. .TP \f[V]--top-level-division=default\f[R]|\f[V]section\f[R]|\f[V]chapter\f[R]|\f[V]part\f[R] Treat top-level headings as the given division type in LaTeX, ConTeXt, @@ -1660,6 +1666,7 @@ Nonzero exit codes have the following meanings: 67 PandocSyntaxMapError 83 PandocFilterError 84 PandocLuaError + 89 PandocNoScriptingEngine 91 PandocMacroLoop 92 PandocUTF8DecodingError 93 PandocIpynbDecodingError @@ -1939,6 +1946,8 @@ or \f[V]{type: citeproc}\f[R]. --markdown-headings atx markdown-headings: atx + --list-tables list-tables: true + --top-level-division chapter top-level-division: chapter --number-sections number-sections: true @@ -3266,7 +3275,7 @@ covered. .PP Note that markdown extensions added to the \f[V]ipynb\f[R] format affect Markdown cells in Jupyter notebooks (as do command-line options like -\f[V]--atx-headers\f[R]). +\f[V]--markdown-headings\f[R]). .SS Typography .SS Extension: \f[V]smart\f[R] .PP @@ -3952,7 +3961,7 @@ does not produce a nested block quote in pandoc: .nf \f[C] > This is a block quote. ->> Nested. 
+>> Not nested, since \[ga]blank_before_blockquote\[ga] is enabled by default \f[R] .fi .SS Verbatim (code) blocks @@ -4078,6 +4087,26 @@ qsort [] = [] \f[R] .fi .PP +This shortcut form may be combined with attributes: +.IP +.nf +\f[C] +\[ga]\[ga]\[ga]haskell {.numberLines} +qsort [] = [] +\[ga]\[ga]\[ga] +\f[R] +.fi +.PP +Which is equivalent to: +.IP +.nf +\f[C] +\[ga]\[ga]\[ga] {.haskell .numberLines} +qsort [] = [] +\[ga]\[ga]\[ga] +\f[R] +.fi +.PP If the \f[V]fenced_code_attributes\f[R] extension is disabled, but input contains class attribute(s) for the code block, the first class attribute will be printed after the opening fence as a bare word. @@ -4537,7 +4566,7 @@ not require lining up columns. A caption may optionally be provided with all 4 kinds of tables (as illustrated in the examples below). A caption is a paragraph beginning with the string \f[V]Table:\f[R] (or -just \f[V]:\f[R]), which will be stripped off. +\f[V]table:\f[R] or just \f[V]:\f[R]), which will be stripped off. It may appear either before or after the table. .SS Extension: \f[V]simple_tables\f[R] .PP @@ -4677,9 +4706,39 @@ The row of \f[V]=\f[R]s separates the header from the table body, and can be omitted for a headerless table. The cells of grid tables may contain arbitrary block elements (multiple paragraphs, code blocks, lists, etc.). -Cells that span multiple columns or rows are not supported. -Grid tables can be created easily using Emacs\[cq] table-mode -(\f[V]M-x table-insert\f[R]). 
+.PP +Cells can span multiple columns or rows: +.IP +.nf +\f[C] ++---------------------+----------+ +| Property | Earth | ++=============+=======+==========+ +| | min | -89.2 \[de]C | +| Temperature +-------+----------+ +| 1961-1990 | mean | 14 \[de]C | +| +-------+----------+ +| | min | 56.7 \[de]C | ++-------------+-------+----------+ +\f[R] +.fi +.PP +A table header may contain more than one row: +.IP +.nf +\f[C] ++---------------------+-----------------------+ +| Location | Temperature 1961-1990 | +| | in degree Celsius | +| +-------+-------+-------+ +| | min | mean | max | ++=====================+=======+=======+=======+ +| Antarctica | -89.2 | N/A | 19.8 | ++---------------------+-------+-------+-------+ +| Earth | -89.2 | 14 | 56.7 | ++---------------------+-------+-------+-------+ +\f[R] +.fi .PP Alignments can be specified as with pipe tables, by putting colons at the boundaries of the separator line after the header: @@ -4703,15 +4762,28 @@ For headerless tables, the colons go on the top line instead: +---------------+---------------+--------------------+ \f[R] .fi -.SS Grid Table Limitations .PP -Pandoc does not support grid tables with row spans or column spans. -This means that neither variable numbers of columns across rows nor -variable numbers of rows across columns are supported by Pandoc. -All grid tables must have the same number of columns in each row, and -the same number of rows in each column. -For example, the Docutils sample grid tables will not render as expected -with Pandoc. 
+A table foot can be defined by enclosing it with separator lines that +use \f[V]=\f[R] instead of \f[V]-\f[R]: +.IP +.nf +\f[C] + +---------------+---------------+ + | Fruit | Price | + +===============+===============+ + | Bananas | $1.34 | + +---------------+---------------+ + | Oranges | $2.10 | + +===============+===============+ + | Sum | $3.44 | + +===============+===============+ +\f[R] +.fi +.PP +The foot must always be placed at the very bottom of the table. +.PP +Grid tables can be created easily using Emacs\[cq] table-mode +(\f[V]M-x table-insert\f[R]). .SS Extension: \f[V]pipe_tables\f[R] .PP Pipe tables look like this: @@ -7299,13 +7371,16 @@ With beamer and reveal.js, the configuration option \f[V]background-image\f[R] can be used either in the YAML metadata block or as a command-line variable to get the same image on every slide. .PP +Note that for reveal.js, the \f[V]background-image\f[R] will be used as +a \f[V]parallaxBackgroundImage\f[R] (see below). +.PP For pptx, you can use a reference doc in which background images have been set on the relevant layouts. .SS \f[V]parallaxBackgroundImage\f[R] (reveal.js) .PP For reveal.js, there is also the reveal.js-native option -\f[V]parallaxBackgroundImage\f[R], which can be used instead of -\f[V]background-image\f[R] to produce a parallax scrolling background. +\f[V]parallaxBackgroundImage\f[R], which produces a parallax scrolling +background. You must also set \f[V]parallaxBackgroundSize\f[R], and can optionally set \f[V]parallaxBackgroundHorizontal\f[R] and \f[V]parallaxBackgroundVertical\f[R] to configure the scrolling @@ -7729,9 +7804,9 @@ Markdown and ipynb notebooks. Note that options and extensions that affect reading and writing of Markdown will also affect Markdown cells in ipynb notebooks. 
For example, \f[V]--wrap=preserve\f[R] will preserve soft line breaks in -Markdown cells; \f[V]--atx-headers\f[R] will cause ATX-style headings to -be used; and \f[V]--preserve-tabs\f[R] will prevent tabs from being -turned to spaces. +Markdown cells; \f[V]--markdown-headings=setext\f[R] will cause +Setext-style headings to be used; and \f[V]--preserve-tabs\f[R] will +prevent tabs from being turned to spaces. .SH SYNTAX HIGHLIGHTING .PP Pandoc will automatically highlight syntax in fenced code blocks that @@ -7963,7 +8038,8 @@ metadata field (see EPUB Metadata, above). .SH RUNNING PANDOC AS A WEB SERVER .PP If you rename (or symlink) the pandoc executable to -\f[V]pandoc-server\f[R], it will start up a web server with a JSON API. +\f[V]pandoc-server\f[R], or if you call pandoc with \f[V]server\f[R] as +the first argument, it will start up a web server with a JSON API. This server exposes most of the conversion functionality of pandoc. For full documentation, see the pandoc-server man page. .PP @@ -7974,6 +8050,16 @@ the same API as \f[V]pandoc-server\f[R]. \f[V]pandoc-server\f[R] is designed to be maximally secure; it uses Haskell\[cq]s type system to provide strong guarantees that no I/O will be performed on the server during pandoc conversions. +.SH RUNNING PANDOC AS A LUA INTERPRETER +.PP +Calling the pandoc executable under the name \f[V]pandoc-lua\f[R] or +with \f[V]lua\f[R] as the first argument will make it function as a +standalone Lua interpreter. +The behavior is mostly identical to that of the standalone \f[V]lua\f[R] +executable, version 5.4. +However, there is no REPL yet, and the \f[V]-i\f[R] option has no +effect. +For full documentation, see the [pandoc-lua] man page. .SH A NOTE ON SECURITY .IP "1." 
3 Although pandoc itself will not create or modify any files other than diff --git a/pandoc-cli/COPYING.md b/pandoc-cli/COPYING.md new file mode 120000 index 000000000000..0c9476f2b40f --- /dev/null +++ b/pandoc-cli/COPYING.md @@ -0,0 +1 @@ +../COPYING.md \ No newline at end of file diff --git a/pandoc-cli/README.md b/pandoc-cli/README.md new file mode 100644 index 000000000000..9589fce457cb --- /dev/null +++ b/pandoc-cli/README.md @@ -0,0 +1,12 @@ +# pandoc-cli + +This package provides the command-line document-conversion program `pandoc`. +There is not much to this package; all of the work is done by +the libraries `pandoc` and `pandoc-server`. + +## License + +© 2006-2022 John MacFarlane (jgm@berkeley.edu). Released under the +[GPL](https://www.gnu.org/licenses/old-licenses/gpl-2.0.html "GNU General Public License"), +version 2 or greater. This software carries no warranty of any kind. +(See COPYRIGHT for full copyright and warranty notices.) diff --git a/pandoc-cli/lua/PandocCLI/Lua.hs b/pandoc-cli/lua/PandocCLI/Lua.hs new file mode 100644 index 000000000000..8c50e66b339c --- /dev/null +++ b/pandoc-cli/lua/PandocCLI/Lua.hs @@ -0,0 +1,35 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : PandocCLI.Lua + Copyright : © 2022 Albert Krewinkel + License : GPL-2.0-or-later + Maintainer : Albert Krewinkel + +Functions to run the pandoc Lua scripting engine. +-} +module PandocCLI.Lua (runLuaInterpreter, getEngine) where + +import Control.Monad ((<=<)) +import HsLua.CLI (EnvBehavior (..), Settings (..), runStandalone) +import Text.Pandoc.Class (runIOorExplode) +import Text.Pandoc.Error (handleError) +import Text.Pandoc.Lua (runLua, runLuaNoEnv, getEngine) +import Text.Pandoc.Version (pandocVersionText) + +-- | Runs pandoc as a Lua interpreter that is (mostly) compatible with +-- the default @lua@ program shipping with Lua. 
+runLuaInterpreter :: String -- ^ Program name + -> [String] -- ^ Command line arguments + -> IO () +runLuaInterpreter progName args = do + let settings = Settings + { settingsVersionInfo = "\nEmbedded in pandoc " <> pandocVersionText + , settingsRunner = runner + } + runStandalone settings progName args + where + runner envBehavior = + let runLua' = case envBehavior of + IgnoreEnvVars -> runLuaNoEnv + ConsultEnvVars -> runLua + in handleError <=< runIOorExplode . runLua' diff --git a/pandoc-cli/no-lua/PandocCLI/Lua.hs b/pandoc-cli/no-lua/PandocCLI/Lua.hs new file mode 100644 index 000000000000..7ada019e8b61 --- /dev/null +++ b/pandoc-cli/no-lua/PandocCLI/Lua.hs @@ -0,0 +1,25 @@ +{- | + Module : PandocCLI.Lua + Copyright : © 2022 Albert Krewinkel + License : GPL-2.0-or-later + Maintainer : Albert Krewinkel + +Placeholder values to be used when pandoc is compiled without support +for the Lua scripting engine. +-} +module PandocCLI.Lua (runLuaInterpreter, getEngine) where + +import Control.Monad.IO.Class (MonadIO) +import Text.Pandoc.Error (PandocError (PandocNoScriptingEngine), handleError) +import Text.Pandoc.Scripting (ScriptingEngine, noEngine) + +-- | Raises an error, reporting that the scripting engine is unavailable. +runLuaInterpreter :: String -- ^ Program name + -> [String] -- ^ Command line arguments + -> IO () +runLuaInterpreter _progName _args = do + handleError (Left PandocNoScriptingEngine) + +-- | Placeholder scripting engine. +getEngine :: MonadIO m => m ScriptingEngine +getEngine = pure noEngine diff --git a/pandoc-cli/no-server/PandocCLI/Server.hs b/pandoc-cli/no-server/PandocCLI/Server.hs new file mode 100644 index 000000000000..c2b391cbdbae --- /dev/null +++ b/pandoc-cli/no-server/PandocCLI/Server.hs @@ -0,0 +1,33 @@ +{- | + Module : PandocCLI.Server + Copyright : © 2006-2022 John MacFarlane + License : GPL-2.0-or-later + Maintainer : John MacFarlane + +Placeholder module to be used when pandoc is compiled without server +support. 
+-} +module PandocCLI.Server + ( runCGI + , runServer + ) +where + +import System.IO (hPutStrLn, stderr) +import System.Exit (exitWith, ExitCode(ExitFailure)) + +-- | Placeholder function for the CGI server; prints an error message +-- and exists with error code. +runCGI :: IO () +runCGI = serverUnsupported + +-- | Placeholder function for the HTTP server; prints an error message +-- and exists with error code. +runServer :: [String] -> IO () +runServer _args = serverUnsupported + +serverUnsupported :: IO () +serverUnsupported = do + hPutStrLn stderr $ "Server mode unsupported.\n" <> + "Pandoc was not compiled with the 'server' flag." + exitWith $ ExitFailure 4 diff --git a/pandoc-cli/pandoc-cli.cabal b/pandoc-cli/pandoc-cli.cabal new file mode 100644 index 000000000000..22a01f939a96 --- /dev/null +++ b/pandoc-cli/pandoc-cli.cabal @@ -0,0 +1,91 @@ +cabal-version: 2.4 +name: pandoc-cli +version: 0.1 +build-type: Simple +license: GPL-2.0-or-later +license-file: COPYING.md +copyright: (c) 2006-2022 John MacFarlane +author: John MacFarlane +maintainer: John MacFarlane +bug-reports: https://github.com/jgm/pandoc/issues +stability: alpha +homepage: https://pandoc.org +category: Text +tested-with: GHC == 8.6.5, GHC == 8.8.4, GHC == 8.10.7, GHC == 9.0.2, + GHC == 9.2.3 +synopsis: Conversion between documentation formats +description: Pandoc-cli provides a command-line executable that uses the pandoc library to convert between markup formats. +-- data-files: +-- extra-source-files: +source-repository head + type: git + location: git://github.com/jgm/pandoc.git + +flag lua + description: Support custom modifications and conversions with the + pandoc Lua scripting engine. + default: True + +flag server + Description: Include support for running pandoc as an HTTP server. + Default: True + +flag nightly + Description: Add '-nightly-COMPILEDATE' to the output of '--version'. 
+ Default: False + +common common-options + default-language: Haskell2010 + other-extensions: OverloadedStrings + build-depends: base >= 4.12 && < 5 + ghc-options: -Wall -fno-warn-unused-do-bind + -Wincomplete-record-updates + -Wnoncanonical-monad-instances + -Wcpp-undef + -Wincomplete-uni-patterns + -Widentities + -Wpartial-fields + -Wmissing-signatures + -fhide-source-paths + + if impl(ghc >= 8.10) + ghc-options: -Wunused-packages + + if impl(ghc >= 9.0) + ghc-options: -Winvalid-haddock + + if os(windows) + cpp-options: -D_WINDOWS + +common common-executable + import: common-options + ghc-options: -rtsopts -with-rtsopts=-A8m -threaded + +executable pandoc + import: common-executable + hs-source-dirs: src + main-is: pandoc.hs + buildable: True + build-depends: pandoc >= 3.0, + text + other-modules: PandocCLI.Lua + , PandocCLI.Server + if flag(nightly) + cpp-options: -DNIGHTLY + build-depends: template-haskell, + time + if flag(server) + build-depends: pandoc-server >= 0.1 && < 0.2, + wai-extra >= 3.0.24, + warp, + safe + hs-source-dirs: server + else + hs-source-dirs: no-server + + if flag(lua) + build-depends: hslua-cli >= 1.2 && < 1.3, + pandoc-lua-engine >= 0.1 && < 0.2 + hs-source-dirs: lua + else + hs-source-dirs: no-lua diff --git a/pandoc-cli/server/PandocCLI/Server.hs b/pandoc-cli/server/PandocCLI/Server.hs new file mode 100644 index 000000000000..55cc9ffbd6ef --- /dev/null +++ b/pandoc-cli/server/PandocCLI/Server.hs @@ -0,0 +1,35 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Main + Copyright : © 2006-2022 John MacFarlane + License : GPL-2.0-or-later + Maintainer : John MacFarlane + +Functions for the pandoc server CLI. 
+-} +module PandocCLI.Server + ( runCGI + , runServer + ) +where +import qualified Network.Wai.Handler.CGI as CGI +import qualified Network.Wai.Handler.Warp as Warp +import Network.Wai.Middleware.Timeout (timeout) +import Safe (readDef) +import System.Environment (lookupEnv) +import Text.Pandoc.Server (ServerOpts(..), parseServerOptsFromArgs, app) +import System.IO (stderr, hPutStrLn) + +-- | Runs the CGI server. +runCGI :: IO () +runCGI = do + cgiTimeout <- maybe 2 (readDef 2) <$> lookupEnv "PANDOC_SERVER_TIMEOUT" + CGI.run (timeout cgiTimeout app) + +-- | Runs the HTTP server. +runServer :: [String] -> IO () +runServer args = do + sopts <- parseServerOptsFromArgs args + hPutStrLn stderr $ + "Starting server on port " <> show (serverPort sopts) <> "..." + Warp.run (serverPort sopts) (timeout (serverTimeout sopts) app) diff --git a/pandoc-cli/src/pandoc.hs b/pandoc-cli/src/pandoc.hs new file mode 100644 index 000000000000..6bf54bda3271 --- /dev/null +++ b/pandoc-cli/src/pandoc.hs @@ -0,0 +1,105 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE TemplateHaskell #-} +{- | + Module : Main + Copyright : Copyright (C) 2006-2022 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Parses command-line options and calls the appropriate readers and +writers. 
+-} +module Main where +import qualified Control.Exception as E +import System.Environment (getArgs, getProgName) +import Text.Pandoc.App ( convertWithOpts, defaultOpts, options + , parseOptionsFromArgs, handleOptInfo ) +import Text.Pandoc.Error (handleError) +import qualified Text.Pandoc.UTF8 as UTF8 +import System.Exit (exitSuccess) +import Data.Monoid (Any(..)) +import Control.Monad (when) +import PandocCLI.Lua +import PandocCLI.Server +import Text.Pandoc.Version (pandocVersion) +import Text.Pandoc.Data (defaultUserDataDir) +import Text.Pandoc.Scripting (ScriptingEngine(..)) +import Data.Version (showVersion) +import qualified Data.Text as T + +#ifdef NIGHTLY +import qualified Language.Haskell.TH as TH +import Data.Time +#endif + +#ifdef NIGHTLY +versionSuffix :: String +versionSuffix = "-nightly-" ++ + $(TH.stringE =<< + TH.runIO (formatTime defaultTimeLocale "%F" <$> Data.Time.getCurrentTime)) +#else +versionSuffix :: String +versionSuffix = "" +#endif + +main :: IO () +main = E.handle (handleError . Left) $ do + prg <- getProgName + rawArgs <- map UTF8.decodeArg <$> getArgs + let hasVersion = getAny $ foldMap + (\s -> Any (s == "-v" || s == "--version")) + (takeWhile (/= "--") rawArgs) + when hasVersion versionInfo + case prg of + "pandoc-server.cgi" -> runCGI + "pandoc-server" -> runServer rawArgs + "pandoc-lua" -> runLuaInterpreter prg rawArgs + _ -> + case rawArgs of + "lua" : args -> runLuaInterpreter "pandoc lua" args + "server": args -> runServer args + args -> do + engine <- getEngine + res <- parseOptionsFromArgs options defaultOpts prg args + case res of + Left e -> handleOptInfo engine e + Right opts -> convertWithOpts engine opts + +copyrightMessage :: String +copyrightMessage = + "Copyright (C) 2006-2022 John MacFarlane. Web: https://pandoc.org\n" + ++ + "This is free software; see the source for copying conditions. There is no\n" + ++ + "warranty, not even for merchantability or fitness for a particular purpose." 
+ +flagSettings :: String +flagSettings = "Features: " ++ +#ifdef VERSION_pandoc_server + "+server" +#else + "-server" +#endif + ++ " " ++ +#ifdef VERSION_hslua_cli + "+lua" +#else + "-lua" +#endif + +versionInfo :: IO () +versionInfo = do + progname <- getProgName + defaultDatadir <- defaultUserDataDir + scriptingEngine <- getEngine + putStr $ unlines + [ progname ++ " " ++ showVersion pandocVersion ++ versionSuffix + , flagSettings + , "Scripting engine: " ++ T.unpack (engineName scriptingEngine) + , "User data directory: " ++ defaultDatadir + , copyrightMessage + ] + exitSuccess diff --git a/pandoc-lua-engine/COPYING.md b/pandoc-lua-engine/COPYING.md new file mode 120000 index 000000000000..0c9476f2b40f --- /dev/null +++ b/pandoc-lua-engine/COPYING.md @@ -0,0 +1 @@ +../COPYING.md \ No newline at end of file diff --git a/pandoc-lua-engine/README.md b/pandoc-lua-engine/README.md new file mode 100644 index 000000000000..81d3a5a1b8b1 --- /dev/null +++ b/pandoc-lua-engine/README.md @@ -0,0 +1,6 @@ +# pandoc-lua-engine + +This package provides a Lua pandoc scripting engine based. It +allows to write filters, custom readers, and custom writers in +Lua. 
+ diff --git a/pandoc-lua-engine/pandoc-lua-engine.cabal b/pandoc-lua-engine/pandoc-lua-engine.cabal new file mode 100644 index 000000000000..9910b9fef1f7 --- /dev/null +++ b/pandoc-lua-engine/pandoc-lua-engine.cabal @@ -0,0 +1,142 @@ +cabal-version: 2.4 +name: pandoc-lua-engine +version: 0.1 +build-type: Simple +license: GPL-2.0-or-later +license-file: COPYING.md +copyright: © 2006-2022 John MacFarlane, 2017-2022 Albert Krewinkel +author: John MacFarlane, Albert Krewinkel +maintainer: Albert Krewinkel +bug-reports: https://github.com/jgm/pandoc/issues +homepage: https://pandoc.org +category: Text +tested-with: GHC == 8.6.5 + , GHC == 8.8.4 + , GHC == 8.10.7 + , GHC == 9.0.2 + , GHC == 9.2.3 +synopsis: Lua engine to power custom pandoc conversions +description: This package provides a pandoc scripting engine based on + Lua. +extra-source-files: README.md + , test/bytestring.bin + , test/bytestring.lua + , test/bytestring-reader.lua + , test/extensions.lua + , test/lua/*.lua + , test/lua/module/*.lua + , test/lua/module/partial.test + , test/lua/module/tiny.epub + , test/sample.lua + , test/tables.custom + , test/tables.native + , test/testsuite.native + , test/writer.custom + , test/writer-template.lua + , test/writer-template.out.txt + +source-repository head + type: git + location: https://github.com/jgm/pandoc.git + subdir: pandoc-lua-engine + +common common-options + default-language: Haskell2010 + build-depends: base >= 4.12 && < 5 + ghc-options: -Wall -fno-warn-unused-do-bind + -Wincomplete-record-updates + -Wnoncanonical-monad-instances + -Wcpp-undef + -Wincomplete-uni-patterns + -Widentities + -Wpartial-fields + -Wmissing-export-lists + -Wmissing-signatures + -fhide-source-paths + + if impl(ghc >= 8.10) + ghc-options: -Wunused-packages + + if impl(ghc >= 9.0) + ghc-options: -Winvalid-haddock + +library + import: common-options + hs-source-dirs: src + exposed-modules: Text.Pandoc.Lua + other-modules: Text.Pandoc.Lua.Filter + , Text.Pandoc.Lua.Global + , 
Text.Pandoc.Lua.Init + , Text.Pandoc.Lua.Marshal.CommonState + , Text.Pandoc.Lua.Marshal.Context + , Text.Pandoc.Lua.Marshal.Format + , Text.Pandoc.Lua.Marshal.PandocError + , Text.Pandoc.Lua.Marshal.ReaderOptions + , Text.Pandoc.Lua.Marshal.Reference + , Text.Pandoc.Lua.Marshal.Sources + , Text.Pandoc.Lua.Marshal.Template + , Text.Pandoc.Lua.Marshal.WriterOptions + , Text.Pandoc.Lua.Module.Format + , Text.Pandoc.Lua.Module.MediaBag + , Text.Pandoc.Lua.Module.Pandoc + , Text.Pandoc.Lua.Module.Scaffolding + , Text.Pandoc.Lua.Module.System + , Text.Pandoc.Lua.Module.Template + , Text.Pandoc.Lua.Module.Types + , Text.Pandoc.Lua.Module.Utils + , Text.Pandoc.Lua.Orphans + , Text.Pandoc.Lua.PandocLua + , Text.Pandoc.Lua.Reader + , Text.Pandoc.Lua.Writer + , Text.Pandoc.Lua.Writer.Classic + , Text.Pandoc.Lua.Writer.Scaffolding + + build-depends: SHA >= 1.6 && < 1.7 + , bytestring >= 0.9 && < 0.12 + , citeproc >= 0.8 && < 0.9 + , containers >= 0.6.0.1 && < 0.7 + , data-default >= 0.4 && < 0.8 + , doclayout >= 0.4 && < 0.5 + , doctemplates >= 0.10 && < 0.11 + , exceptions >= 0.8 && < 0.11 + , hslua >= 2.2.1 && < 2.3 + , hslua-aeson >= 2.2.1 && < 2.3 + , hslua-core >= 2.2.1 && < 2.3 + , hslua-module-doclayout>= 1.0.4 && < 1.1 + , hslua-module-path >= 1.0.3 && < 1.1 + , hslua-module-system >= 1.0 && < 1.1 + , hslua-module-text >= 1.0 && < 1.1 + , hslua-module-version >= 1.0.3 && < 1.1 + , hslua-module-zip >= 1.0.0 && < 1.1 + , lpeg >= 1.0.1 && < 1.1 + , mtl >= 2.2 && < 2.3 + , pandoc >= 3.0 && < 3.1 + , pandoc-lua-marshal >= 0.1.7 && < 0.2 + , pandoc-types >= 1.22.2 && < 1.23 + , parsec >= 3.1 && < 3.2 + , text >= 1.1.1 && < 2.1 + + +test-suite test-pandoc-lua-engine + import: common-options + type: exitcode-stdio-1.0 + main-is: test-pandoc-lua-engine.hs + hs-source-dirs: test + build-depends: pandoc-lua-engine + , bytestring + , directory + , data-default + , exceptions >= 0.8 && < 0.11 + , filepath + , hslua >= 2.1 && < 2.3 + , pandoc + , pandoc-types >= 1.22.2 && < 1.23 + 
, tasty + , tasty-golden + , tasty-hunit + , tasty-lua >= 1.0 && < 1.1 + , text >= 1.1.1 && < 2.1 + other-modules: Tests.Lua + , Tests.Lua.Module + , Tests.Lua.Reader + , Tests.Lua.Writer diff --git a/pandoc-lua-engine/src/Text/Pandoc/Lua.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua.hs new file mode 100644 index 000000000000..8ff9a7c64251 --- /dev/null +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua.hs @@ -0,0 +1,51 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TypeApplications #-} +{- | + Module : Text.Pandoc.Lua + Copyright : Copyright © 2017-2022 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel + Stability : alpha + +Running pandoc Lua filters. +-} +module Text.Pandoc.Lua + ( -- * High-level functions + applyFilter + , readCustom + , writeCustom + -- * Low-level functions + , Global(..) + , setGlobals + , runLua + , runLuaNoEnv + -- * Engine + , getEngine + ) where + +import Control.Monad.IO.Class (MonadIO (liftIO)) +import HsLua.Core (getglobal, openlibs, run, top, tostring) +import Text.Pandoc.Error (PandocError) +import Text.Pandoc.Lua.Filter (applyFilter) +import Text.Pandoc.Lua.Global (Global (..), setGlobals) +import Text.Pandoc.Lua.Init (runLua, runLuaNoEnv) +import Text.Pandoc.Lua.Reader (readCustom) +import Text.Pandoc.Lua.Writer (writeCustom) +import Text.Pandoc.Lua.Orphans () +import Text.Pandoc.Scripting (ScriptingEngine (..)) +import qualified Text.Pandoc.UTF8 as UTF8 + +-- | Constructs the Lua scripting engine. +getEngine :: MonadIO m => m ScriptingEngine +getEngine = do + versionName <- liftIO . 
run @PandocError $ do + openlibs + getglobal "_VERSION" + tostring top + pure $ ScriptingEngine + { engineName = maybe "Lua (unknown version)" UTF8.toText versionName + , engineApplyFilter = applyFilter + , engineReadCustom = readCustom + , engineWriteCustom = writeCustom + } diff --git a/pandoc-lua-engine/src/Text/Pandoc/Lua/Filter.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Filter.hs new file mode 100644 index 000000000000..6e7dc0fbafd8 --- /dev/null +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua/Filter.hs @@ -0,0 +1,81 @@ +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE IncoherentInstances #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{- | +Module : Text.Pandoc.Lua.Filter +Copyright : © 2012-2022 John MacFarlane, + © 2017-2022 Albert Krewinkel +License : GNU GPL, version 2 or above +Maintainer : Albert Krewinkel +Stability : alpha + +Types and functions for running Lua filters. +-} +module Text.Pandoc.Lua.Filter + ( applyFilter + ) where +import Control.Monad ((>=>), (<$!>)) +import HsLua as Lua +import Text.Pandoc.Definition +import Text.Pandoc.Filter (Environment (..)) +import Text.Pandoc.Lua.Marshal.AST +import Text.Pandoc.Lua.Marshal.Filter +import Text.Pandoc.Lua.Global (Global (..), setGlobals) +import Text.Pandoc.Lua.Init (runLua) +import Text.Pandoc.Lua.PandocLua () +import Control.Exception (throw) +import qualified Data.Text as T +import Text.Pandoc.Class (PandocMonad) +import Control.Monad.Trans (MonadIO) +import Text.Pandoc.Error (PandocError (PandocFilterError, PandocLuaError)) + +-- | Transform document using the filter defined in the given file. +runFilterFile :: FilePath -> Pandoc -> LuaE PandocError Pandoc +runFilterFile filterPath doc = do + oldtop <- gettop + stat <- dofileTrace filterPath + if stat /= Lua.OK + then throwErrorAsException + else do + newtop <- gettop + -- Use the returned filters, or the implicitly defined global + -- filter if nothing was returned. 
+ luaFilters <- forcePeek $ + if newtop - oldtop >= 1 + then peekList peekFilter top + else (:[]) <$!> (liftLua pushglobaltable *> peekFilter top) + settop oldtop + runAll luaFilters doc + +runAll :: [Filter] -> Pandoc -> LuaE PandocError Pandoc +runAll = foldr ((>=>) . applyFully) return + +-- | Run the Lua filter in @filterPath@ for a transformation to the +-- target format (first element in args). Pandoc uses Lua init files to +-- setup the Lua interpreter. +applyFilter :: (PandocMonad m, MonadIO m) + => Environment + -> [String] + -> FilePath + -> Pandoc + -> m Pandoc +applyFilter fenv args fp doc = do + let globals = [ FORMAT $ case args of + x:_ -> T.pack x + _ -> "" + , PANDOC_READER_OPTIONS (envReaderOptions fenv) + , PANDOC_WRITER_OPTIONS (envWriterOptions fenv) + , PANDOC_SCRIPT_FILE fp + ] + runLua >=> forceResult fp $ do + setGlobals globals + runFilterFile fp doc + +forceResult :: (PandocMonad m, MonadIO m) + => FilePath -> Either PandocError Pandoc -> m Pandoc +forceResult fp eitherResult = case eitherResult of + Right x -> return x + Left err -> throw . 
PandocFilterError (T.pack fp) $ case err of + PandocLuaError msg -> msg + _ -> T.pack $ show err diff --git a/src/Text/Pandoc/Lua/Global.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Global.hs similarity index 93% rename from src/Text/Pandoc/Lua/Global.hs rename to pandoc-lua-engine/src/Text/Pandoc/Lua/Global.hs index 938cc84b7b82..304ad8695b6d 100644 --- a/src/Text/Pandoc/Lua/Global.hs +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua/Global.hs @@ -16,16 +16,16 @@ module Text.Pandoc.Lua.Global import HsLua as Lua import HsLua.Module.Version (pushVersion) -import Paths_pandoc (version) -import Text.Pandoc.Class.CommonState (CommonState) +import Text.Pandoc.Class (CommonState) import Text.Pandoc.Definition (Pandoc, pandocTypesVersion) import Text.Pandoc.Error (PandocError) import Text.Pandoc.Lua.Marshal.CommonState (pushCommonState) import Text.Pandoc.Lua.Marshal.Pandoc (pushPandoc) import Text.Pandoc.Lua.Marshal.ReaderOptions (pushReaderOptionsReadonly) import Text.Pandoc.Lua.Marshal.WriterOptions (pushWriterOptions) -import Text.Pandoc.Lua.Orphans () +import Text.Pandoc.Lua.PandocLua () import Text.Pandoc.Options (ReaderOptions, WriterOptions) +import Text.Pandoc.Version (pandocVersion) import qualified Data.Text as Text @@ -70,5 +70,5 @@ setGlobal global = case global of pushCommonState commonState Lua.setglobal "PANDOC_STATE" PANDOC_VERSION -> do - pushVersion version + pushVersion pandocVersion Lua.setglobal "PANDOC_VERSION" diff --git a/src/Text/Pandoc/Lua/Init.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Init.hs similarity index 70% rename from src/Text/Pandoc/Lua/Init.hs rename to pandoc-lua-engine/src/Text/Pandoc/Lua/Init.hs index 966f0a581881..a05d68355442 100644 --- a/src/Text/Pandoc/Lua/Init.hs +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua/Init.hs @@ -1,5 +1,6 @@ {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RankNTypes #-} {- | Module : Text.Pandoc.Lua Copyright : Copyright © 2017-2022 Albert Krewinkel @@ -12,6 +13,8 @@ Functions to 
initialize the Lua interpreter. -} module Text.Pandoc.Lua.Init ( runLua + , runLuaNoEnv + , runLuaWith ) where import Control.Monad (forM, forM_, when) @@ -19,10 +22,13 @@ import Control.Monad.Catch (throwM, try) import Control.Monad.Trans (MonadIO (..)) import Data.Maybe (catMaybes) import HsLua as Lua hiding (status, try) -import Text.Pandoc.Class.PandocMonad (PandocMonad, readDataFile) +import HsLua.Core.Run as Lua +import Text.Pandoc.Class (PandocMonad (..)) +import Text.Pandoc.Data (readDataFile) import Text.Pandoc.Error (PandocError (PandocLuaError)) +import Text.Pandoc.Lua.Global (Global (..), setGlobals) import Text.Pandoc.Lua.Marshal.List (newListMetatable, pushListModule) -import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua, runPandocLua) +import Text.Pandoc.Lua.PandocLua (PandocLua (..), liftPandocLua) import qualified Data.ByteString.Char8 as Char8 import qualified Data.Text as T import qualified Lua.LPeg as LPeg @@ -30,19 +36,42 @@ import qualified HsLua.Aeson import qualified HsLua.Module.DocLayout as Module.Layout import qualified HsLua.Module.Path as Module.Path import qualified HsLua.Module.Text as Module.Text -import qualified Text.Pandoc.Lua.Module.Pandoc as Module.Pandoc +import qualified HsLua.Module.Zip as Module.Zip +import qualified Text.Pandoc.Lua.Module.Format as Pandoc.Format import qualified Text.Pandoc.Lua.Module.MediaBag as Pandoc.MediaBag +import qualified Text.Pandoc.Lua.Module.Pandoc as Module.Pandoc +import qualified Text.Pandoc.Lua.Module.Scaffolding as Pandoc.Scaffolding import qualified Text.Pandoc.Lua.Module.System as Pandoc.System import qualified Text.Pandoc.Lua.Module.Template as Pandoc.Template import qualified Text.Pandoc.Lua.Module.Types as Pandoc.Types import qualified Text.Pandoc.Lua.Module.Utils as Pandoc.Utils --- | Run the lua interpreter, using pandoc's default way of environment +-- | Run the Lua interpreter, using pandoc's default way of environment -- initialization. 
runLua :: (PandocMonad m, MonadIO m) => LuaE PandocError a -> m (Either PandocError a) -runLua action = - runPandocLua . try $ do +runLua action = do + runPandocLuaWith Lua.run . try $ do + initLuaState + liftPandocLua action + +runLuaWith :: (PandocMonad m, MonadIO m) + => GCManagedState -> LuaE PandocError a -> m (Either PandocError a) +runLuaWith luaState action = do + runPandocLuaWith (withGCManagedState luaState) . try $ do + initLuaState + liftPandocLua action + +-- | Like 'runLua', but ignores all environment variables like @LUA_PATH@. +runLuaNoEnv :: (PandocMonad m, MonadIO m) + => LuaE PandocError a -> m (Either PandocError a) +runLuaNoEnv action = do + runPandocLuaWith Lua.run . try $ do + liftPandocLua $ do + -- This is undocumented, but works -- the code is adapted from the + -- `lua.c` sources for the default interpreter. + Lua.pushboolean True + Lua.setfield Lua.registryindex "LUA_NOENV" initLuaState liftPandocLua action @@ -53,7 +82,9 @@ runLua action = -- it must be handled separately. loadedModules :: [Module PandocError] loadedModules = - [ Pandoc.MediaBag.documentedModule + [ Pandoc.Format.documentedModule + , Pandoc.MediaBag.documentedModule + , Pandoc.Scaffolding.documentedModule , Pandoc.System.documentedModule , Pandoc.Template.documentedModule , Pandoc.Types.documentedModule @@ -61,6 +92,7 @@ loadedModules = , Module.Layout.documentedModule { moduleName = "pandoc.layout" } , Module.Path.documentedModule { moduleName = "pandoc.path" } , Module.Text.documentedModule + , Module.Zip.documentedModule { moduleName = "pandoc.zip" } ] -- | Initialize the lua state with all required values @@ -151,3 +183,31 @@ initLuaState = do initJsonMetatable :: PandocLua () initJsonMetatable = liftPandocLua $ do newListMetatable HsLua.Aeson.jsonarray (pure ()) + +-- | Evaluate a @'PandocLua'@ computation, running all contained Lua +-- operations. +runPandocLuaWith :: (PandocMonad m, MonadIO m) + => (forall b. 
LuaE PandocError b -> IO b) + -> PandocLua a + -> m a +runPandocLuaWith runner pLua = do + origState <- getCommonState + globals <- defaultGlobals + (result, newState) <- liftIO . runner . unPandocLua $ do + putCommonState origState + liftPandocLua $ setGlobals globals + r <- pLua + c <- getCommonState + return (r, c) + putCommonState newState + return result + +-- | Global variables which should always be set. +defaultGlobals :: PandocMonad m => m [Global] +defaultGlobals = do + commonState <- getCommonState + return + [ PANDOC_API_VERSION + , PANDOC_STATE commonState + , PANDOC_VERSION + ] diff --git a/src/Text/Pandoc/Lua/Marshal/CommonState.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Marshal/CommonState.hs similarity index 100% rename from src/Text/Pandoc/Lua/Marshal/CommonState.hs rename to pandoc-lua-engine/src/Text/Pandoc/Lua/Marshal/CommonState.hs diff --git a/pandoc-lua-engine/src/Text/Pandoc/Lua/Marshal/Context.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Marshal/Context.hs new file mode 100644 index 000000000000..26dffec219a7 --- /dev/null +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua/Marshal/Context.hs @@ -0,0 +1,73 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} +{- | + Module : Text.Pandoc.Lua.Marshaling.Context + Copyright : © 2012-2022 John MacFarlane + © 2017-2022 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel + Stability : alpha + +Marshaling instance for doctemplates Context and its components. 
+-} +module Text.Pandoc.Lua.Marshal.Context + ( peekContext + , pushContext + ) where + +import Control.Monad (when, (<$!>)) +import Data.Text (Text) +import HsLua as Lua +import HsLua.Module.DocLayout (peekDoc, pushDoc) +import Text.DocTemplates (Context(..), Val(..)) + +instance Pushable (Context Text) where + push = pushContext + +instance Pushable (Val Text) where + push = pushVal + +-- | Retrieves a template context from the Lua stack. +peekContext :: LuaError e => Peeker e (Context Text) +peekContext idx = Context <$!> peekMap peekText peekVal idx + +-- | Pushes a template context to the Lua stack. +pushContext :: LuaError e => Pusher e (Context Text) +pushContext ctx = do + pushMap pushText pushVal $ unContext ctx + created <- Lua.newmetatable "pandoc Context" + when created $ do + pushName "__concat" + pushHaskellFunction $ do + c1 <- forcePeek $ peekContext (nthBottom 1) + c2 <- forcePeek $ peekContext (nthBottom 2) + pushContext (c1 <> c2) + return 1 + rawset (nth 3) + setmetatable (nth 2) + +pushVal :: LuaError e => Pusher e (Val Text) +pushVal = \case + NullVal -> Lua.pushnil + BoolVal b -> Lua.pushBool b + MapVal ctx -> pushContext ctx + ListVal xs -> pushList pushVal xs + SimpleVal d -> pushDoc d + +peekVal :: LuaError e => Peeker e (Val Text) +peekVal idx = liftLua (ltype idx) >>= \case + TypeNil -> pure NullVal + TypeBoolean -> BoolVal <$!> peekBool idx + TypeNumber -> SimpleVal <$!> peekDoc idx + TypeString -> SimpleVal <$!> peekDoc idx + TypeTable -> do + len <- liftLua $ Lua.rawlen idx + if len <= 0 + then MapVal <$!> peekContext idx + else ListVal <$!> peekList peekVal idx + TypeUserdata -> SimpleVal <$!> peekDoc idx + _ -> failPeek =<< + typeMismatchMessage "Doc, string, boolean, table, or nil" idx diff --git a/pandoc-lua-engine/src/Text/Pandoc/Lua/Marshal/Format.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Marshal/Format.hs new file mode 100644 index 000000000000..39b1b98a000c --- /dev/null +++ 
b/pandoc-lua-engine/src/Text/Pandoc/Lua/Marshal/Format.hs @@ -0,0 +1,51 @@ +{-# OPTIONS_GHC -fno-warn-orphans #-} +{- | + Module : Text.Pandoc.Lua.Marshaling.Format + Copyright : © 2022 Albert Krewinkel + License : GPL-2.0-or-later + Maintainer : Albert Krewinkel + +Marshaling functions and instance for format related types, including +'Extensions' and 'ExtensionConfig'. +-} +module Text.Pandoc.Lua.Marshal.Format + ( peekExtensions + , pushExtensions + , peekExtensionsConfig + ) where + +import HsLua +import Text.Pandoc.Extensions (Extension, Extensions, extensionsFromList, readExtension) +import Text.Pandoc.Format (ExtensionsConfig (..)) + +-- | Retrieves an 'Extensions' set from the Lua stack. +peekExtension :: LuaError e => Peeker e Extension +peekExtension idx = do + extString <- peekString idx + return $ readExtension extString +{-# INLINE peekExtension #-} + +-- | Retrieves an 'Extensions' set from the Lua stack. +peekExtensions :: LuaError e => Peeker e Extensions +peekExtensions = peekViaJSON +{-# INLINE peekExtensions #-} + +-- | Pushes a set of 'Extensions' to the top of the Lua stack. +pushExtensions :: LuaError e => Pusher e Extensions +pushExtensions = pushViaJSON +{-# INLINE pushExtensions #-} + +instance Peekable Extensions where + safepeek = peekExtensions + +instance Pushable Extensions where + push = pushExtensions + +-- | Retrieves an 'ExtensionsConfig' value from the Lua stack. +peekExtensionsConfig :: LuaError e => Peeker e ExtensionsConfig +peekExtensionsConfig idx = do + exts <- peekKeyValuePairs peekExtension peekBool idx + return $ ExtensionsConfig + { extsDefault = extensionsFromList . map fst $ filter snd exts + , extsSupported = extensionsFromList . 
map fst $ exts + } diff --git a/src/Text/Pandoc/Lua/Marshal/PandocError.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Marshal/PandocError.hs similarity index 75% rename from src/Text/Pandoc/Lua/Marshal/PandocError.hs rename to pandoc-lua-engine/src/Text/Pandoc/Lua/Marshal/PandocError.hs index 7f83f2fc04e7..fe4227c5c57c 100644 --- a/src/Text/Pandoc/Lua/Marshal/PandocError.hs +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua/Marshal/PandocError.hs @@ -1,7 +1,6 @@ {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TypeApplications #-} {- | Module : Text.Pandoc.Lua.Marshal.PandocError Copyright : © 2020-2022 Albert Krewinkel @@ -19,9 +18,9 @@ module Text.Pandoc.Lua.Marshal.PandocError ) where -import HsLua (LuaError, Peeker, Pusher, liftLua, pushString) +import HsLua (LuaError, Peeker, Pusher, liftLua, pushText) import HsLua.Packaging -import Text.Pandoc.Error (PandocError (PandocLuaError)) +import Text.Pandoc.Error (PandocError (PandocLuaError), renderError) import qualified HsLua as Lua import qualified Text.Pandoc.UTF8 as UTF8 @@ -30,9 +29,11 @@ import qualified Text.Pandoc.UTF8 as UTF8 typePandocError :: LuaError e => DocumentedType e PandocError typePandocError = deftype "PandocError" [ operation Tostring $ defun "__tostring" - ### liftPure (show @PandocError) + ### liftPure (\case + PandocLuaError e -> e + err -> renderError err) <#> udparam typePandocError "obj" "PandocError object" - =#> functionResult pushString "string" "string representation of error." + =#> functionResult pushText "string" "string representation of error." 
] mempty -- no members @@ -46,5 +47,7 @@ peekPandocError idx = Lua.retrieving "PandocError" $ liftLua (Lua.ltype idx) >>= \case Lua.TypeUserdata -> peekUD typePandocError idx _ -> do - msg <- liftLua $ Lua.state >>= \l -> Lua.liftIO (Lua.popErrorMessage l) + msg <- liftLua $ do + Lua.pushvalue idx + Lua.state >>= \l -> Lua.liftIO (Lua.popErrorMessage l) return $ PandocLuaError (UTF8.toText msg) diff --git a/src/Text/Pandoc/Lua/Marshal/ReaderOptions.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Marshal/ReaderOptions.hs similarity index 96% rename from src/Text/Pandoc/Lua/Marshal/ReaderOptions.hs rename to pandoc-lua-engine/src/Text/Pandoc/Lua/Marshal/ReaderOptions.hs index bec7d81bf011..8a02a6d7e247 100644 --- a/src/Text/Pandoc/Lua/Marshal/ReaderOptions.hs +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua/Marshal/ReaderOptions.hs @@ -22,9 +22,7 @@ module Text.Pandoc.Lua.Marshal.ReaderOptions import Data.Default (def) import HsLua as Lua -#if !MIN_VERSION_hslua(2,2,0) -import HsLua.Aeson (peekViaJSON, pushViaJSON) -#endif +import Text.Pandoc.Lua.Marshal.Format (peekExtensions, pushExtensions) import Text.Pandoc.Lua.Marshal.List (pushPandocList) import Text.Pandoc.Options (ReaderOptions (..)) @@ -91,8 +89,8 @@ readerOptionsMembers = (pushText, readerDefaultImageExtension) (peekText, \opts x -> opts{ readerDefaultImageExtension = x }) , property "extensions" "" - (pushViaJSON, readerExtensions) - (peekViaJSON, \opts x -> opts{ readerExtensions = x }) + (pushExtensions, readerExtensions) + (peekExtensions, \opts x -> opts{ readerExtensions = x }) , property "indented_code_classes" "" (pushPandocList pushText, readerIndentedCodeClasses) (peekList peekText, \opts x -> opts{ readerIndentedCodeClasses = x }) diff --git a/src/Text/Pandoc/Lua/Marshal/Reference.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Marshal/Reference.hs similarity index 100% rename from src/Text/Pandoc/Lua/Marshal/Reference.hs rename to pandoc-lua-engine/src/Text/Pandoc/Lua/Marshal/Reference.hs diff --git 
a/src/Text/Pandoc/Lua/Marshal/Sources.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Marshal/Sources.hs similarity index 100% rename from src/Text/Pandoc/Lua/Marshal/Sources.hs rename to pandoc-lua-engine/src/Text/Pandoc/Lua/Marshal/Sources.hs diff --git a/src/Text/Pandoc/Lua/Marshal/Template.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Marshal/Template.hs similarity index 55% rename from src/Text/Pandoc/Lua/Marshal/Template.hs rename to pandoc-lua-engine/src/Text/Pandoc/Lua/Marshal/Template.hs index 56878b109050..71134c03f69e 100644 --- a/src/Text/Pandoc/Lua/Marshal/Template.hs +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua/Marshal/Template.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {- | @@ -16,15 +17,26 @@ module Text.Pandoc.Lua.Marshal.Template import Data.Text (Text) import HsLua as Lua -import Text.DocTemplates (Template) +import HsLua.Core.Utf8 as Lua +import Text.Pandoc.Error (PandocError) +import Text.Pandoc.Lua.PandocLua (unPandocLua) +import Text.Pandoc.Templates (Template, compileTemplate, runWithDefaultPartials) -- | Pushes a 'Template' as a an opaque userdata value. pushTemplate :: LuaError e => Pusher e (Template Text) pushTemplate = pushUD typeTemplate -- | Retrieves a 'Template' 'Text' value from the stack. -peekTemplate :: LuaError e => Peeker e (Template Text) -peekTemplate = peekUD typeTemplate +peekTemplate :: Peeker PandocError (Template Text) +peekTemplate idx = liftLua (ltype idx) >>= \case + TypeString -> do + let path = "templates/default.custom" + let liftPM = liftLua . unPandocLua + tmpl <- peekText idx + liftPM (runWithDefaultPartials (compileTemplate path tmpl)) >>= \case + Left e -> failPeek (Lua.fromString e) + Right t -> pure t + _ -> peekUD typeTemplate idx -- | Template object type. 
typeTemplate :: LuaError e => DocumentedType e (Template Text) diff --git a/src/Text/Pandoc/Lua/Marshal/WriterOptions.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Marshal/WriterOptions.hs similarity index 93% rename from src/Text/Pandoc/Lua/Marshal/WriterOptions.hs rename to pandoc-lua-engine/src/Text/Pandoc/Lua/Marshal/WriterOptions.hs index 86df682c5d3b..1b0a9ea1ceae 100644 --- a/src/Text/Pandoc/Lua/Marshal/WriterOptions.hs +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua/Marshal/WriterOptions.hs @@ -21,9 +21,9 @@ module Text.Pandoc.Lua.Marshal.WriterOptions import Control.Applicative (optional) import Data.Default (def) import HsLua as Lua -#if !MIN_VERSION_hslua(2,2,0) -import HsLua.Aeson (peekViaJSON, pushViaJSON) -#endif +import Text.Pandoc.Error (PandocError) +import Text.Pandoc.Lua.Marshal.Context (peekContext, pushContext) +import Text.Pandoc.Lua.Marshal.Format (peekExtensions, pushExtensions) import Text.Pandoc.Lua.Marshal.List (pushPandocList) import Text.Pandoc.Lua.Marshal.Template (peekTemplate, pushTemplate) import Text.Pandoc.Options (WriterOptions (..)) @@ -35,7 +35,7 @@ import Text.Pandoc.Options (WriterOptions (..)) -- | Retrieve a WriterOptions value, either from a normal WriterOptions -- value, from a read-only object, or from a table with the same -- keys as a WriterOptions object. -peekWriterOptions :: LuaError e => Peeker e WriterOptions +peekWriterOptions :: Peeker PandocError WriterOptions peekWriterOptions = retrieving "WriterOptions" . \idx -> liftLua (ltype idx) >>= \case TypeUserdata -> peekUD typeWriterOptions idx @@ -44,11 +44,11 @@ peekWriterOptions = retrieving "WriterOptions" . \idx -> typeMismatchMessage "WriterOptions userdata or table" idx -- | Pushes a WriterOptions value as userdata object. -pushWriterOptions :: LuaError e => Pusher e WriterOptions +pushWriterOptions :: Pusher PandocError WriterOptions pushWriterOptions = pushUD typeWriterOptions -- | 'WriterOptions' object type. 
-typeWriterOptions :: LuaError e => DocumentedType e WriterOptions +typeWriterOptions :: DocumentedType PandocError WriterOptions typeWriterOptions = deftype "WriterOptions" [ operation Tostring $ lambda ### liftPure show @@ -97,8 +97,8 @@ typeWriterOptions = deftype "WriterOptions" , property "extensions" "Markdown extensions that can be used" - (pushViaJSON, writerExtensions) - (peekViaJSON, \opts x -> opts{ writerExtensions = x }) + (pushExtensions, writerExtensions) + (peekExtensions, \opts x -> opts{ writerExtensions = x }) , property "highlight_style" "Style to use for highlighting (nil = no highlighting)" @@ -208,8 +208,8 @@ typeWriterOptions = deftype "WriterOptions" , property "variables" "Variables to set in template" - (pushViaJSON, writerVariables) - (peekViaJSON, \opts x -> opts{ writerVariables = x }) + (pushContext, writerVariables) + (peekContext, \opts x -> opts{ writerVariables = x }) , property "wrap_text" "Option for wrapping text" @@ -224,7 +224,7 @@ typeWriterOptions = deftype "WriterOptions" -- key/value pair of the table in the userdata value, then retrieves the -- object again. This will update all fields and complain about unknown -- keys. 
-peekWriterOptionsTable :: LuaError e => Peeker e WriterOptions +peekWriterOptionsTable :: Peeker PandocError WriterOptions peekWriterOptionsTable idx = retrieving "WriterOptions (table)" $ do liftLua $ do absidx <- absindex idx @@ -239,6 +239,3 @@ peekWriterOptionsTable idx = retrieving "WriterOptions (table)" $ do pushnil -- first key setFields peekUD typeWriterOptions top `lastly` pop 1 - -instance Pushable WriterOptions where - push = pushWriterOptions diff --git a/pandoc-lua-engine/src/Text/Pandoc/Lua/Module/Format.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Module/Format.hs new file mode 100644 index 000000000000..d9d413e09a2c --- /dev/null +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua/Module/Format.hs @@ -0,0 +1,59 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Lua.Module.Format + Copyright : © 2022 Albert Krewinkel + License : GPL-2.0-or-later + Maintainer : Albert Krewinkel + +Lua module to handle pandoc templates. +-} +module Text.Pandoc.Lua.Module.Format + ( documentedModule + ) where + +import HsLua +import Text.Pandoc.Error (PandocError) +import Text.Pandoc.Extensions (getAllExtensions, getDefaultExtensions) +import Text.Pandoc.Lua.Marshal.Format (pushExtensions) +import Text.Pandoc.Lua.PandocLua () + +import qualified Data.Text as T + +-- | The "pandoc.format" module. +documentedModule :: Module PandocError +documentedModule = Module + { moduleName = "pandoc.format" + , moduleDescription = T.unlines + [ "Pandoc formats and their extensions." + ] + , moduleFields = [] + , moduleOperations = [] + , moduleFunctions = functions + } + +-- | Extension module functions. +functions :: [DocumentedFunction PandocError] +functions = + [ defun "default_extensions" + ### liftPure getDefaultExtensions + <#> parameter peekText "string" "format" "format name" + =#> functionResult pushExtensions "FormatExtensions" + "default extensions enabled for `format`" + #? 
T.unlines + [ "Returns the list of default extensions of the given format; this" + , "function does not check if the format is supported, it will return" + , "a fallback list of extensions even for unknown formats." + ] + + , defun "all_extensions" + ### liftPure getAllExtensions + <#> parameter peekText "string" "format" "format name" + =#> functionResult pushExtensions "FormatExtensions" + "all extensions supported for `format`" + #? T.unlines + [ "Returns the list of all valid extensions for a format." + , "No distinction is made between input and output, and an" + , "extension have an effect when reading a format but not when" + , "writing it, or *vice versa*." + ] + ] diff --git a/src/Text/Pandoc/Lua/Module/MediaBag.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Module/MediaBag.hs similarity index 96% rename from src/Text/Pandoc/Lua/Module/MediaBag.hs rename to pandoc-lua-engine/src/Text/Pandoc/Lua/Module/MediaBag.hs index 72e7ff00dbb5..ca028f4441bd 100644 --- a/src/Text/Pandoc/Lua/Module/MediaBag.hs +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua/Module/MediaBag.hs @@ -17,9 +17,8 @@ import Data.Maybe (fromMaybe) import HsLua ( LuaE, DocumentedFunction, Module (..) 
, (<#>), (###), (=#>), (=?>), (#?), defun, functionResult , opt, parameter, stringParam, textParam) -import Text.Pandoc.Class.CommonState (CommonState (..)) -import Text.Pandoc.Class.PandocMonad (fetchItem, fillMediaBag, getMediaBag, - modifyCommonState, setMediaBag) +import Text.Pandoc.Class ( CommonState (..), fetchItem, fillMediaBag + , getMediaBag, modifyCommonState, setMediaBag) import Text.Pandoc.Error (PandocError) import Text.Pandoc.Lua.Marshal.Pandoc (peekPandoc, pushPandoc) import Text.Pandoc.Lua.Marshal.List (pushPandocList) diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Module/Pandoc.hs similarity index 83% rename from src/Text/Pandoc/Lua/Module/Pandoc.hs rename to pandoc-lua-engine/src/Text/Pandoc/Lua/Module/Pandoc.hs index e708f43458b1..aaca86b0235a 100644 --- a/src/Text/Pandoc/Lua/Module/Pandoc.hs +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua/Module/Pandoc.hs @@ -14,8 +14,7 @@ Pandoc module for lua. -} module Text.Pandoc.Lua.Module.Pandoc - ( pushModule - , documentedModule + ( documentedModule ) where import Prelude hiding (read) @@ -26,10 +25,11 @@ import Data.Data (Data, dataTypeConstrs, dataTypeOf, showConstr) import Data.Default (Default (..)) import Data.Maybe (fromMaybe) import Data.Proxy (Proxy (Proxy)) -import HsLua hiding (pushModule) +import HsLua import System.Exit (ExitCode (..)) import Text.Pandoc.Definition import Text.Pandoc.Error (PandocError (..)) +import Text.Pandoc.Format (parseFlavoredFormat) import Text.Pandoc.Lua.Orphans () import Text.Pandoc.Lua.Marshal.AST import Text.Pandoc.Lua.Marshal.Filter (peekFilter) @@ -39,7 +39,8 @@ import Text.Pandoc.Lua.Marshal.Sources (peekSources) import Text.Pandoc.Lua.Marshal.WriterOptions ( peekWriterOptions , pushWriterOptions) import Text.Pandoc.Lua.Module.Utils (sha1) -import Text.Pandoc.Lua.PandocLua (PandocLua (unPandocLua), liftPandocLua) +import Text.Pandoc.Lua.PandocLua (PandocLua (unPandocLua)) +import Text.Pandoc.Lua.Writer.Classic (runCustom) 
import Text.Pandoc.Options ( ReaderOptions (readerExtensions) , WriterOptions (writerExtensions) ) import Text.Pandoc.Process (pipeProcess) @@ -54,13 +55,6 @@ import qualified Data.Set as Set import qualified Data.Text as T import qualified Text.Pandoc.UTF8 as UTF8 --- | Push the "pandoc" package to the Lua stack. Requires the `List` --- module to be loadable. -pushModule :: PandocLua NumResults -pushModule = do - liftPandocLua $ Lua.pushModule documentedModule - return 1 - documentedModule :: Module PandocError documentedModule = Module { moduleName = "pandoc" @@ -137,7 +131,7 @@ pushWithConstructorsSubtable constructors = do rawset (nth 3) pop 1 -- pop constructor table -otherConstructors :: LuaError e => [DocumentedFunction e] +otherConstructors :: [DocumentedFunction PandocError] otherConstructors = [ mkPandoc , mkMeta @@ -205,32 +199,21 @@ functions = =?> "output string, or error triple" , defun "read" - ### (\content mformatspec mreaderOptions -> do - let formatSpec = fromMaybe "markdown" mformatspec - readerOpts = fromMaybe def mreaderOptions - readAction = getReader formatSpec >>= \case - (TextReader r, es) -> - r readerOpts{readerExtensions = es} - (case content of - Left bs -> toSources $ UTF8.toText bs - Right sources -> sources) - (ByteStringReader r, es) -> - case content of - Left bs -> r readerOpts{readerExtensions = es} - (BSL.fromStrict bs) - Right _ -> liftPandocLua $ Lua.failLua - "Cannot use bytestring reader with Sources" - try (unPandocLua readAction) >>= \case - Right pd -> - -- success, got a Pandoc document - return pd - Left (PandocUnknownReaderError f) -> - Lua.failLua . T.unpack $ "Unknown reader: " <> f - Left (PandocUnsupportedExtensionError e f) -> - Lua.failLua . 
T.unpack $ - "Extension " <> e <> " not supported for " <> f - Left e -> - throwM e) + ### (\content mformatspec mreaderOptions -> unPandocLua $ do + let readerOpts = fromMaybe def mreaderOptions + formatSpec <- parseFlavoredFormat $ fromMaybe "markdown" mformatspec + getReader formatSpec >>= \case + (TextReader r, es) -> + r readerOpts{readerExtensions = es} + (case content of + Left bs -> toSources $ UTF8.toText bs + Right sources -> sources) + (ByteStringReader r, es) -> + case content of + Left bs -> r readerOpts{readerExtensions = es} + (BSL.fromStrict bs) + Right _ -> throwM $ PandocLuaError + "Cannot use bytestring reader with Sources") <#> parameter (\idx -> (Left <$> peekByteString idx) <|> (Right <$> peekSources idx)) "string|Sources" "content" "text to parse" @@ -254,10 +237,10 @@ functions = =#> functionResult pushInline "Inline" "modified Inline" , defun "write" - ### (\doc mformatspec mwriterOpts -> do - let formatSpec = fromMaybe "html" mformatspec - writerOpts = fromMaybe def mwriterOpts - unPandocLua $ getWriter formatSpec >>= \case + ### (\doc mformatspec mwriterOpts -> unPandocLua $ do + let writerOpts = fromMaybe def mwriterOpts + formatSpec <- parseFlavoredFormat $ fromMaybe "html" mformatspec + getWriter formatSpec >>= \case (TextWriter w, es) -> Right <$> w writerOpts{ writerExtensions = es } doc (ByteStringWriter w, es) -> Left <$> @@ -268,6 +251,17 @@ functions = "writer options") =#> functionResult (either pushLazyByteString pushText) "string" "result document" + + , defun "write_classic" + ### (\doc mwopts -> runCustom (fromMaybe def mwopts) doc) + <#> parameter peekPandoc "Pandoc" "doc" "document to convert" + <#> opt (parameter peekWriterOptions "WriterOptions" "writer_options" + "writer options") + =#> functionResult pushText "string" "rendered document" + #? (T.unlines + [ "Runs a classic custom Lua writer, using the functions defined" + , "in the current environment." 
+ ]) ] where walkElement x f = diff --git a/pandoc-lua-engine/src/Text/Pandoc/Lua/Module/Scaffolding.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Module/Scaffolding.hs new file mode 100644 index 000000000000..8bafe47cb960 --- /dev/null +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua/Module/Scaffolding.hs @@ -0,0 +1,52 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Lua.Module.Scaffolding + Copyright : Copyright © 2022 Albert Krewinkel, John MacFarlane + License : GNU GPL, version 2 or above + Maintainer : Albert Krewinkel + +Scaffolding for custom Writers. +-} +module Text.Pandoc.Lua.Module.Scaffolding + ( documentedModule + ) where + +import HsLua +import Text.Pandoc.Error (PandocError) +import Text.Pandoc.Lua.Writer.Scaffolding (pushWriterScaffolding) +import qualified Data.Text as T + +-- | The "pandoc.template" module. +documentedModule :: Module PandocError +documentedModule = Module + { moduleName = "pandoc.scaffolding" + , moduleDescription = T.unlines + [ "Scaffolding for custom writers." + ] + , moduleFields = [writerScaffolding] + , moduleOperations = [] + , moduleFunctions = [] + } + +-- | Template module functions. +writerScaffolding :: Field PandocError +writerScaffolding = Field + { fieldName = "Writer" + , fieldDescription = T.unlines + [ "An object to be used as a `Writer` function; the construct handles" + , "most of the boilerplate, expecting only render functions for all" + , "AST elements" + ] + , fieldPushValue = do + pushWriterScaffolding + -- pretend that it's a submodule so we get better error messages + getfield registryindex loaded + pushvalue (nth 2) + setfield (nth 2) (submod "Writer") + -- same for fields "Block" and "Inline" + getfield (nth 2) "Inline" *> setfield (nth 2) (submod "Writer.Inline") + getfield (nth 2) "Block" *> setfield (nth 2) (submod "Writer.Block") + + pop 1 -- remove "LOADED_TABLE" + } + where submod x = moduleName documentedModule <> "." 
<> x diff --git a/src/Text/Pandoc/Lua/Module/System.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Module/System.hs similarity index 100% rename from src/Text/Pandoc/Lua/Module/System.hs rename to pandoc-lua-engine/src/Text/Pandoc/Lua/Module/System.hs diff --git a/pandoc-lua-engine/src/Text/Pandoc/Lua/Module/Template.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Module/Template.hs new file mode 100644 index 000000000000..d84f0c6d7b83 --- /dev/null +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua/Module/Template.hs @@ -0,0 +1,103 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Lua.Module.Template + Copyright : Copyright © 2022 Albert Krewinkel, John MacFarlane + License : GNU GPL, version 2 or above + Maintainer : Albert Krewinkel + +Lua module to handle pandoc templates. +-} +module Text.Pandoc.Lua.Module.Template + ( documentedModule + ) where + +import HsLua +import HsLua.Module.DocLayout (peekDoc, pushDoc) +import Text.Pandoc.Error (PandocError) +import Text.Pandoc.Lua.Marshal.AST (peekMeta, pushBlocks, pushInlines) +import Text.Pandoc.Lua.Marshal.Context (peekContext, pushContext) +import Text.Pandoc.Lua.Marshal.Template (peekTemplate, pushTemplate) +import Text.Pandoc.Lua.PandocLua (PandocLua (unPandocLua), liftPandocLua) +import Text.Pandoc.Writers.Shared (metaToContext') +import Text.Pandoc.Templates + ( compileTemplate, getDefaultTemplate, renderTemplate + , runWithPartials, runWithDefaultPartials ) + +import qualified Data.Text as T + +-- | The "pandoc.template" module. +documentedModule :: Module PandocError +documentedModule = Module + { moduleName = "pandoc.template" + , moduleDescription = T.unlines + [ "Lua functions for pandoc templates." + ] + , moduleFields = [] + , moduleOperations = [] + , moduleFunctions = functions + } + +-- | Template module functions. 
+functions :: [DocumentedFunction PandocError] +functions = + [ defun "apply" + ### liftPure2 renderTemplate + <#> parameter peekTemplate "Template" "template" "template to apply" + <#> parameter peekContext "table" "context" "variable values" + =#> functionResult pushDoc "Doc" "rendered template" + #? T.unlines + [ "Applies a context with variable assignments to a template," + , "returning the rendered template. The `context` parameter must be a" + , "table with variable names as keys and [Doc], string, boolean, or" + , "table as values, where the table can be either be a list of the" + , "aforementioned types, or a nested context." + ] + + , defun "compile" + ### (\template mfilepath -> unPandocLua $ + case mfilepath of + Just fp -> runWithPartials (compileTemplate fp template) + Nothing -> runWithDefaultPartials + (compileTemplate "templates/default" template)) + <#> parameter peekText "string" "template" "template string" + <#> opt (stringParam "templ_path" "template path") + =#> functionResult (either failLua pushTemplate) "pandoc Template" + "compiled template" + + , defun "default" + ### (\mformat -> unPandocLua $ do + let getFORMAT = liftPandocLua $ do + getglobal "FORMAT" + forcePeek $ peekText top `lastly` pop 1 + format <- maybe getFORMAT pure mformat + getDefaultTemplate format) + <#> opt (textParam "writer" + "writer for which the template should be returned.") + =#> functionResult pushText "string" + "string representation of the writer's default template" + + , defun "meta_to_context" + ### (\meta blockWriterIdx inlineWriterIdx -> unPandocLua $ do + let blockWriter blks = liftPandocLua $ do + pushvalue blockWriterIdx + pushBlocks blks + callTrace 1 1 + forcePeek $ peekDoc top + let inlineWriter blks = liftPandocLua $ do + pushvalue inlineWriterIdx + pushInlines blks + callTrace 1 1 + forcePeek $ peekDoc top + metaToContext' blockWriter inlineWriter meta) + <#> parameter peekMeta "Meta" "meta" "document metadata" + <#> parameter pure "function" 
"blocks_writer" + "converter from Blocks to Doc values" + <#> parameter pure "function" "inlines_writer" + "converter from Inlines to Doc values" + =#> functionResult pushContext "table" "template context" + #? T.unlines + [ "Creates template context from the document's [Meta]{#type-meta}" + , "data, using the given functions to convert [Blocks] and [Inlines]" + , "to [Doc] values." + ] + ] diff --git a/src/Text/Pandoc/Lua/Module/Types.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Module/Types.hs similarity index 97% rename from src/Text/Pandoc/Lua/Module/Types.hs rename to pandoc-lua-engine/src/Text/Pandoc/Lua/Module/Types.hs index b8d45d93e049..7d9ad6784139 100644 --- a/src/Text/Pandoc/Lua/Module/Types.hs +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua/Module/Types.hs @@ -17,7 +17,7 @@ import HsLua ( Module (..), (###), (<#>), (=#>) , defun, functionResult, parameter) import HsLua.Module.Version (peekVersionFuzzy, pushVersion) import Text.Pandoc.Error (PandocError) -import Text.Pandoc.Lua.ErrorConversion () +import Text.Pandoc.Lua.PandocLua () -- | Push the pandoc.types module on the Lua stack. 
documentedModule :: Module PandocError diff --git a/src/Text/Pandoc/Lua/Module/Utils.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Module/Utils.hs similarity index 98% rename from src/Text/Pandoc/Lua/Module/Utils.hs rename to pandoc-lua-engine/src/Text/Pandoc/Lua/Module/Utils.hs index 6c373ae2fcb4..16305b76ed0e 100644 --- a/src/Text/Pandoc/Lua/Module/Utils.hs +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua/Module/Utils.hs @@ -28,6 +28,7 @@ import HsLua.Module.Version (peekVersionFuzzy, pushVersion) import Text.Pandoc.Citeproc (getReferences, processCitations) import Text.Pandoc.Definition import Text.Pandoc.Error (PandocError) +import Text.Pandoc.Filter (applyJSONFilter) import Text.Pandoc.Lua.Marshal.AST import Text.Pandoc.Lua.Marshal.Reference import Text.Pandoc.Lua.PandocLua (PandocLua (unPandocLua)) @@ -37,7 +38,6 @@ import qualified Data.ByteString.Lazy as BSL import qualified Data.Map as Map import qualified Data.Text as T import qualified Text.Pandoc.Builder as B -import qualified Text.Pandoc.Filter.JSON as JSONFilter import qualified Text.Pandoc.Shared as Shared import qualified Text.Pandoc.UTF8 as UTF8 import qualified Text.Pandoc.Writers.Shared as Shared @@ -126,7 +126,7 @@ documentedModule = Module Nothing -> do Lua.getglobal "FORMAT" (forcePeek ((:[]) <$!> peekString top) <* pop 1) - JSONFilter.apply def args filterPath doc + applyJSONFilter def args filterPath doc ) <#> parameter peekPandoc "Pandoc" "doc" "input document" <#> parameter peekString "filepath" "filter_path" "path to filter" @@ -204,7 +204,7 @@ stringify idx = forcePeek . retrieving "stringifyable element" $ -- | Converts an old/simple table into a normal table block element. 
from_simple_table :: SimpleTable -> LuaE PandocError NumResults from_simple_table (SimpleTable capt aligns widths head' body) = do - Lua.push $ Table + pushBlock $ Table nullAttr (Caption Nothing [Plain capt | not (null capt)]) (zipWith (\a w -> (a, toColWidth w)) aligns widths) diff --git a/src/Text/Pandoc/Lua/Orphans.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Orphans.hs similarity index 98% rename from src/Text/Pandoc/Lua/Orphans.hs rename to pandoc-lua-engine/src/Text/Pandoc/Lua/Orphans.hs index 62b54d051f08..db58349d22fa 100644 --- a/src/Text/Pandoc/Lua/Orphans.hs +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua/Orphans.hs @@ -23,7 +23,6 @@ import Text.Pandoc.Lua.Marshal.Context () import Text.Pandoc.Lua.Marshal.PandocError() import Text.Pandoc.Lua.Marshal.ReaderOptions () import Text.Pandoc.Lua.Marshal.Sources (pushSources) -import Text.Pandoc.Lua.ErrorConversion () import Text.Pandoc.Sources (Sources) instance Pushable Pandoc where diff --git a/src/Text/Pandoc/Lua/PandocLua.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/PandocLua.hs similarity index 65% rename from src/Text/Pandoc/Lua/PandocLua.hs rename to pandoc-lua-engine/src/Text/Pandoc/Lua/PandocLua.hs index 52ace5f6b2c7..64f4bd1b41ec 100644 --- a/src/Text/Pandoc/Lua/PandocLua.hs +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua/PandocLua.hs @@ -4,23 +4,18 @@ {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE ScopedTypeVariables #-} {-# OPTIONS_GHC -fno-warn-orphans #-} {- | Module : Text.Pandoc.Lua.PandocLua - Copyright : Copyright © 2020-2022 Albert Krewinkel - License : GNU GPL, version 2 or above - + Copyright : © 2020-2022 Albert Krewinkel + License : GPL-2.0-or-later Maintainer : Albert Krewinkel - Stability : alpha PandocMonad instance which allows execution of Lua operations and which uses Lua to handle state. -} module Text.Pandoc.Lua.PandocLua ( PandocLua (..) 
- , runPandocLua , liftPandocLua ) where @@ -28,12 +23,13 @@ import Control.Monad.Catch (MonadCatch, MonadMask, MonadThrow) import Control.Monad.Except (MonadError (catchError, throwError)) import Control.Monad.IO.Class (MonadIO) import HsLua as Lua -import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) -import Text.Pandoc.Error (PandocError) -import Text.Pandoc.Lua.Global (Global (..), setGlobals) -import Text.Pandoc.Lua.Marshal.CommonState (peekCommonState) +import Text.Pandoc.Class (PandocMonad (..)) +import Text.Pandoc.Error (PandocError (..)) +import Text.Pandoc.Lua.Marshal.CommonState (peekCommonState, pushCommonState) +import Text.Pandoc.Lua.Marshal.PandocError (peekPandocError, pushPandocError) import qualified Control.Monad.Catch as Catch +import qualified Data.Text as T import qualified Text.Pandoc.Class.IO as IO -- | Type providing access to both, pandoc and Lua operations. @@ -52,37 +48,12 @@ newtype PandocLua a = PandocLua { unPandocLua :: LuaE PandocError a } liftPandocLua :: LuaE PandocError a -> PandocLua a liftPandocLua = PandocLua --- | Evaluate a @'PandocLua'@ computation, running all contained Lua --- operations.. -runPandocLua :: (PandocMonad m, MonadIO m) => PandocLua a -> m a -runPandocLua pLua = do - origState <- getCommonState - globals <- defaultGlobals - (result, newState) <- liftIO . Lua.run . unPandocLua $ do - putCommonState origState - liftPandocLua $ setGlobals globals - r <- pLua - c <- getCommonState - return (r, c) - putCommonState newState - return result - instance {-# OVERLAPPING #-} Exposable PandocError (PandocLua NumResults) where partialApply _narg = liftLua . unPandocLua instance Pushable a => Exposable PandocError (PandocLua a) where partialApply _narg x = 1 <$ (liftLua (unPandocLua x >>= Lua.push)) --- | Global variables which should always be set. 
-defaultGlobals :: PandocMonad m => m [Global] -defaultGlobals = do - commonState <- getCommonState - return - [ PANDOC_API_VERSION - , PANDOC_STATE commonState - , PANDOC_VERSION - ] - instance MonadError PandocError PandocLua where catchError = Catch.catch throwError = Catch.throwM @@ -108,6 +79,22 @@ instance PandocMonad PandocLua where getCommonState = PandocLua $ do Lua.getglobal "PANDOC_STATE" forcePeek $ peekCommonState Lua.top - putCommonState = PandocLua . setGlobals . (:[]) . PANDOC_STATE + putCommonState cst = PandocLua $ do + pushCommonState cst + Lua.setglobal "PANDOC_STATE" logOutput = IO.logOutput + +-- | Retrieve a @'PandocError'@ from the Lua stack. +popPandocError :: LuaE PandocError PandocError +popPandocError = do + errResult <- runPeek $ peekPandocError top `lastly` pop 1 + case resultToEither errResult of + Right x -> return x + Left err -> return $ PandocLuaError (T.pack err) + +-- | Conversions between Lua errors and 'PandocError' exceptions. +instance LuaError PandocError where + popException = popPandocError + pushException = pushPandocError + luaException = PandocLuaError . T.pack diff --git a/pandoc-lua-engine/src/Text/Pandoc/Lua/Reader.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Reader.hs new file mode 100644 index 000000000000..8e411aeb2385 --- /dev/null +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua/Reader.hs @@ -0,0 +1,115 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TupleSections #-} +{- | + Module : Text.Pandoc.Lua.Reader + Copyright : Copyright (C) 2021-2022 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Supports custom parsers written in Lua which produce a Pandoc AST. 
+-} +module Text.Pandoc.Lua.Reader ( readCustom ) where +import Control.Exception +import Control.Monad ((<=<), (<$!>), when) +import Control.Monad.IO.Class (MonadIO) +import Data.Maybe (fromMaybe) +import HsLua as Lua hiding (Operation (Div)) +import HsLua.Core.Run (GCManagedState, newGCManagedState, withGCManagedState) +import Text.Pandoc.Class (PandocMonad, findFileWithDataFallback, report) +import Text.Pandoc.Error (PandocError) +import Text.Pandoc.Format (ExtensionsConfig (..)) +import Text.Pandoc.Logging +import Text.Pandoc.Lua.Global (Global (..), setGlobals) +import Text.Pandoc.Lua.Init (runLuaWith) +import Text.Pandoc.Lua.PandocLua +import Text.Pandoc.Lua.Marshal.Format (peekExtensionsConfig) +import Text.Pandoc.Lua.Marshal.Pandoc (peekPandoc) +import Text.Pandoc.Readers (Reader (..)) +import Text.Pandoc.Sources (ToSources(..), sourcesToText) +import qualified Data.Text as T + +-- | Convert custom markup to Pandoc. +readCustom :: (PandocMonad m, MonadIO m) + => FilePath -> m (Reader m, ExtensionsConfig) +readCustom luaFile = do + luaState <- liftIO newGCManagedState + luaFile' <- fromMaybe luaFile <$> findFileWithDataFallback "readers" luaFile + either throw pure <=< runLuaWith luaState $ do + let globals = [ PANDOC_SCRIPT_FILE luaFile ] + setGlobals globals + stat <- dofileTrace luaFile' + -- check for error in lua script (later we'll change the return type + -- to handle this more gracefully): + when (stat /= Lua.OK) + Lua.throwErrorAsException + + extsConf <- getglobal "Extensions" >>= \case + TypeNil -> pure $ ExtensionsConfig mempty mempty + _ -> forcePeek $ peekExtensionsConfig top `lastly` pop 1 + + (,extsConf) <$!> getCustomReader luaState + + where + readerField = "PANDOC Reader function" + inLua st = liftIO . 
withGCManagedState @PandocError st + byteStringReader :: MonadIO m => GCManagedState -> Reader m + byteStringReader st = ByteStringReader $ \ropts input -> inLua st $ do + getfield registryindex readerField + push input + push ropts + callTrace 2 1 + forcePeek $ peekPandoc top + textReader st = TextReader $ \ropts srcs -> inLua st $ do + let input = toSources srcs + getfield registryindex readerField + push input + push ropts + pcallTrace 2 1 >>= \case + OK -> forcePeek $ peekPandoc top + ErrRun -> do + -- Caught a runtime error. Check if parsing might work if we + -- pass a string instead of a Sources list, then retry. + runPeek (peekText top) >>= \case + Failure {} -> + -- not a string error object. Bail! + throwErrorAsException + Success errmsg -> + if "string expected, got pandoc Sources" `T.isInfixOf` errmsg + then do + pop 1 + _ <- unPandocLua $ do + report $ Deprecated "old Reader function signature" $ + T.unlines + [ "Reader functions should accept a sources list; " + , "functions expecting `string` input are deprecated. " + , "Use `tostring` to convert the first argument to a " + , "string." + ] + getglobal "Reader" + push $ sourcesToText input -- push sources as string + push ropts + callTrace 2 1 + forcePeek $ peekPandoc top + else + -- nothing we can do here + throwErrorAsException + _ -> -- not a runtime error, we won't be able to recover from that + throwErrorAsException + getCustomReader st = do + getglobal "Reader" >>= \case + TypeNil -> do + pop 1 + getglobal "ByteStringReader" >>= \case + TypeNil -> failLua $ "No reader function found: either 'Reader' or " + <> "'ByteStringReader' must be defined." 
+ _ -> do + setfield registryindex readerField + pure (byteStringReader st) + _ -> do + setfield registryindex readerField + pure (textReader st) diff --git a/pandoc-lua-engine/src/Text/Pandoc/Lua/Writer.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Writer.hs new file mode 100644 index 000000000000..c5e3e2469fa6 --- /dev/null +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua/Writer.hs @@ -0,0 +1,112 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE TypeApplications #-} +{- | + Module : Text.Pandoc.Lua.Writer + Copyright : Copyright (C) 2012-2022 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Conversion of Pandoc documents using a custom Lua writer. +-} +module Text.Pandoc.Lua.Writer + ( writeCustom + ) where + +import Control.Exception +import Control.Monad ((<=<)) +import Data.Default (def) +import Data.Maybe (fromMaybe) +import Data.Text (Text) +import HsLua +import HsLua.Core.Run (newGCManagedState, withGCManagedState) +import Control.Monad.IO.Class (MonadIO) +import Text.Pandoc.Class (PandocMonad, findFileWithDataFallback) +import Text.Pandoc.Error (PandocError (..)) +import Text.Pandoc.Format (ExtensionsConfig (..)) +import Text.Pandoc.Lua.Global (Global (..), setGlobals) +import Text.Pandoc.Lua.Init (runLuaWith) +import Text.Pandoc.Lua.Marshal.Format (peekExtensionsConfig) +import Text.Pandoc.Lua.Marshal.Template (peekTemplate) +import Text.Pandoc.Lua.Marshal.WriterOptions (pushWriterOptions) +import Text.Pandoc.Templates (Template) +import Text.Pandoc.Writers (Writer (..)) +import qualified Text.Pandoc.Lua.Writer.Classic as Classic + +-- | Convert Pandoc to custom markup. 
+writeCustom :: (PandocMonad m, MonadIO m) + => FilePath -> m (Writer m, ExtensionsConfig, m (Template Text)) +writeCustom luaFile = do + luaState <- liftIO newGCManagedState + luaFile' <- fromMaybe luaFile <$> findFileWithDataFallback "writers" luaFile + either throw pure <=< runLuaWith luaState $ do + setGlobals [ PANDOC_DOCUMENT mempty + , PANDOC_SCRIPT_FILE luaFile' + , PANDOC_WRITER_OPTIONS def + ] + dofileTrace luaFile' >>= \case + OK -> pure () + _ -> throwErrorAsException + -- Most classic writers contain code that throws an error if a global + -- is not present. This would break our check for the existence of a + -- "Writer" function. We resort to raw access for that reason, but + -- could also catch the error instead. + let rawgetglobal x = do + pushglobaltable + pushName x + rawget (nth 2) <* remove (nth 2) -- remove global table + + let writerField = "Pandoc Writer function" + + extsConf <- rawgetglobal "Extensions" >>= \case + TypeNil -> ExtensionsConfig mempty mempty <$ pop 1 + _ -> forcePeek $ peekExtensionsConfig top `lastly` pop 1 + + -- Store template function in registry + let templateField = "Pandoc Writer Template" + rawgetglobal "Template" *> setfield registryindex templateField + + let getTemplate = liftIO $ withGCManagedState @PandocError luaState $ do + getfield registryindex templateField >>= \case + TypeNil -> failLua $ "No default template for writer; " <> + "the global variable Template is undefined." + _ -> do + callTrace 0 1 + forcePeek $ peekTemplate top `lastly` pop 1 + + let addProperties = (, extsConf, getTemplate) + + rawgetglobal "Writer" >>= \case + TypeNil -> rawgetglobal "ByteStringWriter" >>= \case + TypeNil -> do + -- Neither `Writer` nor `BinaryWriter` are defined. Try to + -- use the file as a classic writer. + pop 1 -- remove nil + pure $ addProperties . TextWriter $ \opts doc -> + liftIO $ withGCManagedState luaState $ do + Classic.runCustom @PandocError opts doc + _ -> do + -- Binary writer. 
Writer function is on top of the stack. + setfield registryindex writerField + pure $ addProperties . ByteStringWriter $ \opts doc -> + -- Call writer with document and writer options as arguments. + liftIO $ withGCManagedState luaState $ do + getfield registryindex writerField + push doc + pushWriterOptions opts + callTrace 2 1 + forcePeek @PandocError $ peekLazyByteString top + _ -> do + -- New-type text writer. Writer function is on top of the stack. + setfield registryindex writerField + pure $ addProperties . TextWriter $ \opts doc -> + liftIO $ withGCManagedState luaState $ do + getfield registryindex writerField + push doc + pushWriterOptions opts + callTrace 2 1 + forcePeek @PandocError $ peekText top diff --git a/src/Text/Pandoc/Lua/Writer/Classic.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Writer/Classic.hs similarity index 99% rename from src/Text/Pandoc/Lua/Writer/Classic.hs rename to pandoc-lua-engine/src/Text/Pandoc/Lua/Writer/Classic.hs index 522bdb651b3c..016d453caa39 100644 --- a/src/Text/Pandoc/Lua/Writer/Classic.hs +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua/Writer/Classic.hs @@ -69,7 +69,7 @@ instance Pushable (Stringify MetaValue) where push (Stringify (MetaBlocks bs)) = Lua.push (Stringify bs) instance Pushable (Stringify Citation) where - push (Stringify cit) = flip pushAsTable cit + push (Stringify cit) = pushAsTable [ ("citationId", push . citationId) , ("citationPrefix", push . Stringify . citationPrefix) , ("citationSuffix", push . Stringify . citationSuffix) @@ -77,6 +77,7 @@ instance Pushable (Stringify Citation) where , ("citationNoteNum", push . citationNoteNum) , ("citationHash", push . citationHash) ] + cit -- | Key-value pair, pushed as a table with @a@ as the only key and @v@ as the -- associated value. 
diff --git a/pandoc-lua-engine/src/Text/Pandoc/Lua/Writer/Scaffolding.hs b/pandoc-lua-engine/src/Text/Pandoc/Lua/Writer/Scaffolding.hs new file mode 100644 index 000000000000..95ce23c0d9c6 --- /dev/null +++ b/pandoc-lua-engine/src/Text/Pandoc/Lua/Writer/Scaffolding.hs @@ -0,0 +1,311 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Lua.Writer.Scaffolding + Copyright : © 2022 Albert Krewinkel + License : GPL-2.0-or-later + Maintainer : Albert Krewinkel + +Conversion of Pandoc documents using a custom Lua writer. +-} +module Text.Pandoc.Lua.Writer.Scaffolding + ( pushWriterScaffolding + ) where + +import Control.Monad ((<$!>), void) +import Data.ByteString (ByteString) +import Data.Data (dataTypeConstrs, dataTypeOf, showConstr, toConstr) +import Data.Default (def) +import Data.List (intersperse) +import Data.Maybe (fromMaybe) +import Data.Text (Text) +import Data.String (IsString (fromString)) +import HsLua +import HsLua.Module.DocLayout (peekDoc, pushDoc) +import Text.DocLayout (Doc, blankline, render) +import Text.DocTemplates (Context) +import Text.Pandoc.Definition +import Text.Pandoc.Error (PandocError (..)) +import Text.Pandoc.Options (WriterOptions (..), WrapOption(..)) +import Text.Pandoc.Lua.PandocLua () +import Text.Pandoc.Lua.Marshal.AST +import Text.Pandoc.Lua.Marshal.Context (peekContext) +import Text.Pandoc.Lua.Marshal.WriterOptions ( peekWriterOptions + , pushWriterOptions) +import Text.Pandoc.Templates (renderTemplate) +import Text.Pandoc.Writers.Shared (metaToContext, setField) +import qualified Data.Text as T +import qualified Text.Pandoc.UTF8 as UTF8 + +-- | Convert Pandoc to custom markup. 
+pushWriterScaffolding :: LuaE PandocError NumResults +pushWriterScaffolding = do + newtable + *> pushWriterMT *> setmetatable (nth 2) + writer <- toWriterTable top + addField "Blocks" $ pushDocumentedFunction (blocksFn writer) + addField "Inlines" $ pushDocumentedFunction (inlinesFn writer) + addField "Block" $ newtable *> pushBlockMT writer *> setmetatable (nth 2) + addField "Inline" $ newtable *> pushInlineMT writer *> setmetatable (nth 2) + addField "Pandoc" $ pushDocumentedFunction $ lambda + ### (\(Pandoc _ blks) -> do + pushWriterTable writer + getfield' top "Blocks" + pushBlocks blks + callTrace 1 1 + pure (NumResults 1)) + <#> parameter peekPandoc "Pandoc" "doc" "" + =?> "rendered doc" + freeWriter writer + return 1 + where + blocksFn w = lambda + ### (\blocks msep -> blockListToCustom w msep blocks) + <#> parameter peekBlocks "Blocks" "blocks" "" + <#> opt (parameter peekDocFuzzy "Doc" "sep" "") + =#> functionResult pushDoc "Doc" "" + inlinesFn w = lambda + ### inlineListToCustom w + <#> parameter peekInlines "Inlines" "inlines" "" + =#> functionResult pushDoc "Doc" "" + pushBlockMT writer = do + newtable + addField "__call" $ pushDocumentedFunction $ lambda + ### blockToCustom + <#> parameter peekWriter "table" "writer" "" + <#> parameter peekBlockFuzzy "Block" "block" "" + =#> functionResult pushDoc "Doc" "rendered blocks" + addField "__index" $ + -- lookup missing fields in the main Writer table + pushWriterTable writer + pushInlineMT writer = do + newtable + addField "__call" $ pushDocumentedFunction $ lambda + ### inlineToCustom + <#> parameter peekWriter "table" "writer" "" + <#> parameter peekInlineFuzzy "Inline" "inline" "" + =#> functionResult pushDoc "Doc" "rendered inline" + addField "__index" $ do + -- lookup missing fields in the main Writer table + pushWriterTable writer + +pushWriterMT :: LuaE PandocError () +pushWriterMT = do + newtable + addField "__call" $ pushDocumentedFunction $ lambda + ### (\writer doc mopts -> runWriter writer doc 
mopts) + <#> parameter peekWriter "table" "writer" "" + <#> parameter peekPandoc "Pandoc" "doc" "" + <#> opt (parameter peekWriterOptions "WriterOptions" "opts" "") + =#> functionResult pushText "string" "rendered document" + addField "__index" . pushDocumentedFunction $ lambda + ### (\_writer key -> handleMissingField key) + <#> parameter pure "table" "writer" "" + <#> parameter (liftLua . tostring') "string" "key" "" + =#> functionResult (const pushnil) "string" "" + + +addField :: LuaError e => Name -> LuaE e a -> LuaE e () +addField name action = do + pushName name + action + rawset (nth 3) + +getfield' :: LuaError e => StackIndex -> Name -> LuaE e HsLua.Type +getfield' idx name = do + aidx <- absindex idx + pushName name + rawget aidx >>= \case + TypeNil -> pop 1 *> getfield aidx name + ty -> pure ty + +-- | A writer table is just an absolute stack index. +newtype WriterTable = WriterTable Reference + +toWriterTable :: LuaError e => StackIndex -> LuaE e WriterTable +toWriterTable idx = WriterTable <$!> do + pushvalue idx + ref registryindex + +peekWriter :: LuaError e => Peeker e WriterTable +peekWriter = liftLua . toWriterTable + +pushWriterTable :: LuaError e => Pusher e WriterTable +pushWriterTable (WriterTable wref) = void $ getref registryindex wref + +writerOptionsField :: Name +writerOptionsField = "Pandoc Writer WriterOptions" + +freeWriter :: WriterTable -> LuaE e () +freeWriter (WriterTable wref) = unref registryindex wref + +pushOpts :: LuaE PandocError () +pushOpts = void $ getfield' registryindex writerOptionsField + +runWriter :: WriterTable -> Pandoc -> Maybe WriterOptions + -> LuaE PandocError Text +runWriter writer doc@(Pandoc meta _blks) mopts = do + let opts = fromMaybe def mopts + pushWriterOptions opts *> + setfield registryindex writerOptionsField + + (body, mcontext) <- runPeek (pandocToCustom writer doc) >>= force . 
\case + Failure msg contexts -> Failure (cleanupTrace msg) contexts + s -> s + + -- convert metavalues to a template context (variables) + defaultContext <- metaToContext opts + (blockListToCustom writer Nothing) + (inlineListToCustom writer) + meta + let context = setField "body" body + $ fromMaybe defaultContext mcontext + + let colwidth = if writerWrapText opts == WrapAuto + then Just $ writerColumns opts + else Nothing + + return $ render colwidth $ + case writerTemplate opts of + Nothing -> body + Just tpl -> renderTemplate tpl context + +-- | Keep exactly one traceback and clean it up. This wouldn't be +-- necessary if the @pcallTrace@ function would do nothing whenever the +-- error already included a trace, but that would require some bigger +-- changes; removing the additional traces in this post-process step is +-- much easier (for now). +cleanupTrace :: ByteString -> ByteString +cleanupTrace msg = UTF8.fromText . T.intercalate "\n" $ + let tmsg = T.lines $ UTF8.toText msg + traceStart = (== "stack traceback:") + in case break traceStart tmsg of + (x, t:traces) -> (x <>) . (t:) $ + let (firstTrace, rest) = break traceStart traces + isPeekContext = ("\twhile " `T.isPrefixOf`) + isUnknownCFn = (== "\t[C]: in ?") + in filter (not . isUnknownCFn) firstTrace <> + filter isPeekContext rest + _ -> tmsg + +-- | Pushes the field in the writer table. +getWriterField :: LuaError e + => WriterTable -> Name -> LuaE e HsLua.Type +getWriterField writer name = do + pushWriterTable writer + getfield' top name <* remove (nth 2) + +-- | Looks up @Writer.subtable.field@; tries @Writer.field@ as a fallback if the +-- subtable field is @nil@. 
+getNestedWriterField :: LuaError e + => WriterTable -> Name -> Name -> LuaE e HsLua.Type +getNestedWriterField writer subtable field = do + pushWriterTable writer + getfield' top subtable >>= \case + TypeNil -> TypeNil <$ remove (nth 2) -- remove Writer table + _ -> getfield' top field + -- remove Writer and subtable + <* remove (nth 3) <* remove (nth 2) + +pandocToCustom :: WriterTable -> Pandoc + -> Peek PandocError (Doc Text, Maybe (Context Text)) +pandocToCustom writer doc = withContext "rendering Pandoc" $ do + callStatus <- liftLua $ do + getWriterField writer "Pandoc" + pushPandoc doc + pushOpts + pcallTrace 2 2 + case callStatus of + OK -> ((,) <$> peekDocFuzzy (nth 2) <*> orNil peekContext top) + `lastly` pop 2 + _ -> failPeek =<< liftLua (tostring' top) + +blockToCustom :: WriterTable -> Block -> LuaE PandocError (Doc Text) +blockToCustom writer blk = forcePeek $ renderBlock writer blk + +renderBlock :: WriterTable -> Block -> Peek PandocError (Doc Text) +renderBlock writer blk = do + let constrName = fromString . showConstr . toConstr $ blk + withContext ("rendering Block `" <> constrName <> "`") $ + liftLua (getNestedWriterField writer "Block" constrName) >>= \case + TypeNil -> failPeek =<< typeMismatchMessage "function or Doc" top + _ -> callOrDoc (pushBlock blk) + +inlineToCustom :: WriterTable -> Inline -> LuaE PandocError (Doc Text) +inlineToCustom writer inln = forcePeek $ renderInline writer inln + +renderInline :: WriterTable -> Inline -> Peek PandocError (Doc Text) +renderInline writer inln = do + let constrName = fromString . showConstr . 
toConstr $ inln + withContext ("rendering Inline `" <> constrName <> "`") $ do + liftLua (getNestedWriterField writer "Inline" constrName) >>= \case + TypeNil -> failPeek =<< typeMismatchMessage "function or Doc" top + _ -> callOrDoc (pushInline inln) + +-- | If the value at the top of the stack can be called as a function, +-- then push the element and writer options to the stack and call it; +-- otherwise treat it as a plain Doc value +callOrDoc :: LuaE PandocError () + -> Peek PandocError (Doc Text) +callOrDoc pushElement = do + liftLua (ltype top) >>= \case + TypeFunction -> peekCall + _ -> + liftLua (getmetafield top "__call") >>= \case + TypeNil -> peekDocFuzzy top + _ -> liftLua (pop 1) *> peekCall + where + peekCall :: Peek PandocError (Doc Text) + peekCall = + liftLua (pushElement *> pushOpts *> pcallTrace 2 1) >>= \case + OK -> peekDocFuzzy top + _ -> failPeek =<< liftLua (tostring' top) + +blockListToCustom :: WriterTable -> Maybe (Doc Text) -> [Block] + -> LuaE PandocError (Doc Text) +blockListToCustom writer msep blocks = forcePeek $ + renderBlockList writer msep blocks + +inlineListToCustom :: WriterTable -> [Inline] -> LuaE PandocError (Doc Text) +inlineListToCustom writer inlines = forcePeek $ + renderInlineList writer inlines + +renderBlockList :: WriterTable -> Maybe (Doc Text) -> [Block] + -> Peek PandocError (Doc Text) +renderBlockList writer msep blocks = withContext "rendering Blocks" $ do + let addSeps = intersperse $ fromMaybe blankline msep + mconcat . 
addSeps <$> mapM (renderBlock writer) blocks + +renderInlineList :: WriterTable -> [Inline] -> Peek PandocError (Doc Text) +renderInlineList writer inlines = withContext "rendering Inlines" $ do + mconcat <$> mapM (renderInline writer) inlines + +orNil :: Peeker e a -> Peeker e (Maybe a) +orNil p idx = liftLua (ltype idx) >>= \case + TypeNil -> pure Nothing + TypeNone -> pure Nothing + _ -> Just <$> p idx + +peekDocFuzzy :: LuaError e => Peeker e (Doc Text) +peekDocFuzzy idx = liftLua (ltype idx) >>= \case + TypeTable -> mconcat <$!> peekList peekDoc idx + _ -> peekDoc idx + +handleMissingField :: LuaError e => ByteString -> LuaE e () +handleMissingField key' = + let key = UTF8.toString key' + blockNames = map (fromString . show) . dataTypeConstrs . dataTypeOf + $ HorizontalRule + inlineNames = map (fromString . show) . dataTypeConstrs . dataTypeOf + $ Space + mtypeName = case () of + _ | key `elem` blockNames -> Just "Block" + _ | key `elem` inlineNames -> Just "Inline" + _ -> Nothing + in case mtypeName of + Just typeName -> failLua $ + "No render function for " <> typeName <> " value " <> + "'" <> key <> "';\ndefine a function `Writer." <> + typeName <> "." <> key <> "` that returns " <> + "a string or Doc." 
+ _ -> pure () diff --git a/test/Tests/Lua.hs b/pandoc-lua-engine/test/Tests/Lua.hs similarity index 97% rename from test/Tests/Lua.hs rename to pandoc-lua-engine/test/Tests/Lua.hs index 3381c6dbf594..cff46292204a 100644 --- a/test/Tests/Lua.hs +++ b/pandoc-lua-engine/test/Tests/Lua.hs @@ -28,10 +28,9 @@ import Text.Pandoc.Class (runIOorExplode, setUserDataDir) import Text.Pandoc.Definition (Attr, Block (BlockQuote, Div, Para), Pandoc, Inline (Emph, Str), pandocTypesVersion) import Text.Pandoc.Error (PandocError (PandocLuaError)) -import Text.Pandoc.Filter (Filter (LuaFilter), applyFilters) -import Text.Pandoc.Lua (Global (..), runLua, setGlobals) +import Text.Pandoc.Lua (Global (..), applyFilter, runLua, setGlobals) import Text.Pandoc.Options (def) -import Text.Pandoc.Shared (pandocVersion) +import Text.Pandoc.Version (pandocVersionText) import qualified Control.Monad.Catch as Catch import qualified Data.Text as T @@ -149,7 +148,7 @@ tests = , testCase "Pandoc version is set" . runLuaTest $ do Lua.getglobal "PANDOC_VERSION" Lua.liftIO . - assertEqual "pandoc version is wrong" (TE.encodeUtf8 pandocVersion) + assertEqual "pandoc version is wrong" (TE.encodeUtf8 pandocVersionText) =<< Lua.tostring' Lua.top , testCase "Pandoc types version is set" . 
runLuaTest $ do @@ -234,7 +233,7 @@ assertFilterConversion :: String -> FilePath -> Pandoc -> Pandoc -> Assertion assertFilterConversion msg filterPath docIn expectedDoc = do actualDoc <- runIOorExplode $ do setUserDataDir (Just "../data") - applyFilters def [LuaFilter ("lua" filterPath)] ["HTML"] docIn + applyFilter def ["HTML"] ("lua" filterPath) docIn assertEqual msg expectedDoc actualDoc runLuaTest :: HasCallStack => Lua.LuaE PandocError a -> IO a diff --git a/test/Tests/Lua/Module.hs b/pandoc-lua-engine/test/Tests/Lua/Module.hs similarity index 92% rename from test/Tests/Lua/Module.hs rename to pandoc-lua-engine/test/Tests/Lua/Module.hs index fd3fc8998a53..c131469a09be 100644 --- a/test/Tests/Lua/Module.hs +++ b/pandoc-lua-engine/test/Tests/Lua/Module.hs @@ -23,6 +23,8 @@ tests = ("lua" "module" "pandoc.lua") , testPandocLua "pandoc.List" ("lua" "module" "pandoc-list.lua") + , testPandocLua "pandoc.format" + ("lua" "module" "pandoc-format.lua") , testPandocLua "pandoc.mediabag" ("lua" "module" "pandoc-mediabag.lua") , testPandocLua "pandoc.path" diff --git a/pandoc-lua-engine/test/Tests/Lua/Reader.hs b/pandoc-lua-engine/test/Tests/Lua/Reader.hs new file mode 100644 index 000000000000..15ad685b4f13 --- /dev/null +++ b/pandoc-lua-engine/test/Tests/Lua/Reader.hs @@ -0,0 +1,34 @@ +{-# LANGUAGE LambdaCase #-} +{- | +Module : Tests.Lua.Reader +Copyright : © 2022 Albert Krewinkel +License : GPL-2.0-or-later +Maintainer : Albert Krewinkel + +Tests for custom Lua readers. 
+-} +module Tests.Lua.Reader (tests) where + +import Data.Char (chr) +import Data.Default (Default (def)) +import Text.Pandoc.Class (runIOorExplode) +import Text.Pandoc.Lua (readCustom) +import Text.Pandoc.Readers (Reader (ByteStringReader, TextReader)) +import Test.Tasty (TestTree) +import Test.Tasty.HUnit ((@?=), testCase) + +import qualified Data.ByteString.Lazy as BL +import qualified Data.Text as T +import qualified Text.Pandoc.Builder as B + +tests :: [TestTree] +tests = + [ testCase "read binary to code block" $ do + input <- BL.readFile "bytestring.bin" + doc <- runIOorExplode $ + readCustom "bytestring-reader.lua" >>= \case + (ByteStringReader f, _) -> f def input + (TextReader {}, _) -> error "Expected a bytestring reader" + let bytes = mconcat $ map (B.str . T.singleton . chr) [0..255] + doc @?= B.doc (B.plain bytes) + ] diff --git a/pandoc-lua-engine/test/Tests/Lua/Writer.hs b/pandoc-lua-engine/test/Tests/Lua/Writer.hs new file mode 100644 index 000000000000..c9a0d478c6be --- /dev/null +++ b/pandoc-lua-engine/test/Tests/Lua/Writer.hs @@ -0,0 +1,91 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{- | +Module : Tests.Lua.Writer +Copyright : © 2019-2022 Albert Krewinkel +License : GNU GPL, version 2 or above +Maintainer : Albert Krewinkel + +Tests for custom Lua writers. 
+-} +module Tests.Lua.Writer (tests) where + +import Data.Default (Default (def)) +import Text.Pandoc.Class (runIOorExplode, readFileStrict) +import Text.Pandoc.Extensions (Extension (..)) +import Text.Pandoc.Format (ExtensionsDiff (..), FlavoredFormat (..), + applyExtensionsDiff) +import Text.Pandoc.Lua (writeCustom) +import Text.Pandoc.Options (WriterOptions (..)) +import Text.Pandoc.Readers (readNative) +import Text.Pandoc.Writers (Writer (ByteStringWriter, TextWriter)) +import Test.Tasty (TestTree) +import Test.Tasty.Golden (goldenVsString) +import Test.Tasty.HUnit (testCase, (@?=)) + +import qualified Data.ByteString.Lazy as BL +import qualified Text.Pandoc.Builder as B +import qualified Text.Pandoc.UTF8 as UTF8 + +tests :: [TestTree] +tests = + [ goldenVsString "default testsuite" + "writer.custom" + (runIOorExplode $ do + source <- UTF8.toText <$> readFileStrict "testsuite.native" + doc <- readNative def source + txt <- writeCustom "sample.lua" >>= \case + (TextWriter f, _, _) -> f def doc + _ -> error "Expected a text writer" + pure $ BL.fromStrict (UTF8.fromText txt)) + + , goldenVsString "tables testsuite" + "tables.custom" + (runIOorExplode $ do + source <- UTF8.toText <$> readFileStrict "tables.native" + doc <- readNative def source + txt <- writeCustom "sample.lua" >>= \case + (TextWriter f, _, _) -> f def doc + _ -> error "Expected a text writer" + pure $ BL.fromStrict (UTF8.fromText txt)) + + , goldenVsString "bytestring writer" + "bytestring.bin" + (runIOorExplode $ + writeCustom "bytestring.lua" >>= \case + (ByteStringWriter f, _, _) -> f def mempty + _ -> error "Expected a bytestring writer") + + , goldenVsString "template" + "writer-template.out.txt" + (runIOorExplode $ do + txt <- writeCustom "writer-template.lua" >>= \case + (TextWriter f, _, mt) -> do + template <- mt + let opts = def{ writerTemplate = Just template } + f opts (B.doc (B.plain (B.str "body goes here"))) + _ -> error "Expected a text writer" + pure $ BL.fromStrict (UTF8.fromText 
txt)) + + , testCase "preset extensions" $ do + let ediff = ExtensionsDiff{extsToEnable = [], extsToDisable = []} + let format = FlavoredFormat "extensions.lua" ediff + result <- runIOorExplode $ writeCustom "extensions.lua" >>= \case + (TextWriter write, extsConf, _) -> do + exts <- applyExtensionsDiff extsConf format + write def{writerExtensions = exts} (B.doc mempty) + _ -> error "Expected a text writer" + result @?= "smart extension is enabled;\ncitations extension is disabled\n" + , testCase "modified extensions" $ do + let ediff = ExtensionsDiff + { extsToEnable = [Ext_citations] + , extsToDisable = [] + } + let format = FlavoredFormat "extensions.lua" ediff + result <- runIOorExplode $ writeCustom "extensions.lua" >>= \case + (TextWriter write, extsConf, _) -> do + exts <- applyExtensionsDiff extsConf format + write def{writerExtensions = exts} (B.doc mempty) + _ -> error "Expected a text writer" + result @?= "smart extension is enabled;\ncitations extension is enabled\n" + ] diff --git a/pandoc-lua-engine/test/bytestring-reader.lua b/pandoc-lua-engine/test/bytestring-reader.lua new file mode 100644 index 000000000000..7908479ecb75 --- /dev/null +++ b/pandoc-lua-engine/test/bytestring-reader.lua @@ -0,0 +1,7 @@ +function ByteStringReader (input, opts) + local chars = pandoc.List{} + for i = 1, #input do + chars:insert(utf8.char(input:byte(i,i))) + end + return pandoc.Pandoc(pandoc.Plain(pandoc.Str(table.concat(chars)))) +end diff --git a/pandoc-lua-engine/test/bytestring.bin b/pandoc-lua-engine/test/bytestring.bin new file mode 100644 index 000000000000..c86626638e0b Binary files /dev/null and b/pandoc-lua-engine/test/bytestring.bin differ diff --git a/pandoc-lua-engine/test/bytestring.lua b/pandoc-lua-engine/test/bytestring.lua new file mode 100644 index 000000000000..f872e28d497d --- /dev/null +++ b/pandoc-lua-engine/test/bytestring.lua @@ -0,0 +1,7 @@ +function ByteStringWriter (doc, opts) + local buffer = {} + for i=0, 255 do + table.insert(buffer, 
string.char(i)) + end + return table.concat(buffer, '') +end diff --git a/pandoc-lua-engine/test/extensions.lua b/pandoc-lua-engine/test/extensions.lua new file mode 100644 index 000000000000..d286f5c416b0 --- /dev/null +++ b/pandoc-lua-engine/test/extensions.lua @@ -0,0 +1,12 @@ +function Writer (doc, opts) + local output = 'smart extension is %s;\ncitations extension is %s\n' + local status = function (ext) + return opts.extensions:includes(ext) and 'enabled' or 'disabled' + end + return output:format(status('smart'), status('citations')) +end + +Extensions = { + smart = true, + citations = false, +} diff --git a/test/lua/attr-test.lua b/pandoc-lua-engine/test/lua/attr-test.lua similarity index 100% rename from test/lua/attr-test.lua rename to pandoc-lua-engine/test/lua/attr-test.lua diff --git a/test/lua/block-count.lua b/pandoc-lua-engine/test/lua/block-count.lua similarity index 100% rename from test/lua/block-count.lua rename to pandoc-lua-engine/test/lua/block-count.lua diff --git a/test/lua/blocks-filter.lua b/pandoc-lua-engine/test/lua/blocks-filter.lua similarity index 100% rename from test/lua/blocks-filter.lua rename to pandoc-lua-engine/test/lua/blocks-filter.lua diff --git a/test/lua/hello-world-doc.lua b/pandoc-lua-engine/test/lua/hello-world-doc.lua similarity index 100% rename from test/lua/hello-world-doc.lua rename to pandoc-lua-engine/test/lua/hello-world-doc.lua diff --git a/test/lua/implicit-doc-filter.lua b/pandoc-lua-engine/test/lua/implicit-doc-filter.lua similarity index 100% rename from test/lua/implicit-doc-filter.lua rename to pandoc-lua-engine/test/lua/implicit-doc-filter.lua diff --git a/test/lua/inlines-filter.lua b/pandoc-lua-engine/test/lua/inlines-filter.lua similarity index 100% rename from test/lua/inlines-filter.lua rename to pandoc-lua-engine/test/lua/inlines-filter.lua diff --git a/test/lua/markdown-reader.lua b/pandoc-lua-engine/test/lua/markdown-reader.lua similarity index 100% rename from test/lua/markdown-reader.lua 
rename to pandoc-lua-engine/test/lua/markdown-reader.lua diff --git a/test/lua/math.lua b/pandoc-lua-engine/test/lua/math.lua similarity index 100% rename from test/lua/math.lua rename to pandoc-lua-engine/test/lua/math.lua diff --git a/test/lua/meta.lua b/pandoc-lua-engine/test/lua/meta.lua similarity index 100% rename from test/lua/meta.lua rename to pandoc-lua-engine/test/lua/meta.lua diff --git a/test/lua/metatable-catch-all.lua b/pandoc-lua-engine/test/lua/metatable-catch-all.lua similarity index 100% rename from test/lua/metatable-catch-all.lua rename to pandoc-lua-engine/test/lua/metatable-catch-all.lua diff --git a/test/lua/module/globals.lua b/pandoc-lua-engine/test/lua/module/globals.lua similarity index 100% rename from test/lua/module/globals.lua rename to pandoc-lua-engine/test/lua/module/globals.lua diff --git a/pandoc-lua-engine/test/lua/module/pandoc-format.lua b/pandoc-lua-engine/test/lua/module/pandoc-format.lua new file mode 100644 index 000000000000..86dcd4c6b316 --- /dev/null +++ b/pandoc-lua-engine/test/lua/module/pandoc-format.lua @@ -0,0 +1,34 @@ +local tasty = require 'tasty' + +local test = tasty.test_case +local group = tasty.test_group +local assert = tasty.assert + +local format = require 'pandoc.format' + +return { + group 'default_extensions' { + test('docx', function () + local docx_default_exts = { + 'auto_identifiers', + } + assert.are_same(format.default_extensions('docx'), docx_default_exts) + end), + }, + + group 'all_extensions' { + test('docx', function () + local docx_default_exts = { + 'ascii_identifiers', + 'auto_identifiers', + 'citations', + 'east_asian_line_breaks', + 'empty_paragraphs', + 'gfm_auto_identifiers', + 'native_numbering', + 'styles', + } + assert.are_same(format.all_extensions('docx'), docx_default_exts) + end), + }, +} diff --git a/test/lua/module/pandoc-list.lua b/pandoc-lua-engine/test/lua/module/pandoc-list.lua similarity index 100% rename from test/lua/module/pandoc-list.lua rename to 
pandoc-lua-engine/test/lua/module/pandoc-list.lua diff --git a/test/lua/module/pandoc-mediabag.lua b/pandoc-lua-engine/test/lua/module/pandoc-mediabag.lua similarity index 100% rename from test/lua/module/pandoc-mediabag.lua rename to pandoc-lua-engine/test/lua/module/pandoc-mediabag.lua diff --git a/test/lua/module/pandoc-path.lua b/pandoc-lua-engine/test/lua/module/pandoc-path.lua similarity index 100% rename from test/lua/module/pandoc-path.lua rename to pandoc-lua-engine/test/lua/module/pandoc-path.lua diff --git a/test/lua/module/pandoc-template.lua b/pandoc-lua-engine/test/lua/module/pandoc-template.lua similarity index 69% rename from test/lua/module/pandoc-template.lua rename to pandoc-lua-engine/test/lua/module/pandoc-template.lua index c288b2016176..ba18986ffc63 100644 --- a/test/lua/module/pandoc-template.lua +++ b/pandoc-lua-engine/test/lua/module/pandoc-template.lua @@ -50,7 +50,7 @@ return { test('fails if template has non-existing partial', function () assert.error_matches( function () return template.compile('${ nosuchpartial() }') end, - 'PandocCouldNotFindDataFileError' + 'Could not find data file' ) end), test('works with default template that uses partials', function () @@ -62,4 +62,31 @@ return { ) end), }, + group 'apply' { + test('is function', function () + assert.are_equal(type(template.apply), 'function') + end), + test('returns a Doc value', function () + local tmpl = template.compile('placeholder') + assert.are_equal( + pandoc.utils.type(template.apply(tmpl, {})), + 'Doc' + ) + end), + test('applies the given context', function () + local tmpl = template.compile('song: $title$') + local context = {title = 'Along Comes Mary'} + assert.are_equal( + template.apply(tmpl, context):render(), + 'song: Along Comes Mary' + ) + end), + test('accepts string as template', function () + local context = {number = '2'} + assert.are_equal( + template.apply('Song $number$', context):render(), + 'Song 2' + ) + end) + }, } diff --git 
a/test/lua/module/pandoc-types.lua b/pandoc-lua-engine/test/lua/module/pandoc-types.lua similarity index 100% rename from test/lua/module/pandoc-types.lua rename to pandoc-lua-engine/test/lua/module/pandoc-types.lua diff --git a/test/lua/module/pandoc-utils.lua b/pandoc-lua-engine/test/lua/module/pandoc-utils.lua similarity index 100% rename from test/lua/module/pandoc-utils.lua rename to pandoc-lua-engine/test/lua/module/pandoc-utils.lua diff --git a/test/lua/module/pandoc.lua b/pandoc-lua-engine/test/lua/module/pandoc.lua similarity index 99% rename from test/lua/module/pandoc.lua rename to pandoc-lua-engine/test/lua/module/pandoc.lua index 397182438e90..d61bcf3b05cc 100644 --- a/test/lua/module/pandoc.lua +++ b/pandoc-lua-engine/test/lua/module/pandoc.lua @@ -266,7 +266,7 @@ return { test('unsupported extension', function () assert.error_matches( function () pandoc.read('foo', 'gfm+empty_paragraphs') end, - 'Extension empty_paragraphs not supported for gfm' + 'The extension empty_paragraphs is not supported for gfm' ) end), test('read with other indented code classes', function() @@ -290,7 +290,7 @@ return { test('failing read', function () assert.error_matches( function () pandoc.read('foo', 'nosuchreader') end, - 'Unknown reader: nosuchreader' + 'Unknown input format nosuchreader' ) end) }, diff --git a/test/lua/module/partial.test b/pandoc-lua-engine/test/lua/module/partial.test similarity index 100% rename from test/lua/module/partial.test rename to pandoc-lua-engine/test/lua/module/partial.test diff --git a/test/lua/module/tiny.epub b/pandoc-lua-engine/test/lua/module/tiny.epub similarity index 100% rename from test/lua/module/tiny.epub rename to pandoc-lua-engine/test/lua/module/tiny.epub diff --git a/test/lua/plain-to-para.lua b/pandoc-lua-engine/test/lua/plain-to-para.lua similarity index 100% rename from test/lua/plain-to-para.lua rename to pandoc-lua-engine/test/lua/plain-to-para.lua diff --git a/test/lua/require-file.lua 
b/pandoc-lua-engine/test/lua/require-file.lua similarity index 100% rename from test/lua/require-file.lua rename to pandoc-lua-engine/test/lua/require-file.lua diff --git a/test/lua/script-name.lua b/pandoc-lua-engine/test/lua/script-name.lua similarity index 100% rename from test/lua/script-name.lua rename to pandoc-lua-engine/test/lua/script-name.lua diff --git a/test/lua/single-to-double-quoted.lua b/pandoc-lua-engine/test/lua/single-to-double-quoted.lua similarity index 100% rename from test/lua/single-to-double-quoted.lua rename to pandoc-lua-engine/test/lua/single-to-double-quoted.lua diff --git a/test/lua/smallcaps-title.lua b/pandoc-lua-engine/test/lua/smallcaps-title.lua similarity index 100% rename from test/lua/smallcaps-title.lua rename to pandoc-lua-engine/test/lua/smallcaps-title.lua diff --git a/test/lua/smart-constructors.lua b/pandoc-lua-engine/test/lua/smart-constructors.lua similarity index 100% rename from test/lua/smart-constructors.lua rename to pandoc-lua-engine/test/lua/smart-constructors.lua diff --git a/test/lua/strmacro.lua b/pandoc-lua-engine/test/lua/strmacro.lua similarity index 100% rename from test/lua/strmacro.lua rename to pandoc-lua-engine/test/lua/strmacro.lua diff --git a/test/lua/undiv.lua b/pandoc-lua-engine/test/lua/undiv.lua similarity index 100% rename from test/lua/undiv.lua rename to pandoc-lua-engine/test/lua/undiv.lua diff --git a/test/lua/uppercase-header.lua b/pandoc-lua-engine/test/lua/uppercase-header.lua similarity index 100% rename from test/lua/uppercase-header.lua rename to pandoc-lua-engine/test/lua/uppercase-header.lua diff --git a/data/sample.lua b/pandoc-lua-engine/test/sample.lua similarity index 96% rename from data/sample.lua rename to pandoc-lua-engine/test/sample.lua index c0adae230396..aacc0d2b65e7 100644 --- a/data/sample.lua +++ b/pandoc-lua-engine/test/sample.lua @@ -13,19 +13,19 @@ -- produce informative error messages if your code contains -- syntax errors. 
+function Writer (doc, opts) + PANDOC_DOCUMENT = doc + PANDOC_WRITER_OPTIONS = opts + loadfile(PANDOC_SCRIPT_FILE)() + return pandoc.write_classic(doc, opts) +end + local pipe = pandoc.pipe local stringify = (require 'pandoc.utils').stringify --- The global variable PANDOC_DOCUMENT contains the full AST of --- the document which is going to be written. It can be used to --- configure the writer. -local meta = PANDOC_DOCUMENT.meta - -- Choose the image format based on the value of the --- `image_format` meta value. -local image_format = meta.image_format - and stringify(meta.image_format) - or 'png' +-- `image_format` environment variable. +local image_format = os.getenv 'image_format' or 'png' local image_mime_type = ({ jpeg = 'image/jpeg', jpg = 'image/jpeg', diff --git a/test/tables.custom b/pandoc-lua-engine/test/tables.custom similarity index 100% rename from test/tables.custom rename to pandoc-lua-engine/test/tables.custom diff --git a/pandoc-lua-engine/test/tables.native b/pandoc-lua-engine/test/tables.native new file mode 120000 index 000000000000..b2585393d479 --- /dev/null +++ b/pandoc-lua-engine/test/tables.native @@ -0,0 +1 @@ +../../test/tables.native \ No newline at end of file diff --git a/pandoc-lua-engine/test/test-pandoc-lua-engine.hs b/pandoc-lua-engine/test/test-pandoc-lua-engine.hs new file mode 100644 index 000000000000..21febddb58e1 --- /dev/null +++ b/pandoc-lua-engine/test/test-pandoc-lua-engine.hs @@ -0,0 +1,18 @@ +module Main (main) where +import Test.Tasty (TestTree, defaultMain, testGroup) +import qualified Tests.Lua +import qualified Tests.Lua.Module +import qualified Tests.Lua.Reader +import qualified Tests.Lua.Writer +import System.Directory (withCurrentDirectory) + +main :: IO () +main = withCurrentDirectory "test" $ defaultMain tests + +tests :: TestTree +tests = testGroup "pandoc Lua engine" + [ testGroup "Lua filters" Tests.Lua.tests + , testGroup "Lua modules" Tests.Lua.Module.tests + , testGroup "Custom writers" 
Tests.Lua.Writer.tests + , testGroup "Custom readers" Tests.Lua.Reader.tests + ] diff --git a/pandoc-lua-engine/test/testsuite.native b/pandoc-lua-engine/test/testsuite.native new file mode 120000 index 000000000000..7f0fd1f262a0 --- /dev/null +++ b/pandoc-lua-engine/test/testsuite.native @@ -0,0 +1 @@ +../../test/testsuite.native \ No newline at end of file diff --git a/pandoc-lua-engine/test/writer-template.lua b/pandoc-lua-engine/test/writer-template.lua new file mode 100644 index 000000000000..c90f7c1efce7 --- /dev/null +++ b/pandoc-lua-engine/test/writer-template.lua @@ -0,0 +1,7 @@ +function Writer (doc, opts) + return pandoc.write(doc, 'gfm', opts) +end + +function Template () + return pandoc.template.compile '\n$body$\n\n' +end diff --git a/pandoc-lua-engine/test/writer-template.out.txt b/pandoc-lua-engine/test/writer-template.out.txt new file mode 100644 index 000000000000..1fb343c28612 --- /dev/null +++ b/pandoc-lua-engine/test/writer-template.out.txt @@ -0,0 +1,4 @@ + +body goes here + + diff --git a/test/writer.custom b/pandoc-lua-engine/test/writer.custom similarity index 100% rename from test/writer.custom rename to pandoc-lua-engine/test/writer.custom diff --git a/pandoc-server/COPYING.md b/pandoc-server/COPYING.md new file mode 120000 index 000000000000..0c9476f2b40f --- /dev/null +++ b/pandoc-server/COPYING.md @@ -0,0 +1 @@ +../COPYING.md \ No newline at end of file diff --git a/pandoc-server/README.md b/pandoc-server/README.md new file mode 100644 index 000000000000..346b3461a5c8 --- /dev/null +++ b/pandoc-server/README.md @@ -0,0 +1,26 @@ +# pandoc-server + +`pandoc-server` is a Haskell library providing access to +pandoc's document conversions as an HTTP server. + +For a description of the API, see +[pandoc-server.md](https://github.com/jgm/pandoc/blob/master/doc/pandoc-server.md) +in the pandoc source repository. 
+ +Example of use: + +``` hs +module Main where +import Text.Pandoc.Server (app) +import qualified Network.Wai.Handler.Warp as Warp + +main :: IO () +main = Warp.run 3000 app +``` + +## License + +© 2006-2022 John MacFarlane (jgm@berkeley.edu). Released under the +[GPL](https://www.gnu.org/licenses/old-licenses/gpl-2.0.html "GNU General Public License"), +version 2 or greater. This software carries no warranty of any kind. +(See COPYRIGHT for full copyright and warranty notices.) diff --git a/pandoc-server/pandoc-server.cabal b/pandoc-server/pandoc-server.cabal new file mode 100644 index 000000000000..bfe4197600f3 --- /dev/null +++ b/pandoc-server/pandoc-server.cabal @@ -0,0 +1,70 @@ +cabal-version: 2.4 +name: pandoc-server +version: 0.1 +build-type: Simple +license: GPL-2.0-or-later +license-file: COPYING.md +copyright: (c) 2006-2022 John MacFarlane +author: John MacFarlane +maintainer: John MacFarlane +bug-reports: https://github.com/jgm/pandoc/issues +stability: alpha +homepage: https://pandoc.org +category: Text +tested-with: GHC == 8.6.5, GHC == 8.8.4, GHC == 8.10.7, GHC == 9.0.2, + GHC == 9.2.3 +synopsis: Pandoc document conversion as an HTTP servant-server +description: Pandoc-server provides pandoc's document conversion functions + in an HTTP server. 
+source-repository head + type: git + location: git://github.com/jgm/pandoc.git + +common common-options + default-language: Haskell2010 + build-depends: base >= 4.12 && < 5 + ghc-options: -Wall -fno-warn-unused-do-bind + -Wincomplete-record-updates + -Wnoncanonical-monad-instances + -Wcpp-undef + -Wincomplete-uni-patterns + -Widentities + -Wpartial-fields + -Wmissing-signatures + -fhide-source-paths + -- -Wmissing-export-lists + + if impl(ghc >= 8.10) + ghc-options: -Wunused-packages + + if impl(ghc >= 9.0) + ghc-options: -Winvalid-haddock + + if os(windows) + cpp-options: -D_WINDOWS + +common common-executable + import: common-options + ghc-options: -rtsopts -with-rtsopts=-A8m -threaded + +library + import: common-options + build-depends: pandoc >= 3.0, + pandoc-types >= 1.22.2 && < 1.23, + containers >= 0.6.0.1 && < 0.7, + aeson >= 2.0 && < 2.2, + bytestring >= 0.9 && < 0.12, + base64 >= 0.4 && < 0.5, + doctemplates >= 0.10 && < 0.11, + data-default >= 0.4 && < 0.8, + text >= 1.1.1.0 && < 2.1, + unicode-collation >= 0.1.1 && < 0.2, + servant-server >= 0.19 && < 0.20, + skylighting >= 0.13 && < 0.14, + wai >= 0.3 + + hs-source-dirs: src + + exposed-modules: Text.Pandoc.Server + buildable: True + diff --git a/src/Text/Pandoc/Server.hs b/pandoc-server/src/Text/Pandoc/Server.hs similarity index 77% rename from src/Text/Pandoc/Server.hs rename to pandoc-server/src/Text/Pandoc/Server.hs index 7dfde3aa3ab8..c45dfa381b73 100644 --- a/src/Text/Pandoc/Server.hs +++ b/pandoc-server/src/Text/Pandoc/Server.hs @@ -1,20 +1,24 @@ {-# LANGUAGE DataKinds #-} +{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} module Text.Pandoc.Server ( app + , API , ServerOpts(..) , Params(..) , Blob(..) 
- , parseServerOpts + , parseServerOptsFromArgs ) where import Data.Aeson +import qualified Data.Aeson.KeyMap as KeyMap import Network.Wai import Servant import Text.DocTemplates as DocTemplates import Text.Pandoc +import Text.Pandoc.Writers.Shared (lookupMetaString) import Text.Pandoc.Citeproc (processCitations) import Text.Pandoc.Highlighting (lookupHighlightingStyle) import qualified Text.Pandoc.UTF8 as UTF8 @@ -23,24 +27,26 @@ import qualified Data.Text as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TLE import Data.Maybe (fromMaybe) -import Data.Char (isAlphaNum) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as BL import Data.ByteString.Base64 (decodeBase64, encodeBase64) import Data.Default -import Control.Monad (when, foldM) +import Control.Monad (when, unless, foldM) import qualified Data.Set as Set import Skylighting (defaultSyntaxMap) import qualified Data.Map as M +import Text.Collate.Lang (Lang (..), parseLang) import System.Console.GetOpt -import System.Environment (getArgs, getProgName) +import System.Environment (getProgName) import qualified Control.Exception as E import Text.Pandoc.Shared (safeStrRead, headerShift, filterIpynbOutput, - eastAsianLineBreakFilter, stripEmptyParagraphs) -import Text.Pandoc.App.Opt ( IpynbOutput (..), Opt(..), defaultOpts ) + eastAsianLineBreakFilter) +import Text.Pandoc.App ( IpynbOutput (..), Opt(..), defaultOpts ) import Text.Pandoc.Builder (setMeta) +import Text.Pandoc.Format (parseFlavoredFormat, formatName) import Text.Pandoc.SelfContained (makeSelfContained) import System.Exit +import GHC.Generics (Generic) data ServerOpts = ServerOpts @@ -73,21 +79,20 @@ cliOptions = prg <- getProgName let header = "Usage: " <> prg <> " [OPTION...]" putStrLn $ usageInfo header cliOptions - exitWith ExitSuccess)) + exitSuccess)) "help message" , Option ['v'] ["version"] (NoArg (\_ -> do prg <- getProgName - putStrLn $ prg <> " " <> T.unpack pandocVersion - 
exitWith ExitSuccess)) + putStrLn $ prg <> " " <> T.unpack pandocVersionText + exitSuccess)) "version info" ] -parseServerOpts :: IO ServerOpts -parseServerOpts = do - args <- getArgs +parseServerOptsFromArgs :: [String] -> IO ServerOpts +parseServerOptsFromArgs args = do let handleUnknownOpt x = "Unknown option: " <> x case getOpt' Permute cliOptions args of (os, ns, unrecognizedOpts, es) -> do @@ -95,7 +100,7 @@ parseServerOpts = do E.throwIO $ PandocOptionError $ T.pack $ concat es ++ unlines (map handleUnknownOpt unrecognizedOpts) ++ ("Try --help for more information.") - when (not (null ns)) $ + unless (null ns) $ E.throwIO $ PandocOptionError $ T.pack $ "Unknown arguments: " <> unwords ns foldM (flip ($)) defaultServerOpts os @@ -141,17 +146,51 @@ instance FromJSON Params where <*> o .:? "files" <*> o .:? "citeproc" +instance ToJSON Params where + toJSON params = + case toJSON (options params) of + (Object o) -> Object $ + KeyMap.insert "text" (toJSON $ text params) + . KeyMap.insert "files" (toJSON $ files params) + . KeyMap.insert "citeproc" (toJSON $ citeproc params) + $ o + x -> x + +data Message = + Message + { verbosity :: Verbosity + , message :: Text } + deriving (Generic, Show) + +instance ToJSON Message where + toEncoding = genericToEncoding defaultOptions + +type Base64 = Bool + +data Output = Succeeded Text Base64 [Message] + | Failed Text + deriving (Generic, Show) + +instance ToJSON Output where + toEncoding (Succeeded o b m) = pairs + ( "output" .= o <> + "base64" .= b <> + "messages" .= m ) + toEncoding (Failed errmsg) = pairs + ( "error" .= errmsg ) -- This is the API. The "/convert" endpoint takes a request body -- consisting of a JSON-encoded Params structure and responds to -- Get requests with either plain text or JSON, depending on the -- Accept header. 
type API = - ReqBody '[JSON] Params :> Post '[PlainText, JSON] Text - :<|> ReqBody '[JSON] Params :> Post '[OctetStream] BS.ByteString :<|> - "batch" :> ReqBody '[JSON] [Params] :> Post '[JSON] [Text] + ReqBody '[JSON] Params :> Post '[PlainText] Text + :<|> + ReqBody '[JSON] Params :> Post '[JSON] Output + :<|> + "batch" :> ReqBody '[JSON] [Params] :> Post '[JSON] [Output] :<|> "babelmark" :> QueryParam' '[Required] "text" Text :> QueryParam "from" Text :> QueryParam "to" Text :> QueryFlag "standalone" :> Get '[JSON] Value :<|> @@ -164,14 +203,16 @@ api :: Proxy API api = Proxy server :: Server API -server = convert - :<|> convertBytes - :<|> mapM convert +server = convertBytes + :<|> convertText + :<|> convertJSON + :<|> mapM convertJSON :<|> babelmark -- for babelmark which expects {"html": "", "version": ""} - :<|> pure pandocVersion + :<|> pure pandocVersionText where babelmark text' from' to' standalone' = do - res <- convert def{ text = text', + res <- convertText def{ + text = text', options = defaultOpts{ optFrom = from', optTo = to', @@ -185,13 +226,26 @@ server = convert -- Changing this to -- handleErr =<< liftIO (runIO (convert' params)) -- will allow the IO operations. - convert params = handleErr $ - runPure (convert' id (encodeBase64 . BL.toStrict) params) + convertText params = handleErr $ + runPure (convert' return (return . encodeBase64 . BL.toStrict) params) convertBytes params = handleErr $ - runPure (convert' UTF8.fromText BL.toStrict params) - - convert' :: (Text -> a) -> (BL.ByteString -> a) -> Params -> PandocPure a + runPure (convert' (return . UTF8.fromText) (return . BL.toStrict) params) + + convertJSON params = handleErrJSON $ + runPure + (convert' + (\t -> Succeeded t False . map toMessage <$> getLog) + (\bs -> Succeeded (encodeBase64 (BL.toStrict bs)) True + . 
map toMessage <$> getLog) + params) + + toMessage m = Message { verbosity = messageVerbosity m + , message = showLogMessage m } + + convert' :: (Text -> PandocPure a) + -> (BL.ByteString -> PandocPure a) + -> Params -> PandocPure a convert' textHandler bsHandler params = do curtime <- getCurrentTime -- put files params in ersatz file system @@ -206,13 +260,13 @@ server = convert modifyPureState $ \st -> st{ stFiles = filetree } let opts = options params - let readerFormat = fromMaybe "markdown" $ optFrom opts - let writerFormat = fromMaybe "html" $ optTo opts + readerFormat <- parseFlavoredFormat <$> fromMaybe "markdown" $ optFrom opts + writerFormat <- parseFlavoredFormat <$> fromMaybe "html" $ optTo opts (readerSpec, readerExts) <- getReader readerFormat (writerSpec, writerExts) <- getWriter writerFormat let isStandalone = optStandalone opts - let toformat = T.toLower $ T.takeWhile isAlphaNum $ writerFormat + let toformat = formatName writerFormat hlStyle <- traverse (lookupHighlightingStyle . T.unpack) $ optHighlightStyle opts @@ -240,6 +294,7 @@ server = convert , readerTrackChanges = optTrackChanges opts , readerStripComments = optStripComments opts } + let writeropts = def{ writerExtensions = writerExts , writerTabStop = optTabStop opts @@ -274,6 +329,7 @@ server = convert , writerReferenceLocation = optReferenceLocation opts , writerPreferAscii = optAscii opts } + let reader = case readerSpec of TextReader r -> r readeropts ByteStringReader r -> \t -> do @@ -281,22 +337,28 @@ server = convert case eitherbs of Left errt -> throwError $ PandocSomeError errt Right bs -> r readeropts $ BL.fromStrict bs - let writer = case writerSpec of + + let writer d@(Pandoc meta _) = do + case lookupMetaString "lang" meta of + "" -> setTranslations $ + Lang "en" Nothing (Just "US") [] [] [] + l -> case parseLang l of + Left _ -> report $ InvalidLang l + Right l' -> setTranslations l' + case writerSpec of TextWriter w -> - fmap textHandler . 
- (\d -> w writeropts d >>= - if optEmbedResources opts && htmlFormat (optTo opts) - then makeSelfContained - else return) - ByteStringWriter w -> fmap bsHandler . w writeropts + w writeropts d >>= + (if optEmbedResources opts && htmlFormat (optTo opts) + then makeSelfContained + else return) >>= + textHandler + ByteStringWriter w -> + w writeropts d >>= bsHandler let transforms :: Pandoc -> Pandoc transforms = (case optShiftHeadingLevelBy opts of 0 -> id x -> headerShift x) . - (case optStripEmptyParagraphs opts of - True -> stripEmptyParagraphs - False -> id) . (if extensionEnabled Ext_east_asian_line_breaks readerExts && not (extensionEnabled Ext_east_asian_line_breaks @@ -346,10 +408,13 @@ server = convert handleErr (Left err) = throwError $ err500 { errBody = TLE.encodeUtf8 $ TL.fromStrict $ renderError err } + handleErrJSON (Right o) = return o + handleErrJSON (Left err) = + return $ Failed (renderError err) + compileCustomTemplate toformat t = do res <- runWithPartials $ compileTemplate ("custom." <> T.unpack toformat) (T.pack t) case res of Left e -> throwError $ PandocTemplateError (T.pack e) Right tpl -> return tpl - diff --git a/pandoc.cabal b/pandoc.cabal index ea6a6cb0a64e..8b56a77c386d 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -1,6 +1,6 @@ cabal-version: 2.4 name: pandoc -version: 2.19.2 +version: 3.0 build-type: Simple license: GPL-2.0-or-later license-file: COPYING.md @@ -15,8 +15,7 @@ tested-with: GHC == 8.6.5, GHC == 8.8.4, GHC == 8.10.7, GHC == 9.0.2, GHC == 9.2.3 synopsis: Conversion between markup formats description: Pandoc is a Haskell library for converting from one markup - format to another, and a command-line tool that uses - this library. The formats it can handle include + format to another. The formats it can handle include . 
- light markup formats (many variants of Markdown, reStructuredText, AsciiDoc, Org-mode, Muse, Textile, @@ -49,6 +48,7 @@ description: Pandoc is a Haskell library for converting from one markup data-files: -- templates data/templates/styles.html + data/templates/styles.citations.html data/templates/default.html4 data/templates/default.html5 data/templates/default.docbook4 @@ -180,8 +180,6 @@ data-files: data/dzslides/template.html -- default abbreviations file data/abbreviations - -- sample lua custom writer - data/sample.lua -- sample lua custom reader data/creole.lua -- lua init script @@ -205,10 +203,6 @@ extra-source-files: man/manfilter.lua man/pandoc.1.before man/pandoc.1.after - -- trypandoc - trypandoc/Makefile - trypandoc/index.html - trypandoc/trypandoc.js -- tests test/bodybg.gif test/*.native @@ -220,6 +214,8 @@ extra-source-files: test/command/B.txt test/command/C.txt test/command/D.txt + test/command/file1.txt + test/command/file2.txt test/command/three.txt test/command/01.csv test/command/chap1/spider.png @@ -245,7 +241,6 @@ extra-source-files: test/command/SVG_logo.svg test/command/corrupt.svg test/command/inkscape-cube.svg - test/command/lua-pandoc-state.lua test/command/sub-file-chapter-1.tex test/command/sub-file-chapter-2.tex test/command/bar.tex @@ -327,7 +322,6 @@ extra-source-files: test/tables.txt test/tables.fb2 test/tables.muse - test/tables.custom test/tables.xwiki test/tables/*.html4 test/tables/*.html5 @@ -368,7 +362,6 @@ extra-source-files: test/writer.zimwiki test/writer.xwiki test/writer.muse - test/writer.custom test/writers-lang-and-dir.latex test/writers-lang-and-dir.context test/dokuwiki_inline_formatting.dokuwiki @@ -414,10 +407,6 @@ extra-source-files: test/odt/odt/*.odt test/odt/markdown/*.md test/odt/native/*.native - test/lua/*.lua - test/lua/module/*.lua - test/lua/module/partial.test - test/lua/module/tiny.epub source-repository head type: git location: git://github.com/jgm/pandoc.git @@ -426,14 +415,6 @@ flag 
embed_data_files Description: Embed data files in binary for relocatable executable. Default: False -flag lua53 - Description: Embed Lua 5.3 instead of 5.4. - Default: False - -flag nightly - Description: Add '-nightly-COMPILEDATE' to the output of '--version'. - Default: False - common common-options default-language: Haskell2010 build-depends: base >= 4.12 && < 5 @@ -462,13 +443,27 @@ common common-executable build-depends: pandoc ghc-options: -rtsopts -with-rtsopts=-A8m -threaded +library xml-light + import: common-options + build-depends: xml >= 1.3.12 && < 1.4, + xml-conduit >= 1.9.1.1 && < 1.10, + xml-types >= 0.3 && < 0.4, + containers >= 0.6.0.1 && < 0.7, + text >= 1.1.1.0 && < 2.1 + + hs-source-dirs: xml-light + exposed-modules: Text.Pandoc.XML.Light, + Text.Pandoc.XML.Light.Types, + Text.Pandoc.XML.Light.Proc, + Text.Pandoc.XML.Light.Output library import: common-options - build-depends: Glob >= 0.7 && < 0.11, + build-depends: xml-light, + Glob >= 0.7 && < 0.11, JuicyPixels >= 3.1.6.1 && < 3.4, SHA >= 1.6 && < 1.7, - aeson >= 0.7 && < 2.2, + aeson >= 2.0.1.0 && < 2.2, aeson-pretty >= 0.8.9 && < 0.9, array >= 0.5 && < 0.6, attoparsec >= 0.12 && < 0.15, @@ -477,9 +472,9 @@ library blaze-markup >= 0.8 && < 0.9, bytestring >= 0.9 && < 0.12, case-insensitive >= 1.2 && < 1.3, - citeproc >= 0.8.0.1 && < 0.9, + citeproc >= 0.8.0.2 && < 0.9, commonmark >= 0.2.2 && < 0.3, - commonmark-extensions >= 0.2.3.1 && < 0.3, + commonmark-extensions >= 0.2.3.3 && < 0.3, commonmark-pandoc >= 0.2.1.2 && < 0.3, connection >= 0.3.1, containers >= 0.6.0.1 && < 0.7, @@ -493,23 +488,17 @@ library exceptions >= 0.8 && < 0.11, file-embed >= 0.0 && < 0.1, filepath >= 1.1 && < 1.5, - gridtables >= 0.0.3 && < 0.1, + gridtables >= 0.1 && < 0.2, haddock-library >= 1.10 && < 1.12, - hslua-module-doclayout>= 1.0.4 && < 1.1, - hslua-module-path >= 1.0.3 && < 1.1, - hslua-module-system >= 1.0 && < 1.1, - hslua-module-text >= 1.0 && < 1.1, - hslua-module-version >= 1.0 && < 1.1, http-client >= 
0.4.30 && < 0.8, http-client-tls >= 0.2.4 && < 0.4, http-types >= 0.8 && < 0.13, ipynb >= 0.2 && < 0.3, jira-wiki-markup >= 1.4 && < 1.5, - lpeg >= 1.0.1 && < 1.1, - mtl >= 2.2 && < 2.3, + mime-types >= 0.1.1 && < 0.2, + mtl >= 2.2 && < 2.4, network >= 2.6, network-uri >= 2.6 && < 2.8, - pandoc-lua-marshal >= 0.1.7 && < 0.2, pandoc-types >= 1.22.2 && < 1.23, parsec >= 3.1 && < 3.2, pretty >= 1.1 && < 1.2, @@ -518,53 +507,43 @@ library random >= 1 && < 1.3, safe >= 0.3.18 && < 0.4, scientific >= 0.3 && < 0.4, - skylighting >= 0.13 && < 0.14, - skylighting-core >= 0.13 && < 0.14, + skylighting >= 0.13.1.1 && < 0.14, + skylighting-core >= 0.13.1.1 && < 0.14, split >= 0.2 && < 0.3, syb >= 0.1 && < 0.8, tagsoup >= 0.14.6 && < 0.15, temporary >= 1.1 && < 1.4, - texmath >= 0.12.5.1 && < 0.12.6, + texmath >= 0.12.5.4 && < 0.12.6, text >= 1.1.1.0 && < 2.1, text-conversions >= 0.3 && < 0.4, time >= 1.5 && < 1.14, unicode-collation >= 0.1.1 && < 0.2, unicode-transforms >= 0.3 && < 0.5, - xml >= 1.3.12 && < 1.4, - xml-conduit >= 1.9.1.1 && < 1.10, - xml-types >= 0.3 && < 0.4, yaml >= 0.11 && < 0.12, zip-archive >= 0.2.3.4 && < 0.5, zlib >= 0.5 && < 0.7, - servant-server, - wai >= 0.3 + xml >= 1.3.12 && < 1.4 if !os(windows) build-depends: unix >= 2.4 && < 2.9 - if flag(nightly) - cpp-options: -DNIGHTLY - build-depends: template-haskell - if flag(lua53) - build-depends: hslua >= 2.1 && < 2.2, - hslua-aeson >= 2.2.1 && < 2.3 - else - build-depends: hslua >= 2.2.1 && < 2.3 - , hslua-aeson >= 2.2.1 && < 2.3 if flag(embed_data_files) cpp-options: -DEMBED_DATA_FILES - other-modules: Text.Pandoc.Data + other-modules: Text.Pandoc.Data.BakedIn hs-source-dirs: src exposed-modules: Text.Pandoc, Text.Pandoc.App, + Text.Pandoc.Data, Text.Pandoc.Options, Text.Pandoc.Extensions, + Text.Pandoc.Format, Text.Pandoc.Shared, Text.Pandoc.Sources, Text.Pandoc.MediaBag, Text.Pandoc.Error, Text.Pandoc.Filter, - Text.Pandoc.Server, + Text.Pandoc.Translations, + Text.Pandoc.Translations.Types, 
Text.Pandoc.Readers, Text.Pandoc.Readers.HTML, Text.Pandoc.Readers.LaTeX, @@ -590,7 +569,7 @@ library Text.Pandoc.Readers.TikiWiki, Text.Pandoc.Readers.Txt2Tags, Text.Pandoc.Readers.Docx, - Text.Pandoc.Readers.Odt, + Text.Pandoc.Readers.ODT, Text.Pandoc.Readers.EPUB, Text.Pandoc.Readers.Muse, Text.Pandoc.Readers.Man, @@ -599,10 +578,9 @@ library Text.Pandoc.Readers.Ipynb, Text.Pandoc.Readers.CSV, Text.Pandoc.Readers.RTF, - Text.Pandoc.Readers.Custom, Text.Pandoc.Writers, Text.Pandoc.Writers.Native, - Text.Pandoc.Writers.Docbook, + Text.Pandoc.Writers.DocBook, Text.Pandoc.Writers.JATS, Text.Pandoc.Writers.OPML, Text.Pandoc.Writers.HTML, @@ -621,7 +599,6 @@ library Text.Pandoc.Writers.RST, Text.Pandoc.Writers.Org, Text.Pandoc.Writers.AsciiDoc, - Text.Pandoc.Writers.Custom, Text.Pandoc.Writers.Textile, Text.Pandoc.Writers.MediaWiki, Text.Pandoc.Writers.DokuWiki, @@ -641,9 +618,9 @@ library Text.Pandoc.Writers.OOXML, Text.Pandoc.Writers.AnnotatedTable, Text.Pandoc.Writers.BibTeX, - Text.Pandoc.Lua, Text.Pandoc.PDF, Text.Pandoc.UTF8, + Text.Pandoc.Scripting, Text.Pandoc.Templates, Text.Pandoc.XML, Text.Pandoc.SelfContained, @@ -656,20 +633,21 @@ library Text.Pandoc.Emoji, Text.Pandoc.ImageSize, Text.Pandoc.Class, - Text.Pandoc.Citeproc + Text.Pandoc.Class.IO, + Text.Pandoc.Citeproc, + Text.Pandoc.Version other-modules: Text.Pandoc.App.CommandLineOptions, Text.Pandoc.App.FormatHeuristics, + Text.Pandoc.App.Input, Text.Pandoc.App.Opt, Text.Pandoc.App.OutputSettings, Text.Pandoc.Class.CommonState, - Text.Pandoc.Class.IO, Text.Pandoc.Class.PandocMonad, Text.Pandoc.Class.PandocIO, Text.Pandoc.Class.PandocPure, Text.Pandoc.Class.Sandbox, Text.Pandoc.Filter.Environment, Text.Pandoc.Filter.JSON, - Text.Pandoc.Filter.Lua, Text.Pandoc.Parsing.Capabilities, Text.Pandoc.Parsing.Citations, Text.Pandoc.Parsing.General, @@ -678,7 +656,7 @@ library Text.Pandoc.Parsing.Math, Text.Pandoc.Parsing.Smart, Text.Pandoc.Parsing.State, - Text.Pandoc.Parsing.Types, + Text.Pandoc.Parsing.Future, 
Text.Pandoc.Readers.Docx.Lists, Text.Pandoc.Readers.Docx.Combine, Text.Pandoc.Readers.Docx.Parse, @@ -697,18 +675,17 @@ library Text.Pandoc.Readers.LaTeX.Parsing, Text.Pandoc.Readers.LaTeX.SIunitx, Text.Pandoc.Readers.LaTeX.Table, - Text.Pandoc.Readers.LaTeX.Types, - Text.Pandoc.Readers.Odt.Base, - Text.Pandoc.Readers.Odt.Namespaces, - Text.Pandoc.Readers.Odt.StyleReader, - Text.Pandoc.Readers.Odt.ContentReader, - Text.Pandoc.Readers.Odt.Generic.Fallible, - Text.Pandoc.Readers.Odt.Generic.SetMap, - Text.Pandoc.Readers.Odt.Generic.Utils, - Text.Pandoc.Readers.Odt.Generic.Namespaces, - Text.Pandoc.Readers.Odt.Generic.XMLConverter, - Text.Pandoc.Readers.Odt.Arrows.State, - Text.Pandoc.Readers.Odt.Arrows.Utils, + Text.Pandoc.Readers.ODT.Base, + Text.Pandoc.Readers.ODT.Namespaces, + Text.Pandoc.Readers.ODT.StyleReader, + Text.Pandoc.Readers.ODT.ContentReader, + Text.Pandoc.Readers.ODT.Generic.Fallible, + Text.Pandoc.Readers.ODT.Generic.SetMap, + Text.Pandoc.Readers.ODT.Generic.Utils, + Text.Pandoc.Readers.ODT.Generic.Namespaces, + Text.Pandoc.Readers.ODT.Generic.XMLConverter, + Text.Pandoc.Readers.ODT.Arrows.State, + Text.Pandoc.Readers.ODT.Arrows.Utils, Text.Pandoc.Readers.Org.BlockStarts, Text.Pandoc.Readers.Org.Blocks, Text.Pandoc.Readers.Org.DocumentTree, @@ -741,40 +718,16 @@ library Text.Pandoc.Writers.Blaze, Text.Pandoc.Writers.Powerpoint.Presentation, Text.Pandoc.Writers.Powerpoint.Output, - Text.Pandoc.Lua.ErrorConversion, - Text.Pandoc.Lua.Filter, - Text.Pandoc.Lua.Global, - Text.Pandoc.Lua.Init, - Text.Pandoc.Lua.Marshal.CommonState, - Text.Pandoc.Lua.Marshal.Context, - Text.Pandoc.Lua.Marshal.PandocError, - Text.Pandoc.Lua.Marshal.ReaderOptions, - Text.Pandoc.Lua.Marshal.Reference, - Text.Pandoc.Lua.Marshal.Sources, - Text.Pandoc.Lua.Marshal.Template, - Text.Pandoc.Lua.Marshal.WriterOptions, - Text.Pandoc.Lua.Module.MediaBag, - Text.Pandoc.Lua.Module.Pandoc, - Text.Pandoc.Lua.Module.System, - Text.Pandoc.Lua.Module.Template, - Text.Pandoc.Lua.Module.Types, - 
Text.Pandoc.Lua.Module.Utils, - Text.Pandoc.Lua.Orphans, - Text.Pandoc.Lua.PandocLua, - Text.Pandoc.Lua.Writer.Classic, - Text.Pandoc.XML.Light, - Text.Pandoc.XML.Light.Types, - Text.Pandoc.XML.Light.Proc, - Text.Pandoc.XML.Light.Output, - Text.Pandoc.Network.HTTP, + Text.Pandoc.TeX, + Text.Pandoc.URI, Text.Pandoc.CSS, Text.Pandoc.CSV, Text.Pandoc.RoffChar, Text.Pandoc.UUID, - Text.Pandoc.Translations, Text.Pandoc.Slides, Text.Pandoc.Image, Text.Pandoc.Citeproc.BibTeX, + Text.Pandoc.Citeproc.Name, Text.Pandoc.Citeproc.CslJson, Text.Pandoc.Citeproc.Data, Text.Pandoc.Citeproc.Locator, @@ -784,16 +737,6 @@ library autogen-modules: Paths_pandoc buildable: True -executable pandoc - import: common-executable - hs-source-dirs: app - main-is: pandoc.hs - buildable: True - other-modules: Paths_pandoc - build-depends: wai-extra >= 3.0.24, - warp, - safe - test-suite test-pandoc import: common-executable type: exitcode-stdio-1.0 @@ -806,16 +749,13 @@ test-suite test-pandoc containers >= 0.4.2.1 && < 0.7, directory >= 1.2.3 && < 1.4, doctemplates >= 0.10 && < 0.11, - exceptions >= 0.8 && < 0.11, filepath >= 1.1 && < 1.5, - hslua >= 2.1 && < 2.3, - mtl >= 2.2 && < 2.3, + mtl >= 2.2 && < 2.4, pandoc-types >= 1.22.2 && < 1.23, process >= 1.2.3 && < 1.7, tasty >= 0.11 && < 1.5, tasty-golden >= 2.3 && < 2.4, tasty-hunit >= 0.9 && < 0.11, - tasty-lua >= 1.0 && < 1.1, tasty-quickcheck >= 0.8 && < 0.11, text >= 1.1.1.0 && < 2.1, time >= 1.5 && < 1.14, @@ -824,8 +764,6 @@ test-suite test-pandoc other-modules: Tests.Old Tests.Command Tests.Helpers - Tests.Lua - Tests.Lua.Module Tests.Shared Tests.Readers.LaTeX Tests.Readers.HTML @@ -849,7 +787,7 @@ test-suite test-pandoc Tests.Readers.RST Tests.Readers.RTF Tests.Readers.Docx - Tests.Readers.Odt + Tests.Readers.ODT Tests.Readers.Txt2Tags Tests.Readers.EPUB Tests.Readers.Muse @@ -859,7 +797,7 @@ test-suite test-pandoc Tests.Readers.DokuWiki Tests.Writers.Native Tests.Writers.ConTeXt - Tests.Writers.Docbook + Tests.Writers.DocBook 
Tests.Writers.HTML Tests.Writers.JATS Tests.Writers.Jira @@ -886,7 +824,7 @@ benchmark benchmark-pandoc hs-source-dirs: benchmark build-depends: bytestring, tasty-bench >= 0.2 && <= 0.4, - mtl >= 2.2 && < 2.3, + mtl >= 2.2 && < 2.4, text >= 1.1.1.0 && < 2.1, deepseq -- we increase heap size to avoid benchmarking garbage collection: diff --git a/shell.nix b/shell.nix index f911f8699d74..04f468553f10 100644 --- a/shell.nix +++ b/shell.nix @@ -6,18 +6,12 @@ let haskellDeps = ps: with ps; [ Diff Glob - HTTP - HTTP - HsYAML - JuicyPixels - QuickCheck - SHA aeson aeson-pretty + array attoparsec base - base-compat - base64-bytestring + base64 binary blaze-html blaze-markup @@ -27,8 +21,6 @@ let commonmark commonmark-extensions commonmark-pandoc - conduit-extra - connection connection containers data-default @@ -40,38 +32,44 @@ let exceptions file-embed filepath + Glob + gridtables haddock-library - haskell-language-server - hsc2hs hslua + hslua-aeson + hslua-module-doclayout + hslua-module-path hslua-module-system hslua-module-text - http-client + hslua-module-version http-client http-client-tls - http-client-tls http-types ipynb jira-wiki-markup + JuicyPixels + lpeg mtl network - network network-uri pandoc-lua-marshal pandoc-types parsec + pretty + pretty-show process random safe scientific + servant-server + SHA skylighting skylighting-core - socks split - streaming-commons syb tagsoup tasty + tasty-bench tasty-golden tasty-hunit tasty-lua @@ -81,12 +79,16 @@ let text text-conversions time - tls + unicode-collation unicode-transforms - unordered-containers - weigh + unix + wai + wai-extra + warp xml xml-conduit + xml-types + yaml zip-archive zlib ]; @@ -98,8 +100,11 @@ let ghc pkgs.gdb haskellPackages.ghcid + haskellPackages.haskell-language-server haskellPackages.cabal2nix haskellPackages.cabal-install + haskellPackages.hlint + haskellPackages.stylish-haskell ]; in pkgs.stdenv.mkDerivation { diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 
239e6d5e972d..fb8a49334ac2 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -47,6 +47,8 @@ module Text.Pandoc , module Text.Pandoc.Logging -- * Typeclass , module Text.Pandoc.Class + -- * Internal data files + , module Text.Pandoc.Data -- * Error handling , module Text.Pandoc.Error -- * Readers: converting /to/ Pandoc format @@ -55,17 +57,23 @@ module Text.Pandoc , module Text.Pandoc.Writers -- * Rendering templates and default templates , module Text.Pandoc.Templates - -- * Miscellaneous + -- * Localization + , setTranslations + , translateTerm + -- * Version information , pandocVersion + , pandocVersionText ) where import Text.Pandoc.Class import Text.Pandoc.Definition +import Text.Pandoc.Data import Text.Pandoc.Error import Text.Pandoc.Generic import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Readers -import Text.Pandoc.Shared (pandocVersion) +import Text.Pandoc.Version (pandocVersion, pandocVersionText) import Text.Pandoc.Templates +import Text.Pandoc.Translations (setTranslations, translateTerm) import Text.Pandoc.Writers diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 66c37b0e0b9a..a87fece665dc 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -1,5 +1,3 @@ -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE CPP #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -16,8 +14,11 @@ Does a pandoc conversion based on command-line options. -} module Text.Pandoc.App ( convertWithOpts + , handleOptInfo , Opt(..) + , OptInfo(..) , LineEnding(..) + , IpynbOutput (..) , Filter(..) , defaultOpts , parseOptions @@ -29,9 +30,7 @@ import qualified Control.Exception as E import Control.Monad ( (>=>), when, forM_ ) import Control.Monad.Trans ( MonadIO(..) 
) import Control.Monad.Catch ( MonadMask ) -import Control.Monad.Except (throwError, catchError) -import qualified Data.ByteString as BS -import qualified Data.ByteString.Char8 as B8 +import Control.Monad.Except (throwError) import qualified Data.ByteString.Lazy as BL import Data.Maybe (fromMaybe, isJust, isNothing) import qualified Data.Set as Set @@ -39,10 +38,7 @@ import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TE -import qualified Data.Text.Encoding as TSE import qualified Data.Text.Encoding.Error as TE -import qualified Data.Text.Encoding.Error as TSE -import Network.URI (URI (..), parseURI) import System.Directory (doesDirectoryExist) import System.Exit (exitSuccess) import System.FilePath ( takeBaseName, takeExtension) @@ -51,33 +47,34 @@ import qualified System.IO as IO (Newline (..)) import Text.Pandoc import Text.Pandoc.Builder (setMeta) import Text.Pandoc.MediaBag (mediaItems) -import Text.Pandoc.MIME (getCharset, MimeType) import Text.Pandoc.Image (svgToPng) import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths) import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts, - IpynbOutput (..)) + IpynbOutput (..), OptInfo(..)) import Text.Pandoc.App.CommandLineOptions (parseOptions, parseOptionsFromArgs, - options) + options, handleOptInfo) +import Text.Pandoc.App.Input (InputParameters (..), readInput) import Text.Pandoc.App.OutputSettings (OutputSettings (..), optToOutputSettings) import Text.Collate.Lang (Lang (..), parseLang) import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), Environment (..), applyFilters) +import qualified Text.Pandoc.Format as Format import Text.Pandoc.PDF (makePDF) +import Text.Pandoc.Scripting (ScriptingEngine (..)) import Text.Pandoc.SelfContained (makeSelfContained) -import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs, - headerShift, isURI, tabFilter, uriPathToPath, filterIpynbOutput, - 
defaultUserDataDir, tshow) +import Text.Pandoc.Shared (eastAsianLineBreakFilter, + headerShift, filterIpynbOutput, tshow) +import Text.Pandoc.URI (isURI) import Text.Pandoc.Writers.Shared (lookupMetaString) import Text.Pandoc.Readers.Markdown (yamlToMeta) -import Text.Pandoc.Readers.Custom (readCustom) import qualified Text.Pandoc.UTF8 as UTF8 #ifndef _WINDOWS import System.Posix.IO (stdOutput) import System.Posix.Terminal (queryTerminal) #endif -convertWithOpts :: Opt -> IO () -convertWithOpts opts = do +convertWithOpts :: ScriptingEngine -> Opt -> IO () +convertWithOpts scriptingEngine opts = do let outputFile = fromMaybe "-" (optOutputFile opts) datadir <- case optDataDir opts of Nothing -> do @@ -100,7 +97,7 @@ convertWithOpts opts = do istty <- liftIO $ queryTerminal stdOutput #endif - res <- runIO $ convertWithOpts' istty datadir opts + res <- runIO $ convertWithOpts' scriptingEngine istty datadir opts case res of Left e -> E.throwIO e Right (output, reports) -> do @@ -119,11 +116,12 @@ convertWithOpts opts = do BinaryOutput bs -> writeFnBinary outputFile bs convertWithOpts' :: (PandocMonad m, MonadIO m, MonadMask m) - => Bool + => ScriptingEngine + -> Bool -> Maybe FilePath -> Opt -> m (PandocOutput, [LogMessage]) -convertWithOpts' istty datadir opts = do +convertWithOpts' scriptingEngine istty datadir opts = do let outputFile = fromMaybe "-" (optOutputFile opts) let filters = optFilters opts let verbosity = optVerbosity opts @@ -151,8 +149,8 @@ convertWithOpts' istty datadir opts = do (map (T.pack . takeExtension) sources) "markdown" return "markdown" - let readerNameBase = T.takeWhile (\c -> c /= '+' && c /= '-') readerName - + flvrd@(Format.FlavoredFormat readerNameBase _extsDiff) <- + Format.parseFlavoredFormat readerName let makeSandboxed pureReader = let files = maybe id (:) (optReferenceDoc opts) . maybe id (:) (optEpubMetadata opts) . 
@@ -168,14 +166,18 @@ convertWithOpts' istty datadir opts = do (reader, readerExts) <- if ".lua" `T.isSuffixOf` readerName - then return (TextReader (readCustom (T.unpack readerName)), mempty) + then do + let scriptPath = T.unpack readerNameBase + (r, extsConf) <- engineReadCustom scriptingEngine scriptPath + rexts <- Format.applyExtensionsDiff extsConf flvrd + return (r, rexts) else if optSandbox opts - then case runPure (getReader readerName) of + then case runPure (getReader flvrd) of Left e -> throwError e Right (r, rexts) -> return (makeSandboxed r, rexts) - else getReader readerName + else getReader flvrd - outputSettings <- optToOutputSettings opts + outputSettings <- optToOutputSettings scriptingEngine opts let format = outputFormat outputSettings let writer = outputWriter outputSettings let writerName = outputWriterName outputSettings @@ -256,9 +258,6 @@ convertWithOpts' istty datadir opts = do let transforms = (case optShiftHeadingLevelBy opts of 0 -> id x -> (headerShift x :)) . - (if optStripEmptyParagraphs opts - then (stripEmptyParagraphs :) - else id) . (if extensionEnabled Ext_east_asian_line_breaks readerExts && not (extensionEnabled Ext_east_asian_line_breaks @@ -280,14 +279,6 @@ convertWithOpts' istty datadir opts = do _ -> Format format) :)) $ [] - let convertTabs = tabFilter (if optPreserveTabs opts || - readerNameBase == "t2t" || - readerNameBase == "man" || - readerNameBase == "tsv" - then 0 - else optTabStop opts) - - when (readerNameBase == "markdown_github" || writerNameBase == "markdown_github") $ report $ Deprecated "markdown_github" "Use gfm instead." @@ -313,28 +304,23 @@ convertWithOpts' istty datadir opts = do let filterEnv = Environment readerOpts writerOptions - inputs <- readSources sources - - doc <- (case reader of - TextReader r - | readerNameBase == "json" -> - mconcat <$> - mapM (inputToText convertTabs - >=> r readerOpts . 
(:[])) inputs - | optFileScope opts -> - mconcat <$> mapM - (inputToText convertTabs - >=> r readerOpts . (:[])) - inputs - | otherwise -> mapM (inputToText convertTabs) inputs - >>= r readerOpts - ByteStringReader r -> - mconcat <$> mapM (r readerOpts . inputToLazyByteString) inputs) + let inputParams = InputParameters + { inputReader = reader + , inputReaderName = readerNameBase + , inputReaderOptions = readerOpts + , inputSources = sources + , inputFileScope = optFileScope opts + , inputSpacesPerTab = if optPreserveTabs opts + then Nothing + else Just (optTabStop opts) + } + + doc <- readInput inputParams >>= ( return . adjustMetadata (metadataFromFile <>) >=> return . adjustMetadata (<> optMetadata opts) >=> return . adjustMetadata (<> cslMetadata) >=> applyTransforms transforms - >=> applyFilters filterEnv filters [T.unpack format] + >=> applyFilters scriptingEngine filterEnv filters [T.unpack format] >=> (if not (optSandbox opts) && (isJust (optExtractMedia opts) || writerNameBase == "docx") -- for fallback pngs @@ -400,64 +386,6 @@ adjustMetadata f (Pandoc meta bs) = Pandoc (f meta) bs applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc applyTransforms transforms d = return $ foldr ($) d transforms -readSources :: PandocMonad m - => [FilePath] -> m [(FilePath, (BS.ByteString, Maybe MimeType))] -readSources srcs = - mapM (\fp -> do t <- readSource fp - return (if fp == "-" then "" else fp, t)) srcs - -readSource :: PandocMonad m - => FilePath -> m (BS.ByteString, Maybe MimeType) -readSource "-" = (,Nothing) <$> readStdinStrict -readSource src = - case parseURI src of - Just u | uriScheme u `elem` ["http:","https:"] -> openURL (T.pack src) - | uriScheme u == "file:" -> - (,Nothing) <$> - readFileStrict (uriPathToPath $ T.pack $ uriPath u) - _ -> (,Nothing) <$> readFileStrict src - -utf8ToText :: PandocMonad m => FilePath -> BS.ByteString -> m Text -utf8ToText fp bs = - case TSE.decodeUtf8' . 
dropBOM $ bs of - Left (TSE.DecodeError _ (Just w)) -> - case BS.elemIndex w bs of - Just offset -> throwError $ PandocUTF8DecodingError (T.pack fp) offset w - Nothing -> throwError $ PandocUTF8DecodingError (T.pack fp) 0 w - Left e -> throwError $ PandocAppError (tshow e) - Right t -> return t - where - dropBOM bs' = - if "\xEF\xBB\xBF" `BS.isPrefixOf` bs' - then BS.drop 3 bs' - else bs' - - -inputToText :: PandocMonad m - => (Text -> Text) - -> (FilePath, (BS.ByteString, Maybe MimeType)) - -> m (FilePath, Text) -inputToText convTabs (fp, (bs,mt)) = - (fp,) . convTabs . T.filter (/='\r') <$> - case mt >>= getCharset of - Just "UTF-8" -> utf8ToText fp bs - Just "ISO-8859-1" -> return $ T.pack $ B8.unpack bs - Just charset -> throwError $ PandocUnsupportedCharsetError charset - Nothing -> catchError - (utf8ToText fp bs) - (\case - PandocUTF8DecodingError{} -> do - report $ NotUTF8Encoded - (if null fp - then "input" - else fp) - return $ T.pack $ B8.unpack bs - e -> throwError e) - -inputToLazyByteString :: (FilePath, (BS.ByteString, Maybe MimeType)) - -> BL.ByteString -inputToLazyByteString (_, (bs,_)) = BL.fromStrict bs - writeFnBinary :: FilePath -> BL.ByteString -> IO () writeFnBinary "-" = BL.putStr writeFnBinary f = BL.writeFile (UTF8.encodePath f) diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs index 27300f369bf6..4e581e187b6a 100644 --- a/src/Text/Pandoc/App/CommandLineOptions.hs +++ b/src/Text/Pandoc/App/CommandLineOptions.hs @@ -1,11 +1,9 @@ {-# LANGUAGE CPP #-} -{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE TypeApplications #-} {- | Module : Text.Pandoc.App.CommandLineOptions Copyright : Copyright (C) 2006-2022 John MacFarlane @@ -20,13 +18,14 @@ Does a pandoc conversion based on command-line options. 
module Text.Pandoc.App.CommandLineOptions ( parseOptions , parseOptionsFromArgs + , handleOptInfo , options , engines , setVariable ) where -import Control.Monad import Control.Monad.Trans import Control.Monad.State.Strict +import Data.Containers.ListUtils (nubOrd) import Data.Aeson.Encode.Pretty (encodePretty', Config(..), keyOrder, defConfig, Indent(..), NumberFormat(..)) import Data.Bifunctor (second) @@ -37,7 +36,6 @@ import Data.List (isPrefixOf) #endif import Data.Maybe (fromMaybe, isJust) import Data.Text (Text) -import HsLua (Exception, getglobal, openlibs, peek, run, top) import Safe (tailDef) import Skylighting (Syntax (..), defaultSyntaxMap) import System.Console.GetOpt @@ -50,49 +48,30 @@ import Text.Pandoc import Text.Pandoc.Builder (setMeta) import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), IpynbOutput (..), DefaultsState (..), applyDefaults, - fullDefaultsPath) + fullDefaultsPath, OptInfo(..)) import Text.Pandoc.Filter (Filter (..)) import Text.Pandoc.Highlighting (highlightingStyles, lookupHighlightingStyle) -import Text.Pandoc.Shared (ordNub, elemText, safeStrRead, defaultUserDataDir) +import Text.Pandoc.Shared (safeStrRead) import Text.Printf - -#ifdef EMBED_DATA_FILES -import Text.Pandoc.Data (dataFiles) -#else -import Paths_pandoc (getDataDir) -import System.Directory (getDirectoryContents) -#endif - import qualified Control.Exception as E +import Control.Monad.Except (ExceptT(..), runExceptT, throwError) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as B import qualified Data.Map as M import qualified Data.Text as T import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Pandoc.Scripting (ScriptingEngine(..)) -#ifdef NIGHTLY -import qualified Language.Haskell.TH as TH -import Data.Time -#endif - -#ifdef NIGHTLY -versionSuffix :: String -versionSuffix = "-nightly-" ++ - $(TH.stringE =<< - TH.runIO (formatTime defaultTimeLocale "%F" <$> Data.Time.getCurrentTime)) -#else -versionSuffix :: String -versionSuffix = 
"" -#endif - -parseOptions :: [OptDescr (Opt -> IO Opt)] -> Opt -> IO Opt +parseOptions :: [OptDescr (Opt -> ExceptT OptInfo IO Opt)] + -> Opt -> IO (Either OptInfo Opt) parseOptions options' defaults = do - rawArgs <- map UTF8.decodeArg <$> getArgs - prg <- getProgName + rawArgs <- map UTF8.decodeArg <$> liftIO getArgs + prg <- liftIO getProgName parseOptionsFromArgs options' defaults prg rawArgs parseOptionsFromArgs - :: [OptDescr (Opt -> IO Opt)] -> Opt -> String -> [String] -> IO Opt + :: [OptDescr (Opt -> ExceptT OptInfo IO Opt)] + -> Opt -> String -> [String] -> IO (Either OptInfo Opt) parseOptionsFromArgs options' defaults prg rawArgs = do let (actions, args, unrecognizedOpts, errors) = getOpt' Permute options' (map UTF8.decodeArg rawArgs) @@ -101,17 +80,12 @@ parseOptionsFromArgs options' defaults prg rawArgs = do foldr (handleUnrecognizedOption . takeWhile (/= '=')) [] unrecognizedOpts - unless (null errors && null unknownOptionErrors) $ - E.throwIO $ PandocOptionError $ T.pack $ - concat errors ++ unlines unknownOptionErrors ++ - ("Try " ++ prg ++ " --help for more information.") - - -- thread option data structure through all supplied option actions - opts <- foldl' (>>=) (return defaults) actions let mbArgs = case args of [] -> Nothing xs -> Just xs - return $ opts{ optInputFiles = + + let adjustOpts opts = + opts{ optInputFiles = map normalizePath <$> (optInputFiles opts <> mbArgs) , optStandalone = -- certain other options imply standalone optStandalone opts || @@ -121,6 +95,108 @@ parseOptionsFromArgs options' defaults prg rawArgs = do not (null (optIncludeBeforeBody opts)) || not (null (optIncludeAfterBody opts)) } + if (null errors && null unknownOptionErrors) + then -- thread option data structure through all supplied option actions + runExceptT $ adjustOpts <$> (foldl' (>>=) (return defaults) actions) + else return $ Left $ OptError $ PandocOptionError $ T.pack $ + concat errors ++ unlines unknownOptionErrors ++ + ("Try " ++ prg ++ " --help for 
more information.") + +-- | React to an 'OptInfo' by printing the requested information +-- and exiting or (if there was a parsing error) raising an error. +handleOptInfo :: ScriptingEngine -> OptInfo -> IO () +handleOptInfo _engine info = E.handle (handleError . Left) $ do + case info of + BashCompletion -> do + datafiles <- getDataFileNames + tpl <- runIOorExplode $ + UTF8.toString <$> + readDefaultDataFile "bash_completion.tpl" + let optnames (Option shorts longs _ _) = + map (\c -> ['-',c]) shorts ++ + map ("--" ++) longs + let allopts = unwords (concatMap optnames options) + UTF8.hPutStrLn stdout $ T.pack $ printf tpl allopts + (T.unpack $ T.unwords readersNames) + (T.unpack $ T.unwords writersNames) + (T.unpack $ T.unwords $ map fst highlightingStyles) + (unwords datafiles) + ListInputFormats -> mapM_ (UTF8.hPutStrLn stdout) readersNames + ListOutputFormats -> mapM_ (UTF8.hPutStrLn stdout) writersNames + ListExtensions mbfmt -> do + let formatName = fromMaybe "markdown" mbfmt + let allExts = getAllExtensions formatName + if formatName `notElem` + (map fst (readers :: [(Text, Reader PandocPure)]) ++ + map fst (writers :: [(Text, Writer PandocPure)])) + then E.throwIO $ PandocOptionError $ formatName <> + " is not a recognized reader or writer format" + else do + let defExts = getDefaultExtensions formatName + let showExt x = + (if extensionEnabled x defExts + then '+' + else if extensionEnabled x allExts + then '-' + else ' ') : drop 4 (show x) + mapM_ (UTF8.hPutStrLn stdout . T.pack . showExt) + (extensionsToList allExts) + ListHighlightLanguages -> do + let langs = [ T.unpack (T.toLower (sShortname s)) + | s <- M.elems defaultSyntaxMap + , sShortname s `notElem` + [T.pack "Alert", T.pack "Alert_indent"] + ] + mapM_ (UTF8.hPutStrLn stdout . T.pack) (sort langs) + ListHighlightStyles -> do + mapM_ (UTF8.hPutStrLn stdout . 
fst) highlightingStyles + PrintDefaultTemplate mbout fmt -> do + let write = maybe (UTF8.hPutStr stdout) (UTF8.writeFile) mbout + templ <- runIO $ do + setUserDataDir Nothing + getDefaultTemplate fmt + case templ of + Right t + | T.null t -> -- e.g. for docx, odt, json: + E.throwIO $ PandocCouldNotFindDataFileError $ T.pack + ("templates/default." ++ T.unpack fmt) + | otherwise -> write t + Left e -> E.throwIO e + PrintDefaultDataFile mbout f -> do + let write = maybe BS.putStr BS.writeFile mbout + runIOorExplode $ readDefaultDataFile (T.unpack f) >>= liftIO . write + PrintHighlightStyle mbout styleName -> do + let write = maybe B.putStr B.writeFile mbout + sty <- runIOorExplode $ lookupHighlightingStyle (T.unpack styleName) + write $ encodePretty' + defConfig{confIndent = Spaces 4 + ,confCompare = keyOrder + (map T.pack + ["text-color" + ,"background-color" + ,"line-number-color" + ,"line-number-background-color" + ,"bold" + ,"italic" + ,"underline" + ,"text-styles"]) + ,confNumFormat = Generic + ,confTrailingNewline = True} sty + VersionInfo -> do + prg <- getProgName + defaultDatadir <- defaultUserDataDir + UTF8.hPutStrLn stdout + $ T.pack + $ prg ++ " " ++ T.unpack pandocVersionText ++ + compileInfo ++ + "\nUser data directory: " ++ defaultDatadir ++ + ('\n':copyrightMessage) + Help -> do + prg <- getProgName + UTF8.hPutStr stdout (T.pack $ usageMessage prg options) + OptError e -> E.throwIO e + exitSuccess + -- | Supported LaTeX engines; the first item is used as default engine -- when going through LaTeX. latexEngines :: [String] @@ -141,11 +217,11 @@ engines = map ("html",) htmlEngines ++ ] pdfEngines :: [String] -pdfEngines = ordNub $ map snd engines +pdfEngines = nubOrd $ map snd engines -- | A list of functions, each transforming the options data structure -- in response to a command-line option. 
-options :: [OptDescr (Opt -> IO Opt)] +options :: [OptDescr (Opt -> ExceptT OptInfo IO Opt)] options = [ Option "fr" ["from","read"] (ReqArg @@ -192,11 +268,16 @@ options = , Option "d" ["defaults"] (ReqArg - (\arg opt -> runIOorExplode $ do - let defsState = DefaultsState { curDefaults = Nothing, - inheritanceGraph = [] } - fp <- fullDefaultsPath (optDataDir opt) arg - evalStateT (applyDefaults opt fp) defsState + (\arg opt -> do + res <- liftIO $ runIO $ do + let defsState = + DefaultsState { curDefaults = Nothing, + inheritanceGraph = [] } + fp <- fullDefaultsPath (optDataDir opt) arg + evalStateT (applyDefaults opt fp) defsState + case res of + Left e -> optError e + Right x -> return x ) "FILE") "" @@ -240,7 +321,7 @@ options = "auto" -> return opt{ optWrap = WrapAuto } "none" -> return opt{ optWrap = WrapNone } "preserve" -> return opt{ optWrap = WrapPreserve } - _ -> E.throwIO $ PandocOptionError + _ -> optError $ PandocOptionError "--wrap must be auto, none, or preserve") "auto|none|preserve") "" -- "Option for wrapping text in output" @@ -261,7 +342,7 @@ options = case safeStrRead arg of Just t | t >= 1 && t <= 6 -> return opt { optTOCDepth = t } - _ -> E.throwIO $ PandocOptionError + _ -> optError $ PandocOptionError "TOC level must be a number 1-6") "NUMBER") "" -- "Number of levels to include in TOC" @@ -277,7 +358,7 @@ options = case safeStrRead ("[" <> arg <> "]") of Just ns -> return opt { optNumberOffset = ns, optNumberSections = True } - _ -> E.throwIO $ PandocOptionError + _ -> optError $ PandocOptionError "could not parse number-offset") "NUMBERS") "" -- "Starting number for sections, subsections, etc." 
@@ -294,7 +375,7 @@ options = TopLevelPart } "default" -> return opt{ optTopLevelDivision = TopLevelDefault } - _ -> E.throwIO $ PandocOptionError $ + _ -> optError $ PandocOptionError $ "Top-level division must be " <> "section, chapter, part, or default" ) "section|chapter|part") @@ -366,7 +447,7 @@ options = (\arg opt -> case safeStrRead arg of Just t | t > 0 -> return opt { optDpi = t } - _ -> E.throwIO $ PandocOptionError + _ -> optError $ PandocOptionError "dpi must be a number greater than 0") "NUMBER") "" -- "Dpi (default 96)" @@ -379,7 +460,7 @@ options = "lf" -> return opt { optEol = LF } "native" -> return opt { optEol = Native } -- mac-syntax (cr) is not supported in ghc-base. - _ -> E.throwIO $ PandocOptionError + _ -> optError $ PandocOptionError "--eol must be crlf, lf, or native") "crlf|lf|native") "" -- "EOL (default OS-dependent)" @@ -389,7 +470,7 @@ options = (\arg opt -> case safeStrRead arg of Just t | t > 0 -> return opt { optColumns = t } - _ -> E.throwIO $ PandocOptionError + _ -> optError $ PandocOptionError "columns must be a number greater than 0") "NUMBER") "" -- "Length of line in characters" @@ -404,7 +485,7 @@ options = (\arg opt -> case safeStrRead arg of Just t | t > 0 -> return opt { optTabStop = t } - _ -> E.throwIO $ PandocOptionError + _ -> optError $ PandocOptionError "tab-stop must be a number greater than 0") "NUMBER") "" -- "Tab stop (default 4)" @@ -415,7 +496,9 @@ options = let b = takeBaseName arg if b `elem` pdfEngines then return opt { optPdfEngine = Just arg } - else E.throwIO $ PandocOptionError $ T.pack $ "pdf-engine must be one of " + else optError $ + PandocOptionError $ T.pack $ + "pdf-engine must be one of " ++ intercalate ", " pdfEngines) "PROGRAM") "" -- "Name of program to use in generating PDF" @@ -502,7 +585,7 @@ options = case safeStrRead arg of Just t -> return opt{ optShiftHeadingLevelBy = t } - _ -> E.throwIO $ PandocOptionError + _ -> optError $ PandocOptionError "shift-heading-level-by takes an integer 
argument") "NUMBER") "" -- "Shift heading level" @@ -515,19 +598,11 @@ options = case safeStrRead arg of Just t | t > 0 && t < 6 -> return opt{ optShiftHeadingLevelBy = t - 1 } - _ -> E.throwIO $ PandocOptionError + _ -> optError $ PandocOptionError "base-header-level must be 1-5") "NUMBER") "" -- "Headers base level" - , Option "" ["strip-empty-paragraphs"] - (NoArg - (\opt -> do - deprecatedOption "--strip-empty-paragraphs" - "Use +empty_paragraphs extension." - return opt{ optStripEmptyParagraphs = True })) - "" -- "Strip empty paragraphs" - , Option "" ["track-changes"] (ReqArg (\arg opt -> do @@ -535,7 +610,7 @@ options = "accept" -> return AcceptChanges "reject" -> return RejectChanges "all" -> return AllChanges - _ -> E.throwIO $ PandocOptionError $ T.pack + _ -> optError $ PandocOptionError $ T.pack ("Unknown option for track-changes: " ++ arg) return opt { optTrackChanges = action }) "accept|reject|all") @@ -558,27 +633,19 @@ options = "block" -> return EndOfBlock "section" -> return EndOfSection "document" -> return EndOfDocument - _ -> E.throwIO $ PandocOptionError $ T.pack + _ -> optError $ PandocOptionError $ T.pack ("Unknown option for reference-location: " ++ arg) return opt { optReferenceLocation = action }) "block|section|document") "" -- "Accepting or reject MS Word track-changes."" - , Option "" ["atx-headers"] - (NoArg - (\opt -> do - deprecatedOption "--atx-headers" - "Use --markdown-headings=atx instead." - return opt { optSetextHeaders = False } )) - "" -- "Use atx-style headers for markdown" - , Option "" ["markdown-headings"] (ReqArg (\arg opt -> do headingFormat <- case arg of "setext" -> pure True "atx" -> pure False - _ -> E.throwIO $ PandocOptionError $ T.pack + _ -> optError $ PandocOptionError $ T.pack ("Unknown markdown heading format: " ++ arg ++ ". 
Expecting atx or setext") pure opt { optSetextHeaders = headingFormat } @@ -586,6 +653,12 @@ options = "setext|atx") "" + , Option "" ["list-tables"] + (NoArg + (\opt -> do + return opt { optListTables = True } )) + "" -- "Use list tables for RST" + , Option "" ["listings"] (NoArg (\opt -> return opt { optListings = True })) @@ -602,7 +675,7 @@ options = case safeStrRead arg of Just t | t >= 0 && t <= 6 -> return opt { optSlideLevel = Just t } - _ -> E.throwIO $ PandocOptionError + _ -> optError $ PandocOptionError "slide level must be a number between 0 and 6") "NUMBER") "" -- "Force header level for slides" @@ -625,7 +698,7 @@ options = "references" -> return ReferenceObfuscation "javascript" -> return JavascriptObfuscation "none" -> return NoObfuscation - _ -> E.throwIO $ PandocOptionError $ T.pack + _ -> optError $ PandocOptionError $ T.pack ("Unknown obfuscation method: " ++ arg) return opt { optEmailObfuscation = method }) "none|javascript|references") @@ -693,7 +766,7 @@ options = case safeStrRead arg of Just t | t >= 1 && t <= 6 -> return opt { optEpubChapterLevel = t } - _ -> E.throwIO $ PandocOptionError + _ -> optError $ PandocOptionError "chapter level must be a number between 1 and 6") "NUMBER") "" -- "Header level at which to split chapters in EPUB" @@ -705,7 +778,7 @@ options = "all" -> return opt{ optIpynbOutput = IpynbOutputAll } "best" -> return opt{ optIpynbOutput = IpynbOutputBest } "none" -> return opt{ optIpynbOutput = IpynbOutputNone } - _ -> E.throwIO $ PandocOptionError + _ -> optError $ PandocOptionError "ipynb-output must be all, none, or best") "all|none|best") "" -- "Starting number for sections, subsections, etc." @@ -727,10 +800,12 @@ options = , Option "" ["csl"] (ReqArg - (\arg opt -> - return opt{ optMetadata = - addMeta "csl" (normalizePath arg) $ - optMetadata opt }) + (\arg opt -> do + case lookupMeta (T.pack "csl") $ optMetadata opt of + Just _ -> optError $ PandocOptionError + "Only one CSL file can be specified." 
+ Nothing -> return opt{ optMetadata = addMeta "csl" (normalizePath arg) $ + optMetadata opt }) "FILE") "" @@ -828,181 +903,69 @@ options = "" -- "Log messages in JSON format to this file." , Option "" ["bash-completion"] - (NoArg - (\_ -> do - datafiles <- getDataFileNames - tpl <- runIOorExplode $ - UTF8.toString <$> - readDefaultDataFile "bash_completion.tpl" - let optnames (Option shorts longs _ _) = - map (\c -> ['-',c]) shorts ++ - map ("--" ++) longs - let allopts = unwords (concatMap optnames options) - UTF8.hPutStrLn stdout $ T.pack $ printf tpl allopts - (T.unpack $ T.unwords readersNames) - (T.unpack $ T.unwords writersNames) - (T.unpack $ T.unwords $ map fst highlightingStyles) - (unwords datafiles) - exitSuccess )) + (NoArg (\_ -> optInfo BashCompletion)) "" -- "Print bash completion script" , Option "" ["list-input-formats"] - (NoArg - (\_ -> do - mapM_ (UTF8.hPutStrLn stdout) readersNames - exitSuccess )) + (NoArg (\_ -> optInfo ListInputFormats)) "" , Option "" ["list-output-formats"] - (NoArg - (\_ -> do - mapM_ (UTF8.hPutStrLn stdout) writersNames - exitSuccess )) + (NoArg (\_ -> optInfo ListOutputFormats)) "" , Option "" ["list-extensions"] - (OptArg - (\arg _ -> do - let extList :: [Extension] - extList = [minBound..maxBound] - let allExts = - case arg of - Nothing -> extensionsFromList extList - Just fmt -> getAllExtensions $ T.pack fmt - let formatName = maybe "markdown" T.pack arg - if formatName `notElem` - (map fst (readers :: [(Text, Reader PandocPure)]) ++ - map fst (writers :: [(Text, Writer PandocPure)])) - then E.throwIO $ PandocOptionError $ formatName <> - " is not a recognized reader or writer format" - else do - let defExts = getDefaultExtensions formatName - let showExt x = - (if extensionEnabled x defExts - then '+' - else if extensionEnabled x allExts - then '-' - else ' ') : drop 4 (show x) - mapM_ (UTF8.hPutStrLn stdout . T.pack . 
showExt) - [ex | ex <- extList, extensionEnabled ex allExts] - exitSuccess ) - "FORMAT") + (OptArg (\arg _ -> optInfo $ ListExtensions $ T.pack <$> arg) + "FORMAT") "" , Option "" ["list-highlight-languages"] - (NoArg - (\_ -> do - let langs = [ T.unpack (T.toLower (sShortname s)) - | s <- M.elems defaultSyntaxMap - , sShortname s `notElem` - [T.pack "Alert", T.pack "Alert_indent"] - ] - mapM_ (UTF8.hPutStrLn stdout . T.pack) (sort langs) - exitSuccess )) + (NoArg (\_ -> optInfo ListHighlightLanguages)) "" , Option "" ["list-highlight-styles"] - (NoArg - (\_ -> do - mapM_ (UTF8.hPutStrLn stdout . fst) highlightingStyles - exitSuccess )) + (NoArg (\_ -> optInfo ListHighlightStyles)) "" , Option "D" ["print-default-template"] (ReqArg - (\arg opt -> do - let write = case optOutputFile opt of - Just f -> UTF8.writeFile f - Nothing -> UTF8.hPutStr stdout - templ <- runIO $ do - setUserDataDir Nothing - getDefaultTemplate (T.pack arg) - case templ of - Right t - | T.null t -> -- e.g. for docx, odt, json: - E.throwIO $ PandocCouldNotFindDataFileError $ T.pack - ("templates/default." ++ arg) - | otherwise -> write t - Left e -> E.throwIO e - exitSuccess) - "FORMAT") + (\arg opts -> optInfo $ + PrintDefaultTemplate (optOutputFile opts) (T.pack arg)) + "FORMAT") "" -- "Print default template for FORMAT" , Option "" ["print-default-data-file"] (ReqArg - (\arg opt -> do - let write = case optOutputFile opt of - Just f -> BS.writeFile f - Nothing -> BS.hPutStr stdout - runIOorExplode $ - readDefaultDataFile arg >>= liftIO . 
write - exitSuccess) - "FILE") + (\arg opts -> optInfo $ + PrintDefaultDataFile (optOutputFile opts) (T.pack arg)) + "FILE") "" -- "Print default data file" , Option "" ["print-highlight-style"] (ReqArg - (\arg opt -> do - let write = maybe B.putStr B.writeFile $ optOutputFile opt - sty <- runIOorExplode $ lookupHighlightingStyle arg - write $ encodePretty' - defConfig{confIndent = Spaces 4 - ,confCompare = keyOrder - (map T.pack - ["text-color" - ,"background-color" - ,"line-number-color" - ,"line-number-background-color" - ,"bold" - ,"italic" - ,"underline" - ,"text-styles"]) - ,confNumFormat = Generic - ,confTrailingNewline = True} sty - exitSuccess) + (\arg opts -> + optInfo $ PrintDefaultDataFile (optOutputFile opts) + (T.pack arg)) "STYLE|FILE") "" -- "Print default template for FORMAT" - , Option "v" ["version"] - (NoArg - (\_ -> do - prg <- getProgName - defaultDatadir <- defaultUserDataDir - luaVersion <- HsLua.run @HsLua.Exception $ do - openlibs - getglobal "_VERSION" - peek top - UTF8.hPutStrLn stdout - $ T.pack - $ prg ++ " " ++ T.unpack pandocVersion ++ versionSuffix ++ - compileInfo ++ "\nScripting engine: " ++ luaVersion ++ - "\nUser data directory: " ++ defaultDatadir ++ - ('\n':copyrightMessage) - exitSuccess )) + (NoArg (\_ -> optInfo VersionInfo)) "" -- "Print version" , Option "h" ["help"] - (NoArg - (\_ -> do - prg <- getProgName - UTF8.hPutStr stdout (T.pack $ usageMessage prg options) - exitSuccess )) + (NoArg (\_ -> optInfo Help)) "" -- "Show help" ] -getDataFileNames :: IO [FilePath] -getDataFileNames = do -#ifdef EMBED_DATA_FILES - let allDataFiles = map fst dataFiles -#else - allDataFiles <- filter (\x -> x /= "." && x /= "..") <$> - (getDataDir >>= getDirectoryContents) -#endif - return $ "reference.docx" : "reference.odt" : "reference.pptx" : allDataFiles +optError :: PandocError -> ExceptT OptInfo IO a +optError = throwError . 
OptError + +optInfo :: OptInfo -> ExceptT OptInfo IO a +optInfo = throwError -- Returns usage message -usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String +usageMessage :: String -> [OptDescr (Opt -> ExceptT OptInfo IO Opt)] -> String usageMessage programName = usageInfo (programName ++ " [OPTIONS] [FILES]") copyrightMessage :: String @@ -1016,7 +979,7 @@ compileInfo = "\nCompiled with pandoc-types " ++ VERSION_pandoc_types ++ ", texmath " ++ VERSION_texmath ++ ", skylighting " ++ VERSION_skylighting ++ ",\nciteproc " ++ VERSION_citeproc ++ - ", ipynb " ++ VERSION_ipynb ++ ", hslua " ++ VERSION_hslua + ", ipynb " ++ VERSION_ipynb handleUnrecognizedOption :: String -> [String] -> [String] handleUnrecognizedOption "--smart" = @@ -1055,14 +1018,14 @@ writersNames = sort ("pdf" : map fst (writers :: [(Text, Writer PandocIO)])) splitField :: String -> (String, String) -splitField = second (tailDef "true") . break (`elemText` ":=") +splitField = second (tailDef "true") . break (\c -> c == ':' || c == '=') -deprecatedOption :: String -> String -> IO () -deprecatedOption o msg = - runIO (report $ Deprecated (T.pack o) (T.pack msg)) >>= - \case +deprecatedOption :: String -> String -> ExceptT OptInfo IO () +deprecatedOption o msg = do + res <- liftIO $ runIO (report $ Deprecated (T.pack o) (T.pack msg)) + case res of Right () -> return () - Left e -> E.throwIO e + Left e -> optError e -- | Set text value in text context. setVariable :: Text -> Text -> Context Text -> Context Text diff --git a/src/Text/Pandoc/App/Input.hs b/src/Text/Pandoc/App/Input.hs new file mode 100644 index 000000000000..fa9c6cf880a2 --- /dev/null +++ b/src/Text/Pandoc/App/Input.hs @@ -0,0 +1,192 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TupleSections #-} +{- | + Module : Text.Pandoc.App.Input + Copyright : © 2006-2022 John MacFarlane + License : GPL-2.0-or-later + Maintainer : John MacFarlane + +Read from the file system into a pandoc document. 
+-} +module Text.Pandoc.App.Input + ( InputParameters (..) + , readInput + ) where + +import Control.Monad ((>=>)) +import Control.Monad.Except (throwError, catchError) +import Data.Text (Text) +import Network.URI (URI (..), parseURI) +import Text.Pandoc.Class ( PandocMonad, openURL + , readFileStrict, readStdinStrict, report) +import Text.Pandoc.Definition (Pandoc (..), Attr, Block (..), Inline (..)) +import Text.Pandoc.Error (PandocError (..)) +import Text.Pandoc.Logging (LogMessage (..)) +import Text.Pandoc.MIME (getCharset, MimeType) +import Text.Pandoc.Options (Extensions, ReaderOptions (..)) +import Text.Pandoc.Readers (Reader (..)) +import Text.Pandoc.Shared (tabFilter, textToIdentifier, tshow) +import Text.Pandoc.URI (uriPathToPath) +import Text.Pandoc.Walk (walk) +import qualified Data.ByteString as BS +import qualified Data.ByteString.Char8 as B8 +import qualified Data.ByteString.Lazy as BL +import qualified Data.Text as T +import qualified Data.Text.Encoding as TSE +import qualified Data.Text.Encoding.Error as TSE + +-- | Settings specifying how and which input should be processed. +data InputParameters m = InputParameters + { inputReader :: Reader m + , inputReaderName :: Text + , inputReaderOptions :: ReaderOptions + , inputSources :: [FilePath] + , inputSpacesPerTab :: Maybe Int + , inputFileScope :: Bool + } + +-- | Read all input into a pandoc document. +readInput :: PandocMonad m => InputParameters m -> m Pandoc +readInput params = do + let sources = inputSources params + let readerName = inputReaderName params + let readerOpts = inputReaderOptions params + let convertTabs :: Text -> Text + convertTabs = tabFilter $ case inputSpacesPerTab params of + Nothing -> 0 + Just ts -> if readerName `elem` ["t2t", "man", "tsv"] + then 0 + else ts + + inputs <- readSources sources + + case inputReader params of + TextReader r + | readerName == "json" -> + mconcat <$> mapM (inputToText convertTabs >=> r readerOpts . 
(:[])) + inputs + | inputFileScope params -> + mconcat <$> mapM + (\source -> do + (fp, txt) <- inputToText convertTabs source + adjustLinksAndIds (readerExtensions readerOpts) + (T.pack fp) (map (T.pack . fst) inputs) + <$> r readerOpts [(fp, txt)]) + inputs + | otherwise -> mapM (inputToText convertTabs) inputs >>= r readerOpts + ByteStringReader r -> + mconcat <$> mapM (r readerOpts . inputToLazyByteString) inputs + +readSources :: PandocMonad m + => [FilePath] -> m [(FilePath, (BS.ByteString, Maybe MimeType))] +readSources srcs = + mapM (\fp -> do t <- readSource fp + return (if fp == "-" then "" else fp, t)) srcs + +-- | Read input from a resource, i.e., either a file, a URL, or stdin +-- (@-@). +readSource :: PandocMonad m + => FilePath -> m (BS.ByteString, Maybe MimeType) +readSource "-" = (,Nothing) <$> readStdinStrict +readSource src = + case parseURI src of + Just u | uriScheme u `elem` ["http:","https:"] -> openURL (T.pack src) + | uriScheme u == "file:" -> + (,Nothing) <$> + readFileStrict (uriPathToPath $ T.pack $ uriPath u) + _ -> (,Nothing) <$> readFileStrict src + +utf8ToText :: PandocMonad m => FilePath -> BS.ByteString -> m Text +utf8ToText fp bs = + case TSE.decodeUtf8' . dropBOM $ bs of + Left (TSE.DecodeError _ (Just w)) -> + case BS.elemIndex w bs of + Just offset -> throwError $ PandocUTF8DecodingError (T.pack fp) offset w + Nothing -> throwError $ PandocUTF8DecodingError (T.pack fp) 0 w + Left e -> throwError $ PandocAppError (tshow e) + Right t -> return t + where + dropBOM bs' = + if "\xEF\xBB\xBF" `BS.isPrefixOf` bs' + then BS.drop 3 bs' + else bs' + +inputToText :: PandocMonad m + => (Text -> Text) + -> (FilePath, (BS.ByteString, Maybe MimeType)) + -> m (FilePath, Text) +inputToText convTabs (fp, (bs,mt)) = + (fp,) . convTabs . 
T.filter (/='\r') <$> + case mt >>= getCharset of + Just "UTF-8" -> utf8ToText fp bs + Just "ISO-8859-1" -> return $ T.pack $ B8.unpack bs + Just charset -> throwError $ PandocUnsupportedCharsetError charset + Nothing -> catchError + (utf8ToText fp bs) + (\case + PandocUTF8DecodingError{} -> do + report $ NotUTF8Encoded + (if null fp + then "input" + else fp) + return $ T.pack $ B8.unpack bs + e -> throwError e) + +inputToLazyByteString :: (FilePath, (BS.ByteString, Maybe MimeType)) + -> BL.ByteString +inputToLazyByteString (_, (bs,_)) = BL.fromStrict bs + +adjustLinksAndIds :: Extensions -> Text -> [Text] -> Pandoc -> Pandoc +adjustLinksAndIds exts thisfile allfiles + | length allfiles > 1 = addDiv . walk fixInline . walk fixBlock + | otherwise = id + where + toIdent :: Text -> Text + toIdent = textToIdentifier exts . T.intercalate "__" . + T.split (\c -> c == '/' || c == '\\') + + addDiv :: Pandoc -> Pandoc + addDiv (Pandoc m bs) + | T.null thisfile = Pandoc m bs + | otherwise = Pandoc m [Div (toIdent thisfile,[],[]) bs] + + fixBlock :: Block -> Block + fixBlock (CodeBlock attr t) = CodeBlock (fixAttrs attr) t + fixBlock (Header lev attr ils) = Header lev (fixAttrs attr) ils + fixBlock (Table attr cap cols th tbs tf) = + Table (fixAttrs attr) cap cols th tbs tf + fixBlock (Div attr bs) = Div (fixAttrs attr) bs + fixBlock x = x + + -- add thisfile as prefix of identifier + fixAttrs :: Attr -> Attr + fixAttrs (i,cs,kvs) + | T.null i = (i,cs,kvs) + | otherwise = + (T.intercalate "__" + (filter (not . T.null) [toIdent thisfile, i]), + cs, kvs) + + -- if URL begins with file from allfiles, convert to + -- an internal link with the appropriate identifier + fixURL :: Text -> Text + fixURL u = + let (a,b) = T.break (== '#') u + filepart = if T.null a + then toIdent thisfile + else toIdent a + fragpart = T.dropWhile (== '#') b + in if T.null a || a `elem` allfiles + then "#" <> T.intercalate "__" + (filter (not . 
T.null) [filepart, fragpart]) + else u + + fixInline :: Inline -> Inline + fixInline (Code attr t) = Code (fixAttrs attr) t + fixInline (Link attr ils (url,tit)) = + Link (fixAttrs attr) ils (fixURL url,tit) + fixInline (Image attr ils (url,tit)) = + Image (fixAttrs attr) ils (fixURL url,tit) + fixInline (Span attr ils) = Span (fixAttrs attr) ils + fixInline x = x diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs index 57951347dfe6..a8b08e7a8b28 100644 --- a/src/Text/Pandoc/App/Opt.hs +++ b/src/Text/Pandoc/App/Opt.hs @@ -18,6 +18,7 @@ Options for pandoc when used as an app. -} module Text.Pandoc.App.Opt ( Opt(..) + , OptInfo(..) , LineEnding (..) , IpynbOutput (..) , DefaultsState (..) @@ -25,7 +26,9 @@ module Text.Pandoc.App.Opt ( , applyDefaults , fullDefaultsPath ) where -import Control.Monad.Except (MonadIO, liftIO, throwError, (>=>), foldM) +import Control.Monad.Except (throwError) +import Control.Monad.Trans (MonadIO, liftIO) +import Control.Monad ((>=>), foldM) import Control.Monad.State.Strict (StateT, modify, gets) import System.FilePath ( addExtension, (), takeExtension, takeDirectory ) import System.Directory ( canonicalizePath ) @@ -43,7 +46,8 @@ import Text.Pandoc.Options (TopLevelDivision (TopLevelDefault), import Text.Pandoc.Class (readFileStrict, fileExists, setVerbosity, report, PandocMonad(lookupEnv), getUserDataDir) import Text.Pandoc.Error (PandocError (PandocParseError, PandocSomeError)) -import Text.Pandoc.Shared (defaultUserDataDir, findM, ordNub) +import Data.Containers.ListUtils (nubOrd) +import Text.Pandoc.Data (defaultUserDataDir) import qualified Text.Pandoc.Parsing as P import Text.Pandoc.Readers.Metadata (yamlMap) import Text.Pandoc.Class.PandocPure @@ -54,8 +58,8 @@ import qualified Data.Text as T import qualified Data.Map as M import qualified Data.ByteString.Char8 as B8 import Text.Pandoc.Definition (Meta(..), MetaValue(..)) -import Data.Aeson (defaultOptions, Options(..), Result(..), camelTo2, - genericToJSON, 
fromJSON) +import Data.Aeson (defaultOptions, Options(..), Result(..), + genericToJSON, fromJSON, camelTo2) import Data.Aeson.TH (deriveJSON) import Control.Applicative ((<|>)) import Data.Yaml @@ -78,6 +82,22 @@ data IpynbOutput = $(deriveJSON defaultOptions{ fieldLabelModifier = map toLower . drop 11 } ''IpynbOutput) +-- | Option parser results requesting informational output. +data OptInfo = + BashCompletion + | ListInputFormats + | ListOutputFormats + | ListExtensions (Maybe Text) + | ListHighlightLanguages + | ListHighlightStyles + | PrintDefaultTemplate (Maybe FilePath) Text + | PrintDefaultDataFile (Maybe FilePath) Text + | PrintHighlightStyle (Maybe FilePath) Text + | VersionInfo + | Help + | OptError PandocError + deriving (Show, Generic) + -- | Data structure for command line options. data Opt = Opt { optTabStop :: Int -- ^ Number of spaces per tab @@ -126,7 +146,6 @@ data Opt = Opt , optFilters :: [Filter] -- ^ Filters to apply , optEmailObfuscation :: ObfuscationMethod , optIdentifierPrefix :: Text - , optStripEmptyParagraphs :: Bool -- ^ Strip empty paragraphs , optIndentedCodeClasses :: [Text] -- ^ Default classes for indented code blocks , optDataDir :: Maybe FilePath , optCiteMethod :: CiteMethod -- ^ Method to output cites @@ -135,6 +154,7 @@ data Opt = Opt , optPdfEngineOpts :: [String] -- ^ Flags to pass to the engine , optSlideLevel :: Maybe Int -- ^ Header level that creates slides , optSetextHeaders :: Bool -- ^ Use atx headers for markdown level 1-2 + , optListTables :: Bool -- ^ Use list tables for RST , optAscii :: Bool -- ^ Prefer ascii output , optDefaultImageExtension :: Text -- ^ Default image extension , optExtractMedia :: Maybe FilePath -- ^ Path to extract embedded media @@ -206,7 +226,6 @@ instance FromJSON Opt where <*> o .:? "filters" .!= optFilters defaultOpts <*> o .:? "email-obfuscation" .!= optEmailObfuscation defaultOpts <*> o .:? "identifier-prefix" .!= optIdentifierPrefix defaultOpts - <*> o .:? 
"strip-empty-paragraphs" .!= optStripEmptyParagraphs defaultOpts <*> o .:? "indented-code-classes" .!= optIndentedCodeClasses defaultOpts <*> o .:? "data-dir" <*> o .:? "cite-method" .!= optCiteMethod defaultOpts @@ -215,6 +234,7 @@ instance FromJSON Opt where <*> o .:? "pdf-engine-opts" .!= optPdfEngineOpts defaultOpts <*> o .:? "slide-level" <*> o .:? "setext-headers" .!= optSetextHeaders defaultOpts + <*> o .:? "list-tables" .!= optListTables defaultOpts <*> o .:? "ascii" .!= optAscii defaultOpts <*> o .:? "default-image-extension" .!= optDefaultImageExtension defaultOpts <*> o .:? "extract-media" @@ -577,8 +597,6 @@ doOpt (k,v) = do "identifier-prefix" -> parseJSON v >>= \x -> return (\o -> o{ optIdentifierPrefix = x }) - "strip-empty-paragraphs" -> - parseJSON v >>= \x -> return (\o -> o{ optStripEmptyParagraphs = x }) "indented-code-classes" -> parseJSON v >>= \x -> return (\o -> o{ optIndentedCodeClasses = x }) @@ -601,14 +619,14 @@ doOpt (k,v) = do return (\o -> o{ optPdfEngineOpts = [unpack x] })) "slide-level" -> parseJSON v >>= \x -> return (\o -> o{ optSlideLevel = x }) - "atx-headers" -> - parseJSON v >>= \x -> return (\o -> o{ optSetextHeaders = not x }) "markdown-headings" -> parseJSON v >>= \x -> return (\o -> case T.toLower x of "atx" -> o{ optSetextHeaders = False } "setext" -> o{ optSetextHeaders = True } _ -> o) + "list-tables" -> + parseJSON v >>= \x -> return (\o -> o{ optListTables = x }) "ascii" -> parseJSON v >>= \x -> return (\o -> o{ optAscii = x }) "default-image-extension" -> @@ -736,7 +754,6 @@ defaultOpts = Opt , optFilters = [] , optEmailObfuscation = NoObfuscation , optIdentifierPrefix = "" - , optStripEmptyParagraphs = False , optIndentedCodeClasses = [] , optDataDir = Nothing , optCiteMethod = Citeproc @@ -745,6 +762,7 @@ defaultOpts = Opt , optPdfEngineOpts = [] , optSlideLevel = Nothing , optSetextHeaders = False + , optListTables = False , optAscii = False , optDefaultImageExtension = "" , optExtractMedia = Nothing @@ -802,7 
+820,14 @@ fullDefaultsPath dataDir file = do else file defaultDataDir <- liftIO defaultUserDataDir let defaultFp = fromMaybe defaultDataDir dataDir "defaults" fp - fromMaybe fp <$> findM fileExists [fp, defaultFp] + fpExists <- fileExists fp + if fpExists + then return fp + else do + defaultFpExists <- fileExists defaultFp + if defaultFpExists + then return defaultFp + else return fp -- | In a list of lists, append another list in front of every list which -- starts with specific element. @@ -817,4 +842,4 @@ expand ps ns n = concatMap (ext n ns) ps cyclic :: Ord a => [[a]] -> Bool cyclic = any hasDuplicate where - hasDuplicate xs = length (ordNub xs) /= length xs + hasDuplicate xs = length (nubOrd xs) /= length xs diff --git a/src/Text/Pandoc/App/OutputSettings.hs b/src/Text/Pandoc/App/OutputSettings.hs index ccdb5311280b..56020cd1ff58 100644 --- a/src/Text/Pandoc/App/OutputSettings.hs +++ b/src/Text/Pandoc/App/OutputSettings.hs @@ -1,5 +1,6 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} @@ -38,8 +39,9 @@ import Text.Pandoc import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths) import Text.Pandoc.App.Opt (Opt (..)) import Text.Pandoc.App.CommandLineOptions (engines, setVariable) +import qualified Text.Pandoc.Format as Format import Text.Pandoc.Highlighting (lookupHighlightingStyle) -import Text.Pandoc.Writers.Custom (writeCustom) +import Text.Pandoc.Scripting (ScriptingEngine (engineWriteCustom)) import qualified Text.Pandoc.UTF8 as UTF8 readUtf8File :: PandocMonad m => FilePath -> m T.Text @@ -55,8 +57,9 @@ data OutputSettings m = OutputSettings } -- | Get output settings from command line options. 
-optToOutputSettings :: (PandocMonad m, MonadIO m) => Opt -> m (OutputSettings m) -optToOutputSettings opts = do +optToOutputSettings :: (PandocMonad m, MonadIO m) + => ScriptingEngine -> Opt -> m (OutputSettings m) +optToOutputSettings scriptingEngine opts = do let outputFile = fromMaybe "-" (optOutputFile opts) when (optDumpArgs opts) . liftIO $ do @@ -87,10 +90,6 @@ optToOutputSettings opts = do return ("html", Nothing) Just f -> return (f, Nothing) - let format = if ".lua" `T.isSuffixOf` writerName - then writerName - else T.toLower $ baseWriterName writerName - let makeSandboxed pureWriter = let files = maybe id (:) (optReferenceDoc opts) . maybe id (:) (optEpubMetadata opts) . @@ -101,23 +100,45 @@ optToOutputSettings opts = do optBibliography opts in case pureWriter of TextWriter w -> TextWriter $ \o d -> sandbox files (w o d) - ByteStringWriter w - -> ByteStringWriter $ \o d -> sandbox files (w o d) - + ByteStringWriter w -> + ByteStringWriter $ \o d -> sandbox files (w o d) - (writer, writerExts) <- - if ".lua" `T.isSuffixOf` format - then return (TextWriter - (\o d -> writeCustom (T.unpack writerName) o d), mempty) - else if optSandbox opts - then - case runPure (getWriter writerName) of - Left e -> throwError e - Right (w, wexts) -> - return (makeSandboxed w, wexts) - else getWriter (T.toLower writerName) + flvrd@(Format.FlavoredFormat format _extsDiff) <- + Format.parseFlavoredFormat writerName let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput + let processCustomTemplate getDefault = + case optTemplate opts of + _ | not standalone -> return Nothing + Nothing -> Just <$> getDefault + Just tp -> do + -- strip off extensions + let tp' = case takeExtension tp of + "" -> tp <.> T.unpack format + _ -> tp + getTemplate tp' + >>= runWithPartials . 
compileTemplate tp' + >>= (\case + Left e -> throwError $ PandocTemplateError (T.pack e) + Right t -> return $ Just t) + + (writer, writerExts, mtemplate) <- + if "lua" `T.isSuffixOf` format + then do + (w, extsConf, mt) <- engineWriteCustom scriptingEngine (T.unpack format) + wexts <- Format.applyExtensionsDiff extsConf flvrd + templ <- processCustomTemplate mt + return (w, wexts, templ) + else do + tmpl <- processCustomTemplate (compileDefaultTemplate format) + if optSandbox opts + then case runPure (getWriter flvrd) of + Right (w, wexts) -> return (makeSandboxed w, wexts, tmpl) + Left e -> throwError e + else do + (w, wexts) <- getWriter flvrd + return (w, wexts, tmpl) + let addSyntaxMap existingmap f = do res <- liftIO (parseSyntaxDefinition f) @@ -160,6 +181,8 @@ optToOutputSettings opts = do >>= setVariableM "outputfile" (T.pack outputFile) >>= + setVariableM "pandoc-version" pandocVersionText + >>= setFilesVariableM "include-before" (optIncludeBeforeBody opts) >>= setFilesVariableM "include-after" (optIncludeAfterBody opts) @@ -170,8 +193,8 @@ optToOutputSettings opts = do >>= maybe return (setVariableM "title-prefix") (optTitlePrefix opts) >>= - maybe return (setVariableM "epub-cover-image") - (T.pack <$> optEpubCoverImage opts) + maybe return (setVariableM "epub-cover-image" . T.pack) + (optEpubCoverImage opts) >>= setVariableM "curdir" (T.pack curdir) >>= @@ -186,21 +209,8 @@ optToOutputSettings opts = do setVariableM "dzslides-core" dzcore vars else return vars) - templ <- case optTemplate opts of - _ | not standalone -> return Nothing - Nothing -> Just <$> compileDefaultTemplate format - Just tp -> do - -- strip off extensions - let tp' = case takeExtension tp of - "" -> tp <.> T.unpack format - _ -> tp - res <- getTemplate tp' >>= runWithPartials . 
compileTemplate tp' - case res of - Left e -> throwError $ PandocTemplateError $ T.pack e - Right t -> return $ Just t - let writerOpts = def { - writerTemplate = templ + writerTemplate = mtemplate , writerVariables = variables , writerTabStop = optTabStop opts , writerTableOfContents = optTableOfContents opts @@ -224,6 +234,7 @@ optToOutputSettings opts = do , writerSlideLevel = optSlideLevel opts , writerHighlightStyle = hlStyle , writerSetextHeaders = optSetextHeaders opts + , writerListTables = optListTables opts , writerEpubSubdirectory = T.pack $ optEpubSubdirectory opts , writerEpubMetadata = epubMetadata , writerEpubFonts = optEpubFonts opts diff --git a/src/Text/Pandoc/CSS.hs b/src/Text/Pandoc/CSS.hs index 03065cc9b71c..9a8f0bd057d5 100644 --- a/src/Text/Pandoc/CSS.hs +++ b/src/Text/Pandoc/CSS.hs @@ -22,8 +22,9 @@ import Data.Either (fromRight) import Data.Maybe (mapMaybe, listToMaybe) import Data.Text (Text, pack) import Text.Pandoc.Shared (trim) -import Text.Parsec -import Text.Parsec.Text +import Text.Pandoc.Parsing + +type Parser = Parsec Text () ruleParser :: Parser (Text, Text) ruleParser = do diff --git a/src/Text/Pandoc/CSV.hs b/src/Text/Pandoc/CSV.hs index 963fead0dec4..03fb24ffa619 100644 --- a/src/Text/Pandoc/CSV.hs +++ b/src/Text/Pandoc/CSV.hs @@ -19,8 +19,9 @@ module Text.Pandoc.CSV ( import Control.Monad (unless, void, mzero) import Data.Text (Text) import qualified Data.Text as T -import Text.Parsec -import Text.Parsec.Text (Parser) +import Text.Pandoc.Parsing hiding (escaped) + +type Parser = Parsec Text () data CSVOptions = CSVOptions{ csvDelim :: Char diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index d8c6d6cc390a..4830c4ef02e3 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -16,6 +16,7 @@ import Text.Pandoc.Citeproc.Locator (parseLocator, toLocatorMap, LocatorInfo(..)) import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences) import Text.Pandoc.Citeproc.BibTeX (readBibtexString, 
Variant(..)) +import Text.Pandoc.MIME (MimeType) import Text.Pandoc.Readers.RIS (readRIS) import Text.Pandoc.Citeproc.MetaValue (metaValueToReference, metaValueToText) import Text.Pandoc.Readers.Markdown (yamlToRefs) @@ -23,12 +24,14 @@ import Text.Pandoc.Builder (Inlines, Many(..), deleteMeta, setMeta) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Definition as Pandoc import Text.Pandoc.Class (PandocMonad(..), getResourcePath, getUserDataDir, - fetchItem, readDataFile, report, setResourcePath) + fetchItem, report, setResourcePath) +import Text.Pandoc.Data (readDataFile) import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Extensions (pandocExtensions) import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Options (ReaderOptions(..)) -import Text.Pandoc.Shared (stringify, ordNub, tshow) +import Text.Pandoc.Shared (stringify, tshow) +import Data.Containers.ListUtils (nubOrd) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Walk (query, walk, walkM) import Control.Applicative ((<|>)) @@ -235,8 +238,8 @@ insertSpace ils = getRefsFromBib :: PandocMonad m => Locale -> (Text -> Bool) -> Text -> m [Reference Inlines] getRefsFromBib locale idpred fp = do - (raw, _) <- fetchItem fp - case formatFromExtension (T.unpack fp) of + (raw, mt) <- fetchItem fp + case getBibliographyFormat (T.unpack fp) mt of Just f -> getRefs locale f idpred (Just fp) raw Nothing -> throwError $ PandocAppError $ "Could not determine bibliography format for " <> fp @@ -352,17 +355,28 @@ data BibFormat = | Format_ris deriving (Show, Eq, Ord) -formatFromExtension :: FilePath -> Maybe BibFormat -formatFromExtension fp = case dropWhile (== '.') $ takeExtension fp of - "biblatex" -> Just Format_biblatex - "bibtex" -> Just Format_bibtex - "bib" -> Just Format_biblatex - "json" -> Just Format_json - "yaml" -> Just Format_yaml - "yml" -> Just Format_yaml - "ris" -> Just Format_ris - _ -> Nothing - +getBibliographyFormat :: FilePath -> Maybe MimeType -> Maybe 
BibFormat +getBibliographyFormat fp mbmime = do + let ext = takeExtension fp + case ext of + ".biblatex" -> pure Format_biblatex + ".bibtex" -> pure Format_bibtex + ".bib" -> pure Format_biblatex + ".json" -> pure Format_json + ".yaml" -> pure Format_yaml + ".yml" -> pure Format_yaml + ".ris" -> pure Format_ris + _ -> do + mime <- mbmime + case T.takeWhile (/= ';') mime of + "application/x-bibtex" -> pure Format_biblatex + "application/x-reseach-info-systems" -> pure Format_ris + "application/vnd.citationstyles.csl+json" -> pure Format_json + "application/json" -> pure Format_json + "application/x-yaml" -> pure Format_yaml + "text/x-yaml" -> pure Format_yaml + "text/yaml" -> pure Format_yaml + _ -> Nothing isNote :: Inline -> Bool isNote (Cite _ [Note _]) = True @@ -488,8 +502,8 @@ insertRefs refkvs refclasses refs (Pandoc meta bs) = go (Div ("refs",cs,kvs) xs) = do put True -- refHeader isn't used if you have an explicit references div - let cs' = ordNub $ cs ++ refclasses - let kvs' = ordNub $ kvs ++ refkvs + let cs' = nubOrd $ cs ++ refclasses + let kvs' = nubOrd $ kvs ++ refkvs return $ Div ("refs",cs',kvs') (xs ++ refs) go x = return x diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs index 5db99d66be8e..3c787595d1ec 100644 --- a/src/Text/Pandoc/Citeproc/BibTeX.hs +++ b/src/Text/Pandoc/Citeproc/BibTeX.hs @@ -11,7 +11,8 @@ -- License : BSD-style (see LICENSE) -- -- Maintainer : John MacFarlane --- Stability : unstable-- Portability : unportable +-- Stability : unstable +-- Portability : portable -- ----------------------------------------------------------------------------- @@ -19,7 +20,6 @@ module Text.Pandoc.Citeproc.BibTeX ( Variant(..) 
, readBibtexString , writeBibtexString - , toName ) where @@ -35,8 +35,10 @@ import Text.Pandoc.Class (runPure) import qualified Text.Pandoc.Walk as Walk import Citeproc.Types import Citeproc.Pandoc () +import Data.List.Split (splitOn) import Text.Pandoc.Citeproc.Util (toIETF, splitStrWhen) import Text.Pandoc.Citeproc.Data (biblatexStringMap) +import Text.Pandoc.Citeproc.Name (toName, NameOpts(..), emptyName) import Data.Default import Data.Text (Text) import qualified Data.Text as T @@ -44,8 +46,8 @@ import qualified Data.Map as Map import Data.Maybe import Text.Pandoc.Parsing hiding ((<|>), many) import Control.Applicative -import Data.List.Split (splitOn, splitWhen, wordsBy) -import Control.Monad.RWS hiding ((<>)) +import Control.Monad ( guard, MonadPlus(..), void ) +import Control.Monad.RWS ( asks, RWST, gets, modify, evalRWST ) import qualified Data.Sequence as Seq import Data.Char (isAlphaNum, isDigit, isLetter, isUpper, toLower, toUpper, @@ -179,6 +181,8 @@ writeBibtexString opts variant mblang ref = , "type" , "note" , "annote" + , "url" -- not officially supported, but supported by + -- some styles (#8287) ] valToInlines (TextVal t) = B.text t @@ -345,7 +349,7 @@ defaultLang = Lang "en" Nothing (Just "US") [] [] [] -- a map of bibtex "string" macros type StringMap = Map.Map Text Text -type BibParser = Parser Sources (Lang, StringMap) +type BibParser = Parsec Sources (Lang, StringMap) data Item = Item{ identifier :: Text , sourcePos :: SourcePos @@ -404,9 +408,7 @@ itemToReference locale variant item = do -- names let getNameList' f = Just <$> - getNameList (("bibtex", case variant of - Bibtex -> "true" - Biblatex -> "false") : opts) f + getNameList opts f author' <- getNameList' "author" <|> return Nothing containerAuthor' <- getNameList' "bookauthor" <|> return Nothing @@ -793,14 +795,6 @@ parseLaTeX lang t = latex :: Text -> Bib Inlines latex = fmap blocksToInlines . latex' . 
T.strip -type Options = [(Text, Text)] - -parseOptions :: Text -> Options -parseOptions = map breakOpt . T.splitOn "," - where breakOpt x = case T.break (=='=') x of - (w,v) -> (T.toLower $ T.strip w, - T.toLower $ T.strip $ T.drop 1 v) - bibEntries :: BibParser [Item] bibEntries = do skipMany nonEntry @@ -839,17 +833,26 @@ bibString = do updateState (\(l,m) -> (l, Map.insert k v m)) return () -take1WhileP :: Monad m => (Char -> Bool) -> ParserT Sources u m Text +take1WhileP :: Monad m => (Char -> Bool) -> ParsecT Sources u m Text take1WhileP f = T.pack <$> many1 (satisfy f) inBraces :: BibParser Text inBraces = do + char '{' + res <- manyTill + ( take1WhileP (\c -> c /= '{' && c /= '}' && c /= '\\' && c /= '%') + <|> (char '\\' >> T.cons '\\' . T.singleton <$> anyChar) + <|> ("" <$ (char '%' >> anyLine)) + <|> (braced <$> inBraces) + ) (char '}') + return $ T.concat res + +inBracesURL :: BibParser Text +inBracesURL = do char '{' res <- manyTill ( take1WhileP (\c -> c /= '{' && c /= '}' && c /= '\\') - <|> (char '\\' >> (do c <- oneOf "{}" - return $ T.pack ['\\',c]) - <|> return "\\") + <|> (char '\\' >> T.cons '\\' . T.singleton <$> anyChar) <|> (braced <$> inBraces) ) (char '}') return $ T.concat res @@ -867,6 +870,14 @@ inQuotes = do <|> braced <$> inBraces ) (char '"') +inQuotesURL :: BibParser Text +inQuotesURL = do + char '"' + T.concat <$> manyTill + ( take1WhileP (\c -> c /= '{' && c /= '"' && c /= '\\') + <|> (char '\\' >> T.cons '\\' . T.singleton <$> anyChar) + ) (char '"') + fieldName :: BibParser Text fieldName = resolveAlias . 
T.toLower <$> take1WhileP (\c -> @@ -902,7 +913,9 @@ entField = do spaces' char '=' spaces' - vs <- (expandString <|> inQuotes <|> inBraces <|> rawWord) `sepBy` + let inQ = if k == "url" then inQuotesURL else inQuotes + let inB = if k == "url" then inBracesURL else inBraces + vs <- (expandString <|> inQ <|> inB <|> rawWord) `sepBy` try (spaces' >> char '#' >> spaces') spaces' return (k, T.concat vs) @@ -1132,21 +1145,37 @@ concatWith sep = foldl' go mempty B.space <> s -getNameList :: Options -> Text -> Bib [Name] +parseOptions :: Text -> [(Text, Text)] +parseOptions = map breakOpt . T.splitOn "," + where breakOpt x = case T.break (=='=') x of + (w,v) -> (T.toLower $ T.strip w, + T.toLower $ T.strip $ T.drop 1 v) + +optionSet :: Text -> [(Text, Text)] -> Bool +optionSet key opts = case lookup key opts of + Just "true" -> True + Just s -> s == mempty + _ -> False + +getNameList :: [(Text, Text)] -> Text -> Bib [Name] getNameList opts f = do fs <- asks fields case Map.lookup f fs of - Just x -> latexNames opts x + Just x -> latexNames nameopts x Nothing -> notFound f + where + nameopts = NameOpts{ + nameOptsPrefixIsNonDroppingParticle = optionSet "useprefix" opts, + nameOptsUseJuniorComma = optionSet "juniorcomma" opts} -toNameList :: Options -> [Block] -> Bib [Name] +toNameList :: NameOpts -> [Block] -> Bib [Name] toNameList opts [Para xs] = filter (/= emptyName) <$> mapM (toName opts . addSpaceAfterPeriod) (splitByAnd xs) toNameList opts [Plain xs] = toNameList opts [Para xs] toNameList _ _ = mzero -latexNames :: Options -> Text -> Bib [Name] +latexNames :: NameOpts -> Text -> Bib [Name] latexNames opts t = latex' (T.strip t) >>= toNameList opts -- see issue 392 for motivation. We want to treat @@ -1163,109 +1192,6 @@ addSpaceAfterPeriod = go . 
splitStrWhen (=='.') = Str (T.singleton c):Str ".":Space:go (Str (T.singleton d):xs) go (x:xs) = x:go xs -emptyName :: Name -emptyName = - Name { nameFamily = Nothing - , nameGiven = Nothing - , nameDroppingParticle = Nothing - , nameNonDroppingParticle = Nothing - , nameSuffix = Nothing - , nameLiteral = Nothing - , nameCommaSuffix = False - , nameStaticOrdering = False - } - -toName :: MonadPlus m => Options -> [Inline] -> m Name -toName _ [Str "others"] = - return emptyName{ nameLiteral = Just "others" } -toName _ [Span ("",[],[]) ils] = -- corporate author - return emptyName{ nameLiteral = Just $ stringify ils } - -- extended BibLaTeX name format - see #266 -toName _ ils@(Str ys:_) | T.any (== '=') ys = do - let commaParts = splitWhen (== Str ",") - . splitStrWhen (\c -> c == ',' || c == '=' || c == '\160') - $ ils - let addPart ag (Str "given" : Str "=" : xs) = - ag{ nameGiven = case nameGiven ag of - Nothing -> Just $ stringify xs - Just t -> Just $ t <> " " <> stringify xs } - addPart ag (Str "family" : Str "=" : xs) = - ag{ nameFamily = Just $ stringify xs } - addPart ag (Str "prefix" : Str "=" : xs) = - ag{ nameDroppingParticle = Just $ stringify xs } - addPart ag (Str "useprefix" : Str "=" : Str "true" : _) = - ag{ nameNonDroppingParticle = nameDroppingParticle ag - , nameDroppingParticle = Nothing } - addPart ag (Str "suffix" : Str "=" : xs) = - ag{ nameSuffix = Just $ stringify xs } - addPart ag (Space : xs) = addPart ag xs - addPart ag _ = ag - return $ foldl' addPart emptyName commaParts --- First von Last --- von Last, First --- von Last, Jr ,First --- NOTE: biblatex and bibtex differ on: --- Drummond de Andrade, Carlos --- bibtex takes "Drummond de" as the von; --- biblatex takes the whole as a last name. --- See https://github.com/plk/biblatex/issues/236 --- Here we implement the more sensible biblatex behavior. 
-toName opts ils = do - let useprefix = optionSet "useprefix" opts - let usecomma = optionSet "juniorcomma" opts - let bibtex = optionSet "bibtex" opts - let words' = wordsBy (\x -> x == Space || x == Str "\160") - let commaParts = map words' $ splitWhen (== Str ",") - $ splitStrWhen - (\c -> c == ',' || c == '\160') ils - let (first, vonlast, jr) = - case commaParts of - --- First is the longest sequence of white-space separated - -- words starting with an uppercase and that is not the - -- whole string. von is the longest sequence of whitespace - -- separated words whose last word starts with lower case - -- and that is not the whole string. - [fvl] -> let (caps', rest') = span isCapitalized fvl - in if null rest' && not (null caps') - then (init caps', [last caps'], []) - else (caps', rest', []) - [vl,f] -> (f, vl, []) - (vl:j:f:_) -> (f, vl, j ) - [] -> ([], [], []) - - let (von, lastname) = - if bibtex - then case span isCapitalized $ reverse vonlast of - ([],w:ws) -> (reverse ws, [w]) - (vs, ws) -> (reverse ws, reverse vs) - else case break isCapitalized vonlast of - (vs@(_:_), []) -> (init vs, [last vs]) - (vs, ws) -> (vs, ws) - let prefix = T.unwords $ map stringify von - let family = T.unwords $ map stringify lastname - let suffix = T.unwords $ map stringify jr - let given = T.unwords $ map stringify first - return - Name { nameFamily = if T.null family - then Nothing - else Just family - , nameGiven = if T.null given - then Nothing - else Just given - , nameDroppingParticle = if useprefix || T.null prefix - then Nothing - else Just prefix - , nameNonDroppingParticle = if useprefix && not (T.null prefix) - then Just prefix - else Nothing - , nameSuffix = if T.null suffix - then Nothing - else Just suffix - , nameLiteral = Nothing - , nameCommaSuffix = usecomma - , nameStaticOrdering = False - } - ordinalize :: Locale -> Text -> Text ordinalize locale n = let terms = localeTerms locale @@ -1279,20 +1205,6 @@ ordinalize locale n = Just [] -> n Just (t:_) -> 
n <> snd t -isCapitalized :: [Inline] -> Bool -isCapitalized (Str (T.uncons -> Just (c,cs)) : rest) - | isUpper c = True - | isDigit c = isCapitalized (Str cs : rest) - | otherwise = False -isCapitalized (_:rest) = isCapitalized rest -isCapitalized [] = True - -optionSet :: Text -> Options -> Bool -optionSet key opts = case lookup key opts of - Just "true" -> True - Just s -> s == mempty - _ -> False - getTypeAndGenre :: Bib (Text, Maybe Text) getTypeAndGenre = do lang <- gets localeLang diff --git a/src/Text/Pandoc/Citeproc/CslJson.hs b/src/Text/Pandoc/Citeproc/CslJson.hs index 43c1a87ecd39..2a7fc6c185d1 100644 --- a/src/Text/Pandoc/Citeproc/CslJson.hs +++ b/src/Text/Pandoc/Citeproc/CslJson.hs @@ -6,7 +6,8 @@ where import Citeproc.CslJson import Citeproc.Types import Control.Monad.Identity (runIdentity) -import Data.Aeson (eitherDecodeStrict') +import Data.Aeson (eitherDecodeStrict', FromJSON(parseJSON), (.:), Value(..)) +import Data.Aeson.Types (parseEither) import Data.ByteString (ByteString) import Text.Pandoc.Builder as B import Data.Text (Text) @@ -31,8 +32,11 @@ fromCslJson (CslDiv t x) = B.spanWith ("",["csl-" <> t],[]) (fromCslJson x) fromCslJson (CslLink u x) = B.link u "" (fromCslJson x) cslJsonToReferences :: ByteString -> Either String [Reference Inlines] -cslJsonToReferences raw = - case eitherDecodeStrict' raw of - Left e -> Left e - Right cslrefs -> Right $ - map (runIdentity . traverse (return . fromCslJson)) cslrefs +cslJsonToReferences raw = do + items <- + case eitherDecodeStrict' raw of + Left e -> Left e + Right (Object o) -> parseEither (.: "items") o + Right val@(Array _) -> parseEither parseJSON val + Right _ -> Left "expecting Object or Array" + pure $ map (runIdentity . traverse (return . 
fromCslJson)) items diff --git a/src/Text/Pandoc/Citeproc/Locator.hs b/src/Text/Pandoc/Citeproc/Locator.hs index 35d5388b0065..ee5459e3d7ac 100644 --- a/src/Text/Pandoc/Citeproc/Locator.hs +++ b/src/Text/Pandoc/Citeproc/Locator.hs @@ -12,9 +12,8 @@ import Text.Pandoc.Citeproc.Util (splitStrWhen) import Data.Text (Text) import qualified Data.Text as T import Data.List (foldl') -import Text.Parsec import Text.Pandoc.Definition -import Text.Pandoc.Parsing (romanNumeral) +import Text.Pandoc.Parsing import Text.Pandoc.Shared (stringify) import Control.Monad (mzero) import qualified Data.Map as M @@ -80,9 +79,9 @@ pLocatorDelimited locMap = try $ do pLocatorLabelDelimited :: LocatorMap -> LocatorParser (Text, Text, Bool) pLocatorLabelDelimited locMap - = pLocatorLabel' locMap lim <|> return ("", "page", True) - where - lim = stringify <$> anyToken + = pLocatorLabel' locMap (stringify <$> anyToken) + <|> (("", "page", True) <$ lookAhead (pMatchChar "digit" isDigit)) + <|> (pure ("", "", True)) pLocatorIntegrated :: LocatorMap -> LocatorParser LocatorInfo pLocatorIntegrated locMap = try $ do @@ -170,7 +169,7 @@ pBalancedBraces braces p = try $ do isc c = stringify <$> pMatchChar [c] (== c) sur c c' m = try $ do - (d, mid) <- between (isc c) (isc c') (option (False, "") m) + (d, mid) <- isc c *> option (False, "") m <* isc c' return (d, T.cons c . 
flip T.snoc c' $ mid) flattened = concatMap (\(o, c) -> [o, c]) braces @@ -296,6 +295,6 @@ locatorTerms = , "paragraph" , "part" , "section" - , "sub verbo" + , "sub-verbo" , "verse" , "volume" ] diff --git a/src/Text/Pandoc/Citeproc/MetaValue.hs b/src/Text/Pandoc/Citeproc/MetaValue.hs index b4de794260bb..ce7b1285c486 100644 --- a/src/Text/Pandoc/Citeproc/MetaValue.hs +++ b/src/Text/Pandoc/Citeproc/MetaValue.hs @@ -3,7 +3,6 @@ module Text.Pandoc.Citeproc.MetaValue ( referenceToMetaValue , metaValueToReference , metaValueToText - , metaValueToPath ) where @@ -28,9 +27,6 @@ metaValueToText (MetaBlocks bls) = Just $ stringify bls metaValueToText (MetaList xs) = T.unwords <$> mapM metaValueToText xs metaValueToText _ = Nothing -metaValueToPath :: MetaValue -> Maybe FilePath -metaValueToPath = fmap T.unpack . metaValueToText - metaValueToBool :: MetaValue -> Maybe Bool metaValueToBool (MetaBool b) = Just b metaValueToBool (MetaString "true") = Just True diff --git a/src/Text/Pandoc/Citeproc/Name.hs b/src/Text/Pandoc/Citeproc/Name.hs new file mode 100644 index 000000000000..d62a9e4a1578 --- /dev/null +++ b/src/Text/Pandoc/Citeproc/Name.hs @@ -0,0 +1,153 @@ +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE FlexibleContexts #-} +{-# OPTIONS_GHC -fno-warn-unused-do-bind #-} +----------------------------------------------------------------------------- +-- | +-- Module : Text.CSL.Input.Name +-- Copyright : (c) John MacFarlane +-- License : BSD-style (see LICENSE) +-- +-- Maintainer : John MacFarlane +-- Stability : unstable +-- Portability : portable +-- +----------------------------------------------------------------------------- + +module Text.Pandoc.Citeproc.Name + ( toName + , NameOpts(..) 
+ , emptyName + ) + where + +import Text.Pandoc.Definition +import Text.Pandoc.Shared (stringify) +import Citeproc.Types +import Citeproc.Pandoc () +import Text.Pandoc.Citeproc.Util (splitStrWhen) +import qualified Data.Text as T +import Data.List.Split (splitWhen, wordsBy) +import Data.Char (isUpper, isDigit) +import Data.List (foldl') + +emptyName :: Name +emptyName = + Name { nameFamily = Nothing + , nameGiven = Nothing + , nameDroppingParticle = Nothing + , nameNonDroppingParticle = Nothing + , nameSuffix = Nothing + , nameLiteral = Nothing + , nameCommaSuffix = False + , nameStaticOrdering = False + } + +-- | Options for 'toName'. +data NameOpts = + NameOpts + { nameOptsPrefixIsNonDroppingParticle :: Bool + -- ^ Treat a prefix on the last name as a non-dropping particle + -- (default is to treat it as a dropping particle). This corresponds + -- to the biblatex option @useprefix@. + , nameOptsUseJuniorComma :: Bool + -- ^ Put a comma before a suffix like "Jr." This corresponds to the + -- biblatex option @juniorcomma@. + } deriving (Show) + +-- | Parse a list of 'Inline's into a citeproc 'Name', identifying +-- first and last name, particles, suffixes. +toName :: Monad m => NameOpts -> [Inline] -> m Name +toName _ [Str "others"] = + return emptyName{ nameLiteral = Just "others" } +toName _ [Span ("",[],[]) ils] = -- corporate author + return emptyName{ nameLiteral = Just $ stringify ils } +-- extended BibLaTeX name format - see #266 +toName _ ils@(Str ys:_) | T.any (== '=') ys = do + let commaParts = splitWhen (== Str ",") + . 
splitStrWhen (\c -> c == ',' || c == '=' || c == '\160') + $ ils + let addPart ag (Str "given" : Str "=" : xs) = + ag{ nameGiven = case nameGiven ag of + Nothing -> Just $ stringify xs + Just t -> Just $ t <> " " <> stringify xs } + addPart ag (Str "family" : Str "=" : xs) = + ag{ nameFamily = Just $ stringify xs } + addPart ag (Str "prefix" : Str "=" : xs) = + ag{ nameDroppingParticle = Just $ stringify xs } + addPart ag (Str "useprefix" : Str "=" : Str "true" : _) = + ag{ nameNonDroppingParticle = nameDroppingParticle ag + , nameDroppingParticle = Nothing } + addPart ag (Str "suffix" : Str "=" : xs) = + ag{ nameSuffix = Just $ stringify xs } + addPart ag (Space : xs) = addPart ag xs + addPart ag _ = ag + return $ foldl' addPart emptyName commaParts +-- First von Last +-- von Last, First +-- von Last, Jr ,First +-- NOTE: biblatex and bibtex differ on: +-- Drummond de Andrade, Carlos +-- bibtex takes "Drummond de" as the von; +-- biblatex takes the whole as a last name. +-- See https://github.com/plk/biblatex/issues/236 +-- Here we implement the more sensible biblatex behavior. +toName opts ils = do + let words' = wordsBy (\x -> x == Space || x == Str "\160") + let commaParts = map words' $ splitWhen (== Str ",") + $ splitStrWhen + (\c -> c == ',' || c == '\160') ils + let (first, vonlast, jr) = + case commaParts of + --- First is the longest sequence of white-space separated + -- words starting with an uppercase and that is not the + -- whole string. von is the longest sequence of whitespace + -- separated words whose last word starts with lower case + -- and that is not the whole string. 
+ [fvl] -> let (caps', rest') = span isCapitalized fvl + in if null rest' && not (null caps') + then (init caps', [last caps'], []) + else (caps', rest', []) + [vl,f] -> (f, vl, []) + (vl:j:f:_) -> (f, vl, j ) + [] -> ([], [], []) + + let (von, lastname) = + case break isCapitalized vonlast of + (vs@(_:_), []) -> (init vs, [last vs]) + (vs, ws) -> (vs, ws) + let prefix = T.unwords $ map stringify von + let family = T.unwords $ map stringify lastname + let suffix = T.unwords $ map stringify jr + let given = T.unwords $ map stringify first + return + Name { nameFamily = if T.null family + then Nothing + else Just family + , nameGiven = if T.null given + then Nothing + else Just given + , nameDroppingParticle = if nameOptsPrefixIsNonDroppingParticle opts || + T.null prefix + then Nothing + else Just prefix + , nameNonDroppingParticle = if nameOptsPrefixIsNonDroppingParticle opts && + not (T.null prefix) + then Just prefix + else Nothing + , nameSuffix = if T.null suffix + then Nothing + else Just suffix + , nameLiteral = Nothing + , nameCommaSuffix = nameOptsUseJuniorComma opts + , nameStaticOrdering = False + } + +isCapitalized :: [Inline] -> Bool +isCapitalized (Str (T.uncons -> Just (c,cs)) : rest) + | isUpper c = True + | isDigit c = isCapitalized (Str cs : rest) + | otherwise = False +isCapitalized (_:rest) = isCapitalized rest +isCapitalized [] = True diff --git a/src/Text/Pandoc/Citeproc/Util.hs b/src/Text/Pandoc/Citeproc/Util.hs index 8bffc0f328ae..791041bcd902 100644 --- a/src/Text/Pandoc/Citeproc/Util.hs +++ b/src/Text/Pandoc/Citeproc/Util.hs @@ -79,4 +79,3 @@ toIETF "ukrainian" = "uk-UA" toIETF "vietnamese" = "vi-VN" toIETF "latin" = "la" toIETF x = x - diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs index 6394df251244..a1e51e0cf899 100644 --- a/src/Text/Pandoc/Class.hs +++ b/src/Text/Pandoc/Class.hs @@ -27,5 +27,5 @@ import Text.Pandoc.Class.CommonState (CommonState (..)) import Text.Pandoc.Class.PandocMonad import 
Text.Pandoc.Class.PandocIO import Text.Pandoc.Class.PandocPure -import Text.Pandoc.Translations (Translations) +import Text.Pandoc.Translations.Types (Translations) import Text.Pandoc.Class.Sandbox diff --git a/src/Text/Pandoc/Class/CommonState.hs b/src/Text/Pandoc/Class/CommonState.hs index 796a4afd59fd..4e04b5add2b6 100644 --- a/src/Text/Pandoc/Class/CommonState.hs +++ b/src/Text/Pandoc/Class/CommonState.hs @@ -22,7 +22,7 @@ import Data.Text (Text) import Text.Collate.Lang (Lang) import Text.Pandoc.MediaBag (MediaBag) import Text.Pandoc.Logging (LogMessage, Verbosity (WARNING)) -import Text.Pandoc.Translations (Translations) +import Text.Pandoc.Translations.Types (Translations) -- | 'CommonState' represents state that is used by all -- instances of 'PandocMonad'. Normally users should not diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index e108dd13e8c1..534ce74a9a9c 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -23,6 +23,7 @@ and writers. module Text.Pandoc.Class.PandocMonad ( PandocMonad(..) 
+ , getTimestamp , getPOSIXTime , getZonedTime , readFileFromDirs @@ -45,22 +46,17 @@ module Text.Pandoc.Class.PandocMonad , setOutputFile , setResourcePath , getResourcePath - , readDefaultDataFile - , readDataFile , readMetadataFile , fillMediaBag , toLang - , setTranslations - , translateTerm , makeCanonical , findFileWithDataFallback - , getTimestamp + , checkUserDataDir ) where -import Codec.Archive.Zip -import Control.Monad.Except (MonadError (catchError, throwError), - MonadTrans, lift, when) -import Data.List (foldl') +import Control.Monad.Except (MonadError (catchError, throwError)) +import Control.Monad.Trans (MonadTrans, lift) +import Control.Monad (when) import Data.Time (UTCTime) import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds, posixSecondsToUTCTime) @@ -69,30 +65,25 @@ import Network.URI ( escapeURIString, nonStrictRelativeTo, unEscapeString, parseURIReference, isAllowedInURI, parseURI, URI(..) ) import System.FilePath ((), takeExtension, dropExtension, - isRelative, splitDirectories, makeRelative) + isRelative, makeRelative) import System.Random (StdGen) -import Text.Collate.Lang (Lang(..), parseLang, renderLang) +import Text.Collate.Lang (Lang(..), parseLang) import Text.Pandoc.Class.CommonState (CommonState (..)) import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.MIME (MimeType, getMimeType) import Text.Pandoc.MediaBag (MediaBag, lookupMedia, MediaItem(..)) -import Text.Pandoc.Shared (uriPathToPath, safeRead) -import Text.Pandoc.Translations (Term(..), Translations, lookupTerm, - readTranslations) +import Text.Pandoc.Shared (safeRead, makeCanonical) +import Text.Pandoc.URI (uriPathToPath) import Text.Pandoc.Walk (walkM) import Text.Parsec (ParsecT, getPosition, sourceLine, sourceName) import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import qualified Data.Text as T import qualified Debug.Trace -import qualified System.FilePath.Posix as Posix import 
qualified Text.Pandoc.MediaBag as MB import qualified Text.Pandoc.UTF8 as UTF8 -#ifdef EMBED_DATA_FILES -import Text.Pandoc.Data (dataFiles) -#endif -- | The PandocMonad typeclass contains all the potentially -- IO-related functions used in pandoc's readers and writers. @@ -182,21 +173,6 @@ report msg = do when (level <= verbosity) $ logOutput msg modifyCommonState $ \st -> st{ stLog = msg : stLog st } --- | Get the time from the @SOURCE_DATE_EPOCH@ --- environment variable. The variable should contain a --- unix time stamp, the number of seconds since midnight Jan 01 --- 1970 UTC. If the variable is not set or cannot be --- parsed as a unix time stamp, the current time is returned. --- This function is designed to make possible reproducible --- builds in formats that include a creation timestamp. -getTimestamp :: PandocMonad m => m UTCTime -getTimestamp = do - mbSourceDateEpoch <- lookupEnv "SOURCE_DATE_EPOCH" - case mbSourceDateEpoch >>= safeRead of - Just (epoch :: Integer) -> - return $ posixSecondsToUTCTime $ fromIntegral epoch - Nothing -> getCurrentTime - -- | Determine whether tracing is enabled. This affects -- the behavior of 'trace'. If tracing is not enabled, -- 'trace' does nothing. @@ -266,14 +242,28 @@ getResourcePath = getsCommonState stResourcePath setResourcePath :: PandocMonad m => [FilePath] -> m () setResourcePath ps = modifyCommonState $ \st -> st{stResourcePath = ps} --- | Get the POSIX time. +-- | Get the current UTC time. If the @SOURCE_DATE_EPOCH@ environment +-- variable is set to a unix time (number of seconds since midnight +-- Jan 01 1970 UTC), it is used instead of the current time, to support +-- reproducible builds. +getTimestamp :: PandocMonad m => m UTCTime +getTimestamp = do + mbSourceDateEpoch <- lookupEnv "SOURCE_DATE_EPOCH" + case mbSourceDateEpoch >>= safeRead of + Just (epoch :: Integer) -> + return $ posixSecondsToUTCTime $ fromIntegral epoch + Nothing -> getCurrentTime + +-- | Get the POSIX time. 
If @SOURCE_DATE_EPOCH@ is set to a unix time, +-- it is used instead of the current time. getPOSIXTime :: PandocMonad m => m POSIXTime -getPOSIXTime = utcTimeToPOSIXSeconds <$> getCurrentTime +getPOSIXTime = utcTimeToPOSIXSeconds <$> getTimestamp --- | Get the zoned time. +-- | Get the zoned time. If @SOURCE_DATE_EPOCH@ is set to a unix time, +-- value (POSIX time), it is used instead of the current time. getZonedTime :: PandocMonad m => m ZonedTime getZonedTime = do - t <- getCurrentTime + t <- getTimestamp tz <- getCurrentTimeZone return $ utcToZonedTime tz t @@ -295,62 +285,6 @@ toLang (Just s) = return Nothing Right l -> return (Just l) --- | Select the language to use with 'translateTerm'. --- Note that this does not read a translation file; --- that is only done the first time 'translateTerm' is --- used. -setTranslations :: PandocMonad m => Lang -> m () -setTranslations lang = - modifyCommonState $ \st -> st{ stTranslations = Just (lang, Nothing) } - --- | Load term map. -getTranslations :: PandocMonad m => m Translations -getTranslations = do - mbtrans <- getsCommonState stTranslations - case mbtrans of - Nothing -> return mempty -- no language defined - Just (_, Just t) -> return t - Just (lang, Nothing) -> do -- read from file - let translationFile = "translations/" <> renderLang lang <> ".yaml" - let fallbackFile = "translations/" <> langLanguage lang <> ".yaml" - let getTrans fp = do - bs <- readDataFile fp - case readTranslations (UTF8.toText bs) of - Left e -> do - report $ CouldNotLoadTranslations (renderLang lang) - (T.pack fp <> ": " <> e) - -- make sure we don't try again... 
- modifyCommonState $ \st -> - st{ stTranslations = Nothing } - return mempty - Right t -> do - modifyCommonState $ \st -> - st{ stTranslations = Just (lang, Just t) } - return t - catchError (getTrans $ T.unpack translationFile) - (\_ -> - catchError (getTrans $ T.unpack fallbackFile) - (\e -> do - report $ CouldNotLoadTranslations (renderLang lang) - $ case e of - PandocCouldNotFindDataFileError _ -> - "data file " <> fallbackFile <> " not found" - _ -> "" - -- make sure we don't try again... - modifyCommonState $ \st -> st{ stTranslations = Nothing } - return mempty)) - --- | Get a translation from the current term map. --- Issue a warning if the term is not defined. -translateTerm :: PandocMonad m => Term -> m T.Text -translateTerm term = do - translations <- getTranslations - case lookupTerm term translations of - Just s -> return s - Nothing -> do - report $ NoTranslation $ T.pack $ show term - return "" - -- | Specialized version of parseURIReference that disallows -- single-letter schemes. Reason: these are usually windows absolute -- paths. @@ -436,146 +370,6 @@ downloadOrRead s = do convertSlash '\\' = '/' convertSlash x = x --- | Retrieve default reference.docx. -getDefaultReferenceDocx :: PandocMonad m => m Archive -getDefaultReferenceDocx = do - let paths = ["[Content_Types].xml", - "_rels/.rels", - "docProps/app.xml", - "docProps/core.xml", - "docProps/custom.xml", - "word/document.xml", - "word/fontTable.xml", - "word/footnotes.xml", - "word/comments.xml", - "word/numbering.xml", - "word/settings.xml", - "word/webSettings.xml", - "word/styles.xml", - "word/_rels/document.xml.rels", - "word/_rels/footnotes.xml.rels", - "word/theme/theme1.xml"] - let toLazy = BL.fromChunks . (:[]) - let pathToEntry path = do - epochtime <- floor . 
utcTimeToPOSIXSeconds <$> getTimestamp - contents <- toLazy <$> readDataFile ("docx/" ++ path) - return $ toEntry path epochtime contents - datadir <- getUserDataDir - mbArchive <- case datadir of - Nothing -> return Nothing - Just d -> do - exists <- fileExists (d "reference.docx") - if exists - then return (Just (d "reference.docx")) - else return Nothing - case mbArchive of - Just arch -> toArchive <$> readFileLazy arch - Nothing -> foldr addEntryToArchive emptyArchive <$> - mapM pathToEntry paths - --- | Retrieve default reference.odt. -getDefaultReferenceODT :: PandocMonad m => m Archive -getDefaultReferenceODT = do - let paths = ["mimetype", - "manifest.rdf", - "styles.xml", - "content.xml", - "meta.xml", - "settings.xml", - "Configurations2/accelerator/current.xml", - "Thumbnails/thumbnail.png", - "META-INF/manifest.xml"] - let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime - contents <- (BL.fromChunks . (:[])) `fmap` - readDataFile ("odt/" ++ path) - return $ toEntry path epochtime contents - datadir <- getUserDataDir - mbArchive <- case datadir of - Nothing -> return Nothing - Just d -> do - exists <- fileExists (d "reference.odt") - if exists - then return (Just (d "reference.odt")) - else return Nothing - case mbArchive of - Just arch -> toArchive <$> readFileLazy arch - Nothing -> foldr addEntryToArchive emptyArchive <$> - mapM pathToEntry paths - --- | Retrieve default reference.pptx. -getDefaultReferencePptx :: PandocMonad m => m Archive -getDefaultReferencePptx = do - -- We're going to narrow this down substantially once we get it - -- working. 
- let paths = [ "[Content_Types].xml" - , "_rels/.rels" - , "docProps/app.xml" - , "docProps/core.xml" - , "ppt/_rels/presentation.xml.rels" - , "ppt/presProps.xml" - , "ppt/presentation.xml" - , "ppt/slideLayouts/_rels/slideLayout1.xml.rels" - , "ppt/slideLayouts/_rels/slideLayout2.xml.rels" - , "ppt/slideLayouts/_rels/slideLayout3.xml.rels" - , "ppt/slideLayouts/_rels/slideLayout4.xml.rels" - , "ppt/slideLayouts/_rels/slideLayout5.xml.rels" - , "ppt/slideLayouts/_rels/slideLayout6.xml.rels" - , "ppt/slideLayouts/_rels/slideLayout7.xml.rels" - , "ppt/slideLayouts/_rels/slideLayout8.xml.rels" - , "ppt/slideLayouts/_rels/slideLayout9.xml.rels" - , "ppt/slideLayouts/_rels/slideLayout10.xml.rels" - , "ppt/slideLayouts/_rels/slideLayout11.xml.rels" - , "ppt/slideLayouts/slideLayout1.xml" - , "ppt/slideLayouts/slideLayout10.xml" - , "ppt/slideLayouts/slideLayout11.xml" - , "ppt/slideLayouts/slideLayout2.xml" - , "ppt/slideLayouts/slideLayout3.xml" - , "ppt/slideLayouts/slideLayout4.xml" - , "ppt/slideLayouts/slideLayout5.xml" - , "ppt/slideLayouts/slideLayout6.xml" - , "ppt/slideLayouts/slideLayout7.xml" - , "ppt/slideLayouts/slideLayout8.xml" - , "ppt/slideLayouts/slideLayout9.xml" - , "ppt/slideMasters/_rels/slideMaster1.xml.rels" - , "ppt/slideMasters/slideMaster1.xml" - , "ppt/slides/_rels/slide1.xml.rels" - , "ppt/slides/slide1.xml" - , "ppt/slides/_rels/slide2.xml.rels" - , "ppt/slides/slide2.xml" - , "ppt/slides/_rels/slide3.xml.rels" - , "ppt/slides/slide3.xml" - , "ppt/slides/_rels/slide4.xml.rels" - , "ppt/slides/slide4.xml" - , "ppt/tableStyles.xml" - , "ppt/theme/theme1.xml" - , "ppt/viewProps.xml" - -- These relate to notes slides. - , "ppt/notesMasters/notesMaster1.xml" - , "ppt/notesMasters/_rels/notesMaster1.xml.rels" - , "ppt/notesSlides/notesSlide1.xml" - , "ppt/notesSlides/_rels/notesSlide1.xml.rels" - , "ppt/notesSlides/notesSlide2.xml" - , "ppt/notesSlides/_rels/notesSlide2.xml.rels" - , "ppt/theme/theme2.xml" - ] - let toLazy = BL.fromChunks . 
(:[]) - let pathToEntry path = do - epochtime <- floor . utcTimeToPOSIXSeconds <$> getCurrentTime - contents <- toLazy <$> readDataFile ("pptx/" ++ path) - return $ toEntry path epochtime contents - datadir <- getUserDataDir - mbArchive <- case datadir of - Nothing -> return Nothing - Just d -> do - exists <- fileExists (d "reference.pptx") - if exists - then return (Just (d "reference.pptx")) - else return Nothing - case mbArchive of - Just arch -> toArchive <$> readFileLazy arch - Nothing -> foldr addEntryToArchive emptyArchive <$> - mapM pathToEntry paths - -- | Checks if the file path is relative to a parent directory. isRelativeToParentDir :: FilePath -> Bool isRelativeToParentDir fname = @@ -590,19 +384,6 @@ checkUserDataDir fname = then getUserDataDir else return Nothing ---- | Read file from user data directory or, ---- if not found there, from the default data files. -readDataFile :: PandocMonad m => FilePath -> m B.ByteString -readDataFile fname = do - datadir <- checkUserDataDir fname - case datadir of - Nothing -> readDefaultDataFile fname - Just userDir -> do - exists <- fileExists (userDir fname) - if exists - then readFileStrict (userDir fname) - else readDefaultDataFile fname - -- | Read metadata file from the working directory or, if not found there, from -- the metadata subdirectory of the user data directory. readMetadataFile :: PandocMonad m => FilePath -> m B.ByteString @@ -610,42 +391,6 @@ readMetadataFile fname = findFileWithDataFallback "metadata" fname >>= \case Nothing -> throwError $ PandocCouldNotFindMetadataFileError (T.pack fname) Just metadataFile -> readFileStrict metadataFile --- | Read file from from the default data files. -readDefaultDataFile :: PandocMonad m => FilePath -> m B.ByteString -readDefaultDataFile "reference.docx" = - B.concat . BL.toChunks . fromArchive <$> getDefaultReferenceDocx -readDefaultDataFile "reference.pptx" = - B.concat . BL.toChunks . 
fromArchive <$> getDefaultReferencePptx -readDefaultDataFile "reference.odt" = - B.concat . BL.toChunks . fromArchive <$> getDefaultReferenceODT -readDefaultDataFile fname = -#ifdef EMBED_DATA_FILES - case lookup (makeCanonical fname) dataFiles of - Nothing -> throwError $ PandocCouldNotFindDataFileError $ T.pack fname - Just contents -> return contents -#else - getDataFileName fname' >>= checkExistence >>= readFileStrict - where fname' = if fname == "MANUAL.txt" then fname else "data" fname - --- | Returns the input filename unchanged if the file exits, and throws --- a `PandocCouldNotFindDataFileError` if it doesn't. -checkExistence :: PandocMonad m => FilePath -> m FilePath -checkExistence fn = do - exists <- fileExists fn - if exists - then return fn - else throwError $ PandocCouldNotFindDataFileError $ T.pack fn -#endif - --- | Canonicalizes a file path by removing redundant @.@ and @..@. -makeCanonical :: FilePath -> FilePath -makeCanonical = Posix.joinPath . transformPathParts . splitDirectories - where transformPathParts = reverse . foldl' go [] - go as "." = as - go ("..":as) ".." = ["..", ".."] <> as - go (_:as) ".." = as - go as x = x : as - -- | Tries to run an action on a file: for each directory given, a -- filepath is created from the given filename, and the action is run on -- that filepath. 
Returns the result of the first successful execution diff --git a/src/Text/Pandoc/Class/PandocPure.hs b/src/Text/Pandoc/Class/PandocPure.hs index 290a6d97c50f..c86b20a05bcb 100644 --- a/src/Text/Pandoc/Class/PandocPure.hs +++ b/src/Text/Pandoc/Class/PandocPure.hs @@ -29,8 +29,17 @@ module Text.Pandoc.Class.PandocPure ) where import Codec.Archive.Zip +import Control.Monad.Trans ( MonadTrans(lift) ) import Control.Monad.Except + ( ExceptT(..), MonadError(throwError), runExceptT ) import Control.Monad.State.Strict + ( StateT(StateT), + State, + MonadState(put, get), + modify, + evalState, + evalStateT ) +import Control.Monad (foldM) import Data.Default import Data.Text (Text) import Data.Time (UTCTime) diff --git a/src/Text/Pandoc/Class/Sandbox.hs b/src/Text/Pandoc/Class/Sandbox.hs index 4243a7be5bb4..64547be924f2 100644 --- a/src/Text/Pandoc/Class/Sandbox.hs +++ b/src/Text/Pandoc/Class/Sandbox.hs @@ -47,4 +47,3 @@ sandbox files action = do mapM_ logOutput (filter ((<= verbosity) . messageVerbosity) newMessages) return result - diff --git a/src/Text/Pandoc/Data.hs b/src/Text/Pandoc/Data.hs index fe543edfac0f..6644428d7740 100644 --- a/src/Text/Pandoc/Data.hs +++ b/src/Text/Pandoc/Data.hs @@ -1,4 +1,8 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE ScopedTypeVariables #-} +#ifdef EMBED_DATA_FILES {-# LANGUAGE TemplateHaskell #-} +#endif {- | Module : Text.Pandoc.Data Copyright : Copyright (C) 2013-2022 John MacFarlane @@ -8,25 +12,236 @@ Maintainer : John MacFarlane Stability : alpha Portability : portable -Provide contents data files as Haskell values. +Access to pandoc's data files. 
-} -module Text.Pandoc.Data (dataFiles) where - +module Text.Pandoc.Data ( readDefaultDataFile + , readDataFile + , getDataFileNames + , defaultUserDataDir + ) where +import Text.Pandoc.Class (PandocMonad(..), checkUserDataDir, getTimestamp, + getUserDataDir, getPOSIXTime) +import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds) +import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString as B -import Data.FileEmbed -import System.FilePath (splitDirectories) -import qualified System.FilePath.Posix as Posix - --- We ensure that the data files are stored using Posix --- path separators (/), even on Windows. -dataFiles :: [(FilePath, B.ByteString)] -dataFiles = map (\(fp, contents) -> - (Posix.joinPath (splitDirectories fp), contents)) dataFiles' - -dataFiles' :: [(FilePath, B.ByteString)] -dataFiles' = ("MANUAL.txt", $(embedFile "MANUAL.txt")) : - -- handle the hidden file separately, since embedDir doesn't - -- include it: - ("docx/_rels/.rels", $(embedFile "data/docx/_rels/.rels")) : - ("pptx/_rels/.rels", $(embedFile "data/pptx/_rels/.rels")) : - $(embedDir "data") +import Codec.Archive.Zip +import qualified Data.Text as T +import Control.Monad.Except (throwError) +import Text.Pandoc.Error (PandocError(..)) +import System.FilePath +import System.Directory +import qualified Control.Exception as E +#ifdef EMBED_DATA_FILES +import Text.Pandoc.Data.BakedIn (dataFiles) +import Text.Pandoc.Shared (makeCanonical) +#else +import Paths_pandoc (getDataDir) +#endif + +-- | Read file from from the default data files. +readDefaultDataFile :: PandocMonad m => FilePath -> m B.ByteString +readDefaultDataFile "reference.docx" = + B.concat . BL.toChunks . fromArchive <$> getDefaultReferenceDocx +readDefaultDataFile "reference.pptx" = + B.concat . BL.toChunks . fromArchive <$> getDefaultReferencePptx +readDefaultDataFile "reference.odt" = + B.concat . BL.toChunks . 
fromArchive <$> getDefaultReferenceODT +readDefaultDataFile fname = +#ifdef EMBED_DATA_FILES + case lookup (makeCanonical fname) dataFiles of + Nothing -> throwError $ PandocCouldNotFindDataFileError $ T.pack fname + Just contents -> return contents +#else + getDataFileName fname' >>= checkExistence >>= readFileStrict + where fname' = if fname == "MANUAL.txt" then fname else "data" fname + +-- | Returns the input filename unchanged if the file exits, and throws +-- a `PandocCouldNotFindDataFileError` if it doesn't. +checkExistence :: PandocMonad m => FilePath -> m FilePath +checkExistence fn = do + exists <- fileExists fn + if exists + then return fn + else throwError $ PandocCouldNotFindDataFileError $ T.pack fn +#endif + +--- | Read file from user data directory or, +--- if not found there, from the default data files. +readDataFile :: PandocMonad m => FilePath -> m B.ByteString +readDataFile fname = do + datadir <- checkUserDataDir fname + case datadir of + Nothing -> readDefaultDataFile fname + Just userDir -> do + exists <- fileExists (userDir fname) + if exists + then readFileStrict (userDir fname) + else readDefaultDataFile fname + +-- | Retrieve default reference.docx. +getDefaultReferenceDocx :: PandocMonad m => m Archive +getDefaultReferenceDocx = do + let paths = ["[Content_Types].xml", + "_rels/.rels", + "docProps/app.xml", + "docProps/core.xml", + "docProps/custom.xml", + "word/document.xml", + "word/fontTable.xml", + "word/footnotes.xml", + "word/comments.xml", + "word/numbering.xml", + "word/settings.xml", + "word/webSettings.xml", + "word/styles.xml", + "word/_rels/document.xml.rels", + "word/_rels/footnotes.xml.rels", + "word/theme/theme1.xml"] + let toLazy = BL.fromChunks . (:[]) + let pathToEntry path = do + epochtime <- floor . 
utcTimeToPOSIXSeconds <$> getTimestamp + contents <- toLazy <$> readDataFile ("docx/" ++ path) + return $ toEntry path epochtime contents + datadir <- getUserDataDir + mbArchive <- case datadir of + Nothing -> return Nothing + Just d -> do + exists <- fileExists (d "reference.docx") + if exists + then return (Just (d "reference.docx")) + else return Nothing + case mbArchive of + Just arch -> toArchive <$> readFileLazy arch + Nothing -> foldr addEntryToArchive emptyArchive <$> + mapM pathToEntry paths + +-- | Retrieve default reference.odt. +getDefaultReferenceODT :: PandocMonad m => m Archive +getDefaultReferenceODT = do + let paths = ["mimetype", + "manifest.rdf", + "styles.xml", + "content.xml", + "meta.xml", + "settings.xml", + "Configurations2/accelerator/current.xml", + "Thumbnails/thumbnail.png", + "META-INF/manifest.xml"] + let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime + contents <- (BL.fromChunks . (:[])) `fmap` + readDataFile ("odt/" ++ path) + return $ toEntry path epochtime contents + datadir <- getUserDataDir + mbArchive <- case datadir of + Nothing -> return Nothing + Just d -> do + exists <- fileExists (d "reference.odt") + if exists + then return (Just (d "reference.odt")) + else return Nothing + case mbArchive of + Just arch -> toArchive <$> readFileLazy arch + Nothing -> foldr addEntryToArchive emptyArchive <$> + mapM pathToEntry paths + +-- | Retrieve default reference.pptx. +getDefaultReferencePptx :: PandocMonad m => m Archive +getDefaultReferencePptx = do + -- We're going to narrow this down substantially once we get it + -- working. 
+ let paths = [ "[Content_Types].xml" + , "_rels/.rels" + , "docProps/app.xml" + , "docProps/core.xml" + , "ppt/_rels/presentation.xml.rels" + , "ppt/presProps.xml" + , "ppt/presentation.xml" + , "ppt/slideLayouts/_rels/slideLayout1.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout2.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout3.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout4.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout5.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout6.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout7.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout8.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout9.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout10.xml.rels" + , "ppt/slideLayouts/_rels/slideLayout11.xml.rels" + , "ppt/slideLayouts/slideLayout1.xml" + , "ppt/slideLayouts/slideLayout10.xml" + , "ppt/slideLayouts/slideLayout11.xml" + , "ppt/slideLayouts/slideLayout2.xml" + , "ppt/slideLayouts/slideLayout3.xml" + , "ppt/slideLayouts/slideLayout4.xml" + , "ppt/slideLayouts/slideLayout5.xml" + , "ppt/slideLayouts/slideLayout6.xml" + , "ppt/slideLayouts/slideLayout7.xml" + , "ppt/slideLayouts/slideLayout8.xml" + , "ppt/slideLayouts/slideLayout9.xml" + , "ppt/slideMasters/_rels/slideMaster1.xml.rels" + , "ppt/slideMasters/slideMaster1.xml" + , "ppt/slides/_rels/slide1.xml.rels" + , "ppt/slides/slide1.xml" + , "ppt/slides/_rels/slide2.xml.rels" + , "ppt/slides/slide2.xml" + , "ppt/slides/_rels/slide3.xml.rels" + , "ppt/slides/slide3.xml" + , "ppt/slides/_rels/slide4.xml.rels" + , "ppt/slides/slide4.xml" + , "ppt/tableStyles.xml" + , "ppt/theme/theme1.xml" + , "ppt/viewProps.xml" + -- These relate to notes slides. + , "ppt/notesMasters/notesMaster1.xml" + , "ppt/notesMasters/_rels/notesMaster1.xml.rels" + , "ppt/notesSlides/notesSlide1.xml" + , "ppt/notesSlides/_rels/notesSlide1.xml.rels" + , "ppt/notesSlides/notesSlide2.xml" + , "ppt/notesSlides/_rels/notesSlide2.xml.rels" + , "ppt/theme/theme2.xml" + ] + let toLazy = BL.fromChunks . 
(:[]) + let pathToEntry path = do + epochtime <- floor <$> getPOSIXTime + contents <- toLazy <$> readDataFile ("pptx/" ++ path) + return $ toEntry path epochtime contents + datadir <- getUserDataDir + mbArchive <- case datadir of + Nothing -> return Nothing + Just d -> do + exists <- fileExists (d "reference.pptx") + if exists + then return (Just (d "reference.pptx")) + else return Nothing + case mbArchive of + Just arch -> toArchive <$> readFileLazy arch + Nothing -> foldr addEntryToArchive emptyArchive <$> + mapM pathToEntry paths + +getDataFileNames :: IO [FilePath] +getDataFileNames = do +#ifdef EMBED_DATA_FILES + let allDataFiles = map fst dataFiles +#else + allDataFiles <- filter (\x -> x /= "." && x /= "..") <$> + (getDataDir >>= getDirectoryContents) +#endif + return $ "reference.docx" : "reference.odt" : "reference.pptx" : allDataFiles + +-- | Return appropriate user data directory for platform. We use +-- XDG_DATA_HOME (or its default value), but for backwards compatibility, +-- we fall back to the legacy user data directory ($HOME/.pandoc on *nix) +-- if the XDG_DATA_HOME is missing and this exists. If neither directory +-- is present, we return the XDG data directory. If the XDG data directory +-- is not defined (e.g. because we are in an environment where $HOME is +-- not defined), we return the empty string. 
+defaultUserDataDir :: IO FilePath +defaultUserDataDir = do + xdgDir <- E.catch (getXdgDirectory XdgData "pandoc") + (\(_ :: E.SomeException) -> return mempty) + legacyDir <- getAppUserDataDirectory "pandoc" + xdgExists <- doesDirectoryExist xdgDir + legacyDirExists <- doesDirectoryExist legacyDir + if not xdgExists && legacyDirExists + then return legacyDir + else return xdgDir diff --git a/src/Text/Pandoc/Data/BakedIn.hs b/src/Text/Pandoc/Data/BakedIn.hs new file mode 100644 index 000000000000..801a631814cc --- /dev/null +++ b/src/Text/Pandoc/Data/BakedIn.hs @@ -0,0 +1,32 @@ +{-# LANGUAGE TemplateHaskell #-} +{- | +Module : Text.Pandoc.Data.BakedIn +Copyright : Copyright (C) 2013-2022 John MacFarlane +License : GNU GPL, version 2 or above + +Maintainer : John MacFarlane +Stability : alpha +Portability : portable + +Provide contents data files as Haskell values. +-} +module Text.Pandoc.Data.BakedIn (dataFiles) where + +import qualified Data.ByteString as B +import Data.FileEmbed +import System.FilePath (splitDirectories) +import qualified System.FilePath.Posix as Posix + +-- We ensure that the data files are stored using Posix +-- path separators (/), even on Windows. 
+dataFiles :: [(FilePath, B.ByteString)] +dataFiles = map (\(fp, contents) -> + (Posix.joinPath (splitDirectories fp), contents)) dataFiles' + +dataFiles' :: [(FilePath, B.ByteString)] +dataFiles' = ("MANUAL.txt", $(embedFile "MANUAL.txt")) : + -- handle the hidden file separately, since embedDir doesn't + -- include it: + ("docx/_rels/.rels", $(embedFile "data/docx/_rels/.rels")) : + ("pptx/_rels/.rels", $(embedFile "data/pptx/_rels/.rels")) : + $(embedDir "data") diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 3e2479d61f2c..434b37240f7b 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -23,18 +23,13 @@ import Control.Exception (Exception, displayException) import Data.Typeable (Typeable) import Data.Word (Word8) import Data.Text (Text) -import Data.List (sortOn) import qualified Data.Text as T -import Data.Ord (Down(..)) import GHC.Generics (Generic) import Network.HTTP.Client (HttpException) import System.Exit (ExitCode (..), exitWith) import System.IO (stderr) import qualified Text.Pandoc.UTF8 as UTF8 -import Text.Pandoc.Sources (Sources(..)) import Text.Printf (printf) -import Text.Parsec.Error -import Text.Parsec.Pos hiding (Line) import Text.Pandoc.Shared (tshow) import Citeproc (CiteprocError, prettyCiteprocError) @@ -43,7 +38,6 @@ data PandocError = PandocIOError Text IOError | PandocShouldNeverHappenError Text | PandocSomeError Text | PandocParseError Text - | PandocParsecError Sources ParseError | PandocMakePDFError Text | PandocOptionError Text | PandocSyntaxMapError Text @@ -53,6 +47,7 @@ data PandocError = PandocIOError Text IOError | PandocXMLError Text Text | PandocFilterError Text Text | PandocLuaError Text + | PandocNoScriptingEngine | PandocCouldNotFindDataFileError Text | PandocCouldNotFindMetadataFileError Text | PandocResourceNotFound Text @@ -63,6 +58,7 @@ data PandocError = PandocIOError Text IOError | PandocUTF8DecodingError Text Int Word8 | PandocIpynbDecodingError Text | 
PandocUnsupportedCharsetError Text + | PandocFormatError Text Text | PandocUnknownReaderError Text | PandocUnknownWriterError Text | PandocUnsupportedExtensionError Text Text @@ -83,28 +79,6 @@ renderError e = "Please report this to pandoc's developers: " <> s PandocSomeError s -> s PandocParseError s -> s - PandocParsecError (Sources inputs) err' -> - let errPos = errorPos err' - errLine = sourceLine errPos - errColumn = sourceColumn errPos - errFile = sourceName errPos - errorInFile = - case sortOn (Down . sourceLine . fst) - [ (pos,t) - | (pos,t) <- inputs - , sourceName pos == errFile - , sourceLine pos <= errLine - ] of - [] -> "" - ((pos,txt):_) -> - let ls = T.lines txt <> [""] - ln = (errLine - sourceLine pos) + 1 - in if length ls > ln && ln >= 1 - then T.concat ["\n", ls !! (ln - 1) - ,"\n", T.replicate (errColumn - 1) " " - ,"^"] - else "" - in "Error at " <> tshow err' <> errorInFile PandocMakePDFError s -> s PandocOptionError s -> s PandocSyntaxMapError s -> s @@ -117,6 +91,8 @@ renderError e = PandocFilterError filtername msg -> "Error running filter " <> filtername <> ":\n" <> msg PandocLuaError msg -> "Error running Lua:\n" <> msg + PandocNoScriptingEngine -> "This version of pandoc has been compiled " <> + "without Lua support." 
PandocCouldNotFindDataFileError fn -> "Could not find data file " <> fn PandocCouldNotFindMetadataFileError fn -> @@ -137,6 +113,8 @@ renderError e = "ipynb decoding error: " <> w PandocUnsupportedCharsetError charset -> "Unsupported charset " <> charset + PandocFormatError format s -> + "Error parsing format " <> tshow format <> ": " <> s PandocUnknownReaderError r -> "Unknown input format " <> r <> case r of @@ -178,6 +156,7 @@ handleError (Left e) = PandocAppError{} -> 4 PandocTemplateError{} -> 5 PandocOptionError{} -> 6 + PandocFormatError{} -> 20 PandocUnknownReaderError{} -> 21 PandocUnknownWriterError{} -> 22 PandocUnsupportedExtensionError{} -> 23 @@ -191,11 +170,11 @@ handleError (Left e) = PandocShouldNeverHappenError{} -> 62 PandocSomeError{} -> 63 PandocParseError{} -> 64 - PandocParsecError{} -> 65 PandocMakePDFError{} -> 66 PandocSyntaxMapError{} -> 67 PandocFilterError{} -> 83 PandocLuaError{} -> 84 + PandocNoScriptingEngine -> 89 PandocMacroLoop{} -> 91 PandocUTF8DecodingError{} -> 92 PandocIpynbDecodingError{} -> 93 diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index cc4f0c7303ac..1751295ad632 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -1,8 +1,6 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DeriveGeneric #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Extensions @@ -16,10 +14,12 @@ Data structures and functions for representing markup extensions. -} module Text.Pandoc.Extensions ( Extension(..) + , readExtension + , showExtension , Extensions , emptyExtensions , extensionsFromList - , parseFormatSpec + , extensionsToList , extensionEnabled , enableExtension , disableExtension @@ -32,16 +32,14 @@ module Text.Pandoc.Extensions ( Extension(..) 
, githubMarkdownExtensions , multimarkdownExtensions ) where -import Data.Bits (clearBit, setBit, testBit, (.|.)) import Data.Data (Data) -import Data.List (foldl') import qualified Data.Text as T import Data.Typeable (Typeable) import GHC.Generics (Generic) -import Safe (readMay) -import Text.Parsec -import Data.Aeson.TH (deriveJSON) +import Text.Read (readMaybe) import Data.Aeson +import Data.List (sort) +import qualified Data.Set as Set -- | Individually selectable syntax extensions. data Extension = @@ -137,41 +135,62 @@ data Extension = | Ext_xrefs_name -- ^ Use xrefs with names | Ext_xrefs_number -- ^ Use xrefs with numbers | Ext_yaml_metadata_block -- ^ YAML metadata block - deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic) + | CustomExtension T.Text -- ^ Custom extension + deriving (Show, Read, Eq, Ord, Data, Typeable, Generic) -$(deriveJSON defaultOptions{ constructorTagModifier = drop 4 } ''Extension) +instance FromJSON Extension where + parseJSON = withText "Extension" (pure . readExtension . T.unpack) -newtype Extensions = Extensions Integer +instance ToJSON Extension where + toJSON = String . showExtension + +newtype Extensions = Extensions (Set.Set Extension) deriving (Show, Read, Eq, Ord, Data, Typeable, Generic) instance Semigroup Extensions where - (Extensions a) <> (Extensions b) = Extensions (a .|. b) + (Extensions a) <> (Extensions b) = Extensions (a <> b) instance Monoid Extensions where - mempty = Extensions 0 + mempty = Extensions mempty mappend = (<>) instance FromJSON Extensions where - parseJSON = - return . foldr enableExtension emptyExtensions . fromJSON + parseJSON = fmap extensionsFromList . parseJSON instance ToJSON Extensions where - toJSON exts = toJSON $ - [ext | ext <- [minBound..maxBound], extensionEnabled ext exts] + toJSON (Extensions exts) = toJSON exts + +-- | Reads a single extension from a string. 
+readExtension :: String -> Extension +readExtension "lhs" = Ext_literate_haskell +readExtension name = + case readMaybe ("Ext_" ++ name) of + Just ext -> ext + Nothing -> CustomExtension (T.pack name) + +-- | Show an extension in human-readable form. +showExtension :: Extension -> T.Text +showExtension ext = + case ext of + CustomExtension t -> t + _ -> T.drop 4 $ T.pack $ show ext extensionsFromList :: [Extension] -> Extensions -extensionsFromList = foldr enableExtension emptyExtensions +extensionsFromList = Extensions . Set.fromList + +extensionsToList :: Extensions -> [Extension] +extensionsToList (Extensions extset) = sort $ Set.toList extset emptyExtensions :: Extensions -emptyExtensions = Extensions 0 +emptyExtensions = Extensions mempty extensionEnabled :: Extension -> Extensions -> Bool -extensionEnabled x (Extensions exts) = testBit exts (fromEnum x) +extensionEnabled x (Extensions exts) = x `Set.member` exts enableExtension :: Extension -> Extensions -> Extensions -enableExtension x (Extensions exts) = Extensions (setBit exts (fromEnum x)) +enableExtension x (Extensions exts) = Extensions (Set.insert x exts) disableExtension :: Extension -> Extensions -> Extensions -disableExtension x (Extensions exts) = Extensions (clearBit exts (fromEnum x)) +disableExtension x (Extensions exts) = Extensions (Set.delete x exts) -- | Extensions to be used with pandoc-flavored markdown. pandocExtensions :: Extensions @@ -605,30 +624,3 @@ getAllExtensions f = universalExtensions <> getAll f extensionsFromList [ Ext_smart ] getAll _ = mempty - - --- | Parse a format-specifying string into a markup format, --- a set of extensions to enable, and a set of extensions to disable. 
-parseFormatSpec :: T.Text - -> Either ParseError (T.Text, [Extension], [Extension]) -parseFormatSpec = parse formatSpec "" - where formatSpec = do - name <- formatName - (extsToEnable, extsToDisable) <- foldl' (flip ($)) ([],[]) <$> - many extMod - return (T.pack name, reverse extsToEnable, reverse extsToDisable) - formatName = many1 $ noneOf "-+" - extMod = do - polarity <- oneOf "-+" - name <- many $ noneOf "-+" - ext <- case readMay ("Ext_" ++ name) of - Just n -> return n - Nothing - | name == "lhs" -> return Ext_literate_haskell - | otherwise -> unexpected $ - "unknown extension: " ++ name - return $ \(extsToEnable, extsToDisable) -> - case polarity of - '+' -> (ext : extsToEnable, extsToDisable) - _ -> (extsToEnable, ext : extsToDisable) - diff --git a/src/Text/Pandoc/Filter.hs b/src/Text/Pandoc/Filter.hs index 7185fd1e0624..2bfcf9b8a281 100644 --- a/src/Text/Pandoc/Filter.hs +++ b/src/Text/Pandoc/Filter.hs @@ -16,6 +16,7 @@ module Text.Pandoc.Filter ( Filter (..) , Environment (..) , applyFilters + , applyJSONFilter ) where import System.CPUTime (getCPUTime) @@ -28,8 +29,8 @@ import Text.Pandoc.Definition (Pandoc) import Text.Pandoc.Filter.Environment (Environment (..)) import Text.Pandoc.Logging import Text.Pandoc.Citeproc (processCitations) +import Text.Pandoc.Scripting (ScriptingEngine (engineApplyFilter)) import qualified Text.Pandoc.Filter.JSON as JSONFilter -import qualified Text.Pandoc.Filter.Lua as LuaFilter import qualified Data.Text as T import System.FilePath (takeExtension) import Control.Applicative ((<|>)) @@ -73,19 +74,20 @@ instance ToJSON Filter where -- | Modify the given document using a filter. 
applyFilters :: (PandocMonad m, MonadIO m) - => Environment + => ScriptingEngine + -> Environment -> [Filter] -> [String] -> Pandoc -> m Pandoc -applyFilters fenv filters args d = do +applyFilters scrngin fenv filters args d = do expandedFilters <- mapM expandFilterPath filters foldM applyFilter d expandedFilters where applyFilter doc (JSONFilter f) = withMessages f $ JSONFilter.apply fenv args f doc applyFilter doc (LuaFilter f) = - withMessages f $ LuaFilter.apply fenv args f doc + withMessages f $ engineApplyFilter scrngin fenv args f doc applyFilter doc CiteprocFilter = processCitations doc withMessages f action = do @@ -106,3 +108,11 @@ expandFilterPath CiteprocFilter = return CiteprocFilter filterPath :: PandocMonad m => FilePath -> m FilePath filterPath fp = fromMaybe fp <$> findFileWithDataFallback "filters" fp + +applyJSONFilter :: MonadIO m + => Environment + -> [String] + -> FilePath + -> Pandoc + -> m Pandoc +applyJSONFilter = JSONFilter.apply diff --git a/src/Text/Pandoc/Filter/JSON.hs b/src/Text/Pandoc/Filter/JSON.hs index 16ee4e3d63f8..883e3c7c4726 100644 --- a/src/Text/Pandoc/Filter/JSON.hs +++ b/src/Text/Pandoc/Filter/JSON.hs @@ -27,7 +27,8 @@ import Text.Pandoc.Definition (Pandoc) import Text.Pandoc.Error (PandocError (PandocFilterError)) import Text.Pandoc.Filter.Environment (Environment (..)) import Text.Pandoc.Process (pipeProcess) -import Text.Pandoc.Shared (pandocVersion, tshow) +import Text.Pandoc.Version (pandocVersionText) +import Text.Pandoc.Shared (tshow) import qualified Control.Exception as E import qualified Text.Pandoc.UTF8 as UTF8 @@ -65,7 +66,7 @@ externalFilter fenv f args' d = liftIO $ do let ropts = envReaderOptions fenv env <- getEnvironment let env' = Just - ( ("PANDOC_VERSION", T.unpack pandocVersion) + ( ("PANDOC_VERSION", T.unpack pandocVersionText) : ("PANDOC_READER_OPTIONS", UTF8.toStringLazy (encode ropts)) : env ) (exitcode, outbs) <- E.handle filterException $ diff --git a/src/Text/Pandoc/Filter/Lua.hs 
b/src/Text/Pandoc/Filter/Lua.hs deleted file mode 100644 index 047733e6d05d..000000000000 --- a/src/Text/Pandoc/Filter/Lua.hs +++ /dev/null @@ -1,51 +0,0 @@ -{- | - Module : Text.Pandoc.Filter.Lua - Copyright : Copyright (C) 2006-2022 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Apply Lua filters to modify a pandoc documents programmatically. --} -module Text.Pandoc.Filter.Lua (apply) where - -import Control.Exception (throw) -import Control.Monad ((>=>)) -import qualified Data.Text as T -import Text.Pandoc.Class (PandocMonad) -import Control.Monad.Trans (MonadIO) -import Text.Pandoc.Definition (Pandoc) -import Text.Pandoc.Error (PandocError (PandocFilterError, PandocLuaError)) -import Text.Pandoc.Filter.Environment (Environment (..)) -import Text.Pandoc.Lua (Global (..), runLua, runFilterFile, setGlobals) - --- | Run the Lua filter in @filterPath@ for a transformation to the --- target format (first element in args). Pandoc uses Lua init files to --- setup the Lua interpreter. -apply :: (PandocMonad m, MonadIO m) - => Environment - -> [String] - -> FilePath - -> Pandoc - -> m Pandoc -apply fenv args fp doc = do - let format = case args of - (x:_) -> x - _ -> error "Format not supplied for Lua filter" - runLua >=> forceResult fp $ do - setGlobals [ FORMAT $ T.pack format - , PANDOC_READER_OPTIONS (envReaderOptions fenv) - , PANDOC_WRITER_OPTIONS (envWriterOptions fenv) - , PANDOC_SCRIPT_FILE fp - ] - runFilterFile fp doc - -forceResult :: (PandocMonad m, MonadIO m) - => FilePath -> Either PandocError Pandoc -> m Pandoc -forceResult fp eitherResult = case eitherResult of - Right x -> return x - Left err -> throw . 
PandocFilterError (T.pack fp) $ case err of - PandocLuaError msg -> msg - _ -> T.pack $ show err diff --git a/src/Text/Pandoc/Format.hs b/src/Text/Pandoc/Format.hs new file mode 100644 index 000000000000..a668969edb7b --- /dev/null +++ b/src/Text/Pandoc/Format.hs @@ -0,0 +1,134 @@ +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Format + Copyright : © 2022 Albert Krewinkel + License : GPL-2.0-or-later + Maintainer : Albert Krewinkel + +Handling of format specifiers for input and output. +-} +module Text.Pandoc.Format + ( FlavoredFormat (..) + , ExtensionsConfig (..) + , ExtensionsDiff (..) + , parseFlavoredFormat + , applyExtensionsDiff + , getExtensionsConfig + ) where + +import Control.Monad.Except (throwError) +import Data.List (foldl') +import System.FilePath (splitExtension) +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Error (PandocError (..)) +import Text.Pandoc.Extensions + ( Extension + , Extensions + , disableExtension + , enableExtension + , extensionEnabled + , getAllExtensions + , getDefaultExtensions + , readExtension + , showExtension + ) +import Text.Pandoc.Parsing +import qualified Data.Text as T + +type Parser = Parsec T.Text () + +-- | Format specifier with the format's name and the lists of extensions +-- to be enabled or disabled. +data FlavoredFormat = FlavoredFormat + { formatName :: T.Text + , formatExtsDiff :: ExtensionsDiff + } deriving (Show) + +-- | Changes to a set of extensions, i.e., list of extensions to be +-- enabled or disabled. +data ExtensionsDiff = ExtensionsDiff + { extsToEnable :: [Extension] + , extsToDisable :: [Extension] + } deriving (Show) + +instance Semigroup ExtensionsDiff where + ExtensionsDiff x1 y1 <> ExtensionsDiff x2 y2 = + ExtensionsDiff (x1 <> x2) (y1 <> y2) + +instance Monoid ExtensionsDiff where + mappend = (<>) + mempty = ExtensionsDiff [] [] + +-- | Describes the properties of a format. 
+data ExtensionsConfig = ExtensionsConfig + { extsDefault :: Extensions -- ^ Extensions enabled by default + , extsSupported :: Extensions -- ^ Extensions that can be enabled or disabled. + } deriving (Show) + +-- | Returns the extensions configuration of a format. +getExtensionsConfig :: T.Text -> ExtensionsConfig +getExtensionsConfig fmt = ExtensionsConfig + { extsDefault = getDefaultExtensions fmt + , extsSupported = getAllExtensions fmt + } + +-- | Apply the extension changes in the format spec to the extensions +-- given in the format's extensions configuration. Throws an error in +-- case of an unknown or unsupported extension. +applyExtensionsDiff :: PandocMonad m + => ExtensionsConfig + -> FlavoredFormat + -> m Extensions +applyExtensionsDiff extConf (FlavoredFormat fname extsDiff) = do + let unsupported = + filter (\ext -> not $ extensionEnabled ext (extsSupported extConf)) + (extsToEnable extsDiff ++ extsToDisable extsDiff) + case unsupported of + ext:_ -> throwError $ PandocUnsupportedExtensionError (showExtension ext) + fname + [] -> let enabled = foldr enableExtension + (extsDefault extConf) + (extsToEnable extsDiff) + in pure $ foldr disableExtension enabled (extsToDisable extsDiff) + +-- | Parse a format-specifying string into a markup format and the +-- change set to the format's extensions. Throws an error if the spec +-- cannot be parsed or contains an unknown extension. +parseFlavoredFormat :: PandocMonad m + => T.Text + -> m FlavoredFormat +parseFlavoredFormat spec = + -- Paths like `latex-foo-bar.lua` or `latex-smart-citations.lua` + -- should be parsed as the format name. The `-` (or `+`) in the + -- filename would confuse the extensions parser, so, if `spec` looks + -- like a filename, the file's basename is split off into the prefix. + -- Only the remaining part is parsed, and the prefix is appended back + -- to the format after parsing. 
+ case parse (fixSourcePos *> formatSpec) "" spec' of + Right (fname, extsDiff) -> pure (FlavoredFormat (prefix <> fname) extsDiff) + Left err -> throwError $ PandocFormatError spec (T.pack $ show err) + where + fixSourcePos = do + pos <- getPosition + setPosition (incSourceColumn pos (T.length prefix)) + formatSpec = do + name <- parseFormatName + extsDiff <- pExtensionsDiff + return ( T.pack name, extsDiff ) + parseFormatName = many1 $ noneOf "-+" + (prefix, spec') = case splitExtension (T.unpack spec) of + (_, "") -> ("", T.toLower spec) -- no extension + (p,s) -> (T.pack p, T.pack s) + +pExtensionsDiff :: Parser ExtensionsDiff +pExtensionsDiff = foldl' (flip ($)) (ExtensionsDiff [] []) <$> many extMod + where + extMod = do + polarity <- oneOf "-+" + name <- many $ noneOf "-+" + let ext = readExtension name + return $ \extsDiff -> + case polarity of + '+' -> extsDiff{extsToEnable = (ext : extsToEnable extsDiff)} + _ -> extsDiff{extsToDisable = (ext : extsToDisable extsDiff)} diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs index fac99cbcd8d5..54f62bb820dd 100644 --- a/src/Text/Pandoc/Highlighting.hs +++ b/src/Text/Pandoc/Highlighting.hs @@ -20,6 +20,7 @@ module Text.Pandoc.Highlighting ( highlightingStyles , styleToLaTeX , formatHtmlInline , formatHtmlBlock + , formatHtml4Block , styleToCss , pygments , espresso @@ -236,4 +237,3 @@ lookupHighlightingStyle s Just sty -> return sty Nothing -> throwError $ PandocOptionError $ T.pack $ "Unknown highlight-style " ++ s - diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs deleted file mode 100644 index 3d699b4947a1..000000000000 --- a/src/Text/Pandoc/Lua.hs +++ /dev/null @@ -1,23 +0,0 @@ -{- | - Module : Text.Pandoc.Lua - Copyright : Copyright © 2017-2022 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel - Stability : alpha - -Running pandoc Lua filters. --} -module Text.Pandoc.Lua - ( runLua - -- * Lua globals - , Global (..) 
- , setGlobals - -- * Filters - , runFilterFile - ) where - -import Text.Pandoc.Lua.Filter (runFilterFile) -import Text.Pandoc.Lua.Global (Global (..), setGlobals) -import Text.Pandoc.Lua.Init (runLua) -import Text.Pandoc.Lua.Orphans () diff --git a/src/Text/Pandoc/Lua/ErrorConversion.hs b/src/Text/Pandoc/Lua/ErrorConversion.hs deleted file mode 100644 index 3968eba84035..000000000000 --- a/src/Text/Pandoc/Lua/ErrorConversion.hs +++ /dev/null @@ -1,40 +0,0 @@ -{-# OPTIONS_GHC -fno-warn-orphans #-} -{- | - Module : Text.Pandoc.Lua.ErrorConversion - Copyright : © 2020-2022 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel - Stability : alpha - -Define how Lua errors are converted into @'PandocError'@ Haskell -exceptions, and /vice versa/. --} -module Text.Pandoc.Lua.ErrorConversion - ( addContextToException - ) where - -import HsLua (LuaError, LuaE, resultToEither, runPeek, top) -import Text.Pandoc.Error (PandocError (PandocLuaError)) -import Text.Pandoc.Lua.Marshal.PandocError (pushPandocError, peekPandocError) - -import qualified Data.Text as T -import qualified HsLua as Lua - -addContextToException :: () -addContextToException = undefined - --- | Retrieve a @'PandocError'@ from the Lua stack. -popPandocError :: LuaE PandocError PandocError -popPandocError = do - errResult <- runPeek $ peekPandocError top - case resultToEither errResult of - Right x -> return x - Left err -> return $ PandocLuaError (T.pack err) - --- Ensure conversions between Lua errors and 'PandocError' exceptions --- are possible. -instance LuaError PandocError where - popException = popPandocError - pushException = pushPandocError - luaException = PandocLuaError . 
T.pack diff --git a/src/Text/Pandoc/Lua/Filter.hs b/src/Text/Pandoc/Lua/Filter.hs deleted file mode 100644 index da8af9a26c64..000000000000 --- a/src/Text/Pandoc/Lua/Filter.hs +++ /dev/null @@ -1,46 +0,0 @@ -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE IncoherentInstances #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE ScopedTypeVariables #-} -{- | -Module : Text.Pandoc.Lua.Filter -Copyright : © 2012-2022 John MacFarlane, - © 2017-2022 Albert Krewinkel -License : GNU GPL, version 2 or above -Maintainer : Albert Krewinkel -Stability : alpha - -Types and functions for running Lua filters. --} -module Text.Pandoc.Lua.Filter - ( runFilterFile - ) where -import Control.Monad ((>=>), (<$!>)) -import HsLua as Lua -import Text.Pandoc.Definition -import Text.Pandoc.Error (PandocError) -import Text.Pandoc.Lua.ErrorConversion () -import Text.Pandoc.Lua.Marshal.AST -import Text.Pandoc.Lua.Marshal.Filter - - --- | Transform document using the filter defined in the given file. -runFilterFile :: FilePath -> Pandoc -> LuaE PandocError Pandoc -runFilterFile filterPath doc = do - oldtop <- gettop - stat <- dofileTrace filterPath - if stat /= Lua.OK - then throwErrorAsException - else do - newtop <- gettop - -- Use the returned filters, or the implicitly defined global - -- filter if nothing was returned. - luaFilters <- forcePeek $ - if newtop - oldtop >= 1 - then peekList peekFilter top - else (:[]) <$!> (liftLua pushglobaltable *> peekFilter top) - settop oldtop - runAll luaFilters doc - -runAll :: [Filter] -> Pandoc -> LuaE PandocError Pandoc -runAll = foldr ((>=>) . 
applyFully) return diff --git a/src/Text/Pandoc/Lua/Marshal/Context.hs b/src/Text/Pandoc/Lua/Marshal/Context.hs deleted file mode 100644 index 126f3a82d46e..000000000000 --- a/src/Text/Pandoc/Lua/Marshal/Context.hs +++ /dev/null @@ -1,28 +0,0 @@ -{-# OPTIONS_GHC -fno-warn-orphans #-} -{- | - Module : Text.Pandoc.Lua.Marshaling.Context - Copyright : © 2012-2022 John MacFarlane - © 2017-2022 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel - Stability : alpha - -Marshaling instance for doctemplates Context and its components. --} -module Text.Pandoc.Lua.Marshal.Context () where - -import qualified HsLua as Lua -import HsLua (Pushable) -import Text.DocTemplates (Context(..), Val(..), TemplateTarget) -import Text.DocLayout (render) - -instance (TemplateTarget a, Pushable a) => Pushable (Context a) where - push (Context m) = Lua.push m - -instance (TemplateTarget a, Pushable a) => Pushable (Val a) where - push NullVal = Lua.push () - push (BoolVal b) = Lua.push b - push (MapVal ctx) = Lua.push ctx - push (ListVal xs) = Lua.push xs - push (SimpleVal d) = Lua.push $ render Nothing d diff --git a/src/Text/Pandoc/Lua/Module/Template.hs b/src/Text/Pandoc/Lua/Module/Template.hs deleted file mode 100644 index 967fe31a8133..000000000000 --- a/src/Text/Pandoc/Lua/Module/Template.hs +++ /dev/null @@ -1,61 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- | - Module : Text.Pandoc.Lua.Module.Template - Copyright : Copyright © 2022 Albert Krewinkel, John MacFarlane - License : GNU GPL, version 2 or above - Maintainer : Albert Krewinkel - -Lua module to handle pandoc templates. 
--} -module Text.Pandoc.Lua.Module.Template - ( documentedModule - ) where - -import HsLua -import Text.Pandoc.Error (PandocError) -import Text.Pandoc.Lua.Marshal.Template (pushTemplate) -import Text.Pandoc.Lua.PandocLua (PandocLua (unPandocLua), liftPandocLua) -import Text.Pandoc.Templates - (compileTemplate, getDefaultTemplate, runWithPartials, runWithDefaultPartials) - -import qualified Data.Text as T - --- | The "pandoc.template" module. -documentedModule :: Module PandocError -documentedModule = Module - { moduleName = "pandoc.template" - , moduleDescription = T.unlines - [ "Lua functions for pandoc templates." - ] - , moduleFields = [] - , moduleOperations = [] - , moduleFunctions = functions - } - --- | Template module functions. -functions :: [DocumentedFunction PandocError] -functions = - [ defun "compile" - ### (\template mfilepath -> unPandocLua $ - case mfilepath of - Just fp -> runWithPartials (compileTemplate fp template) - Nothing -> runWithDefaultPartials - (compileTemplate "templates/default" template)) - <#> parameter peekText "string" "template" "template string" - <#> opt (stringParam "templ_path" "template path") - =#> functionResult (either failLua pushTemplate) "pandoc Template" - "compiled template" - - , defun "default" - ### (\mformat -> unPandocLua $ do - let getFORMAT = liftPandocLua $ do - getglobal "FORMAT" - forcePeek $ peekText top `lastly` pop 1 - format <- maybe getFORMAT pure mformat - getDefaultTemplate format) - <#> opt (textParam "writer" - "writer for which the template should be returned.") - =#> functionResult pushText "string" - "string representation of the writer's default template" - - ] diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index 2f9d554c8213..66332d13fd36 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -20,8 +20,10 @@ module Text.Pandoc.MIME ( import Data.List (isPrefixOf, isSuffixOf) import qualified Data.Map as M import qualified Data.Text as T +import qualified 
Data.Text.Encoding as T import Data.Maybe (fromMaybe, listToMaybe) import Data.Tuple (swap) +import qualified Network.Mime import System.FilePath type MimeType = T.Text @@ -81,37 +83,17 @@ getCharset mt = -- Except for first entry, list borrowed from -- mimeTypesList :: [(T.Text, MimeType)] -mimeTypesList = - [("cpt","image/x-corelphotopaint") - ,("gz","application/x-gzip") - ,("cabal","application/x-cabal") - ,("%","application/x-trash") +mimeTypesList = M.toList (M.map T.decodeUtf8 Network.Mime.defaultMimeMap) ++ + [("%","application/x-trash") ,("323","text/h323") - ,("3gp","video/3gpp") - ,("7z","application/x-7z-compressed") - ,("abw","application/x-abiword") - ,("ai","application/postscript") - ,("aif","audio/x-aiff") - ,("aifc","audio/x-aiff") - ,("aiff","audio/x-aiff") ,("alc","chemical/x-alchemy") ,("art","image/x-jg") - ,("asc","text/plain") - ,("asf","video/x-ms-asf") ,("asn","chemical/x-ncbi-asn1") ,("aso","chemical/x-ncbi-asn1-binary") - ,("asx","video/x-ms-asf") - ,("atom","application/atom") - ,("atomcat","application/atomcat+xml") ,("atomsrv","application/atomserv+xml") - ,("au","audio/basic") - ,("avi","video/x-msvideo") ,("b","chemical/x-molconn-Z") ,("bak","application/x-trash") ,("bat","application/x-msdos-program") - ,("bcpio","application/x-bcpio") - ,("bib","text/x-bibtex") - ,("bin","application/octet-stream") ,("bmp","image/x-ms-bmp") ,("boo","text/x-boo") ,("book","application/x-maker") @@ -119,89 +101,55 @@ mimeTypesList = ,("c","text/x-csrc") ,("c++","text/x-c++src") ,("c3d","chemical/x-chem3d") - ,("cab","application/x-cab") + ,("cabal","application/x-cabal") ,("cac","chemical/x-cache") ,("cache","chemical/x-cache") - ,("cap","application/cap") ,("cascii","chemical/x-cactvs-binary") - ,("cat","application/vnd.ms-pki.seccat") ,("cbin","chemical/x-cactvs-binary") - ,("cbr","application/x-cbr") ,("cbz","application/x-cbz") ,("cc","text/x-c++src") ,("cdf","application/x-cdf") ,("cdr","image/x-coreldraw") ,("cdt","image/x-coreldrawtemplate") 
- ,("cdx","chemical/x-cdx") - ,("cdy","application/vnd.cinderella") ,("cef","chemical/x-cxf") ,("cer","chemical/x-cerius") ,("chm","chemical/x-chemdraw") ,("chrt","application/x-kchart") - ,("cif","chemical/x-cif") - ,("class","application/java-vm") - ,("cls","text/x-tex") - ,("cmdf","chemical/x-cmdf") - ,("cml","chemical/x-cml") - ,("cod","application/vnd.rim.cod") ,("com","application/x-msdos-program") ,("cpa","chemical/x-compass") - ,("cpio","application/x-cpio") ,("cpp","text/x-c++src") - ,("cpt","application/mac-compactpro") + ,("cpt","image/x-corelphotopaint") ,("crl","application/x-pkcs7-crl") - ,("crt","application/x-x509-ca-cert") ,("csf","chemical/x-cache-csf") - ,("csh","application/x-csh") ,("csm","chemical/x-csml") - ,("csml","chemical/x-csml") - ,("css","text/css") - ,("csv","text/csv") ,("ctab","chemical/x-cactvs-binary") ,("ctx","chemical/x-ctx") - ,("cu","application/cu-seeme") ,("cub","chemical/x-gaussian-cube") ,("cxf","chemical/x-cxf") ,("cxx","text/x-c++src") ,("d","text/x-dsrc") ,("dat","chemical/x-mopac-input") - ,("dcr","application/x-director") - ,("deb","application/x-debian-package") ,("dif","video/dv") ,("diff","text/x-diff") - ,("dir","application/x-director") - ,("djv","image/vnd.djvu") - ,("djvu","image/vnd.djvu") ,("dl","video/dl") ,("dll","application/x-msdos-program") - ,("dmg","application/x-apple-diskimage") ,("dms","application/x-dms") - ,("doc","application/msword") - ,("dot","application/msword") - ,("dv","video/dv") - ,("dvi","application/x-dvi") ,("dx","chemical/x-jcamp-dx") - ,("dxr","application/x-director") ,("emb","chemical/x-embl-dl-nucleotide") ,("embl","chemical/x-embl-dl-nucleotide") ,("emf","image/x-emf") - ,("emz","application/x-msmetafile") - ,("eml","message/rfc822") ,("ent","chemical/x-ncbi-asn1-ascii") - ,("eot","application/vnd.ms-fontobject") + + -- The type used in mime-types is `application/postscript`, + -- but code in Text.Pandoc.PDF relies on the type being + -- `application/eps`. 
Do not remove without updating that + -- module. ,("eps","application/eps") - ,("etx","text/x-setext") - ,("exe","application/x-msdos-program") - ,("ez","application/andrew-inset") + ,("fb","application/x-maker") ,("fbdoc","application/x-maker") ,("fch","chemical/x-gaussian-checkpoint") ,("fchk","chemical/x-gaussian-checkpoint") - ,("fig","application/x-xfig") - ,("flac","application/x-flac") - ,("fli","video/fli") - ,("fm","application/x-maker") - ,("frame","application/x-maker") ,("frm","application/x-maker") ,("fs","text/plain") ,("gal","chemical/x-gaussian-log") @@ -212,164 +160,66 @@ mimeTypesList = ,("gcf","application/x-graphing-calculator") ,("gcg","chemical/x-gcg8-sequence") ,("gen","chemical/x-genbank") - ,("gf","application/x-tex-gf") - ,("gif","image/gif") ,("gjc","chemical/x-gaussian-input") ,("gjf","chemical/x-gaussian-input") ,("gl","video/gl") ,("glsl","text/plain") - ,("gnumeric","application/x-gnumeric") ,("gpt","chemical/x-mopac-graph") - ,("gsf","application/x-font") ,("gsm","audio/x-gsm") - ,("gtar","application/x-gtar") ,("h","text/x-chdr") ,("h++","text/x-c++hdr") - ,("hdf","application/x-hdf") ,("hh","text/x-c++hdr") ,("hin","chemical/x-hin") ,("hpp","text/x-c++hdr") - ,("hqx","application/mac-binhex40") ,("hs","text/x-haskell") ,("hta","application/hta") - ,("htc","text/x-component") - ,("htm","text/html") - ,("html","text/html") ,("hxx","text/x-c++hdr") ,("ica","application/x-ica") - ,("ice","x-conference/x-cooltalk") - ,("ico","image/x-icon") - ,("ics","text/calendar") ,("icz","text/calendar") - ,("ief","image/ief") - ,("iges","model/iges") - ,("igs","model/iges") ,("iii","application/x-iphone") ,("inp","chemical/x-gamess-input") ,("ins","application/x-internet-signup") - ,("iso","application/x-iso9660-image") ,("isp","application/x-internet-signup") ,("ist","chemical/x-isostar") ,("istr","chemical/x-isostar") - ,("jad","text/vnd.sun.j2me.app-descriptor") - ,("jar","application/java-archive") - ,("java","text/x-java") 
,("jdx","chemical/x-jcamp-dx") - ,("jmz","application/x-jmol") - ,("jng","image/x-jng") - ,("jnlp","application/x-java-jnlp-file") - ,("jpe","image/jpeg") - ,("jpeg","image/jpeg") ,("jfif","image/jpeg") - ,("jpg","image/jpeg") - ,("js","application/javascript") - ,("kar","audio/midi") + ,("jmz","application/x-jmol") ,("key","application/pgp-keys") ,("kil","application/x-killustrator") ,("kin","chemical/x-kinemage") - ,("kml","application/vnd.google-earth.kml+xml") - ,("kmz","application/vnd.google-earth.kmz") - ,("kpr","application/x-kpresenter") - ,("kpt","application/x-kpresenter") - ,("ksp","application/x-kspread") - ,("kwd","application/x-kword") - ,("kwt","application/x-kword") - ,("latex","application/x-latex") - ,("lha","application/x-lha") ,("lhs","text/x-literate-haskell") ,("lsf","video/x-la-asf") ,("lsx","video/x-la-asf") - ,("ltx","text/x-tex") ,("lyx","application/x-lyx") ,("lzh","application/x-lzh") ,("lzx","application/x-lzx") - ,("m3u","audio/mpegurl") - ,("m4a","audio/mpeg") - ,("m4v","video/x-m4v") - ,("maker","application/x-maker") ,("man","application/x-troff-man") ,("mcif","chemical/x-mmcif") ,("mcm","chemical/x-macmolecule") ,("mdb","application/msaccess") ,("me","application/x-troff-me") - ,("mesh","model/mesh") - ,("mid","audio/midi") - ,("midi","audio/midi") - ,("mif","application/x-mif") - ,("mkv","video/x-matroska") ,("mm","application/x-freemind") ,("mmd","chemical/x-macromodel-input") - ,("mmf","application/vnd.smaf") - ,("mml","text/mathml") ,("mmod","chemical/x-macromodel-input") - ,("mng","video/x-mng") ,("moc","text/x-moc") ,("mol","chemical/x-mdl-molfile") ,("mol2","chemical/x-mol2") ,("moo","chemical/x-mopac-out") ,("mop","chemical/x-mopac-input") ,("mopcrt","chemical/x-mopac-input") - ,("mov","video/quicktime") - ,("movie","video/x-sgi-movie") - ,("mp2","audio/mpeg") - ,("mp3","audio/mpeg") - ,("mp4","video/mp4") ,("mpc","chemical/x-mopac-input") - ,("mpe","video/mpeg") - ,("mpeg","video/mpeg") ,("mpega","audio/mpeg") - 
,("mpg","video/mpeg") - ,("mpga","audio/mpeg") ,("ms","application/x-troff-ms") - ,("msh","model/mesh") ,("msi","application/x-msi") ,("mvb","chemical/x-mopac-vib") - ,("mxu","video/vnd.mpegurl") - ,("nb","application/mathematica") - ,("nc","application/x-netcdf") ,("nwc","application/x-nwc") ,("o","application/x-object") - ,("oda","application/oda") - ,("odb","application/vnd.oasis.opendocument.database") - ,("odc","application/vnd.oasis.opendocument.chart") - ,("odf","application/vnd.oasis.opendocument.formula") - ,("odg","application/vnd.oasis.opendocument.graphics") - ,("odi","application/vnd.oasis.opendocument.image") - ,("odm","application/vnd.oasis.opendocument.text-master") - ,("odp","application/vnd.oasis.opendocument.presentation") - ,("ods","application/vnd.oasis.opendocument.spreadsheet") - ,("odt","application/vnd.oasis.opendocument.text") - ,("oga","audio/ogg") - ,("ogg","application/ogg") - ,("ogv","video/ogg") - ,("ogx","application/ogg") ,("old","application/x-trash") - ,("opus","audio/ogg") - ,("otg","application/vnd.oasis.opendocument.graphics-template") - ,("oth","application/vnd.oasis.opendocument.text-web") - ,("otp","application/vnd.oasis.opendocument.presentation-template") - ,("ots","application/vnd.oasis.opendocument.spreadsheet-template") - ,("otf","font/otf") - ,("ott","application/vnd.oasis.opendocument.text-template") ,("oza","application/x-oz-application") - ,("p","text/x-pascal") - ,("p7r","application/x-pkcs7-certreqresp") - ,("pac","application/x-ns-proxy-autoconfig") - ,("pas","text/x-pascal") ,("pat","image/x-coreldrawpattern") ,("patch","text/x-diff") - ,("pbm","image/x-portable-bitmap") - ,("pcap","application/cap") - ,("pcf","application/x-font") - ,("pcf.Z","application/x-font") - ,("pcx","image/pcx") ,("pdb","chemical/x-pdb") - ,("pdf","application/pdf") - ,("pfa","application/x-font") - ,("pfb","application/x-font") - ,("pgm","image/x-portable-graymap") - ,("pgn","application/x-chess-pgn") - 
,("pgp","application/pgp-signature") ,("php","application/x-httpd-php") ,("php3","application/x-httpd-php3") ,("php3p","application/x-httpd-php3-preprocessed") @@ -378,171 +228,46 @@ mimeTypesList = ,("pht","application/x-httpd-php") ,("phtml","application/x-httpd-php") ,("pk","application/x-tex-pk") - ,("pl","text/x-perl") ,("pls","audio/x-scpls") - ,("pm","text/x-perl") - ,("png","image/png") - ,("pnm","image/x-portable-anymap") ,("pot","text/plain") - ,("ppm","image/x-portable-pixmap") - ,("pps","application/vnd.ms-powerpoint") - ,("ppt","application/vnd.ms-powerpoint") - ,("prf","application/pics-rules") ,("prt","chemical/x-ncbi-asn1-ascii") - ,("ps","application/postscript") - ,("psd","image/x-photoshop") ,("py","text/x-python") ,("pyc","application/x-python-code") ,("pyo","application/x-python-code") - ,("qt","video/quicktime") ,("qtl","application/x-quicktimeplayer") - ,("ra","audio/x-pn-realaudio") - ,("ram","audio/x-pn-realaudio") - ,("rar","application/rar") - ,("ras","image/x-cmu-raster") ,("rd","chemical/x-mdl-rdfile") - ,("rdf","application/rdf+xml") - ,("rgb","image/x-rgb") ,("rhtml","application/x-httpd-eruby") ,("rm","audio/x-pn-realaudio") - ,("roff","application/x-troff") ,("ros","chemical/x-rosdal") - ,("rpm","application/x-redhat-package-manager") - ,("rss","application/rss+xml") - ,("rtf","application/rtf") - ,("rtx","text/richtext") ,("rxn","chemical/x-mdl-rxnfile") ,("sct","text/scriptlet") ,("sd","chemical/x-mdl-sdfile") ,("sd2","audio/x-sd2") - ,("sda","application/vnd.stardivision.draw") - ,("sdc","application/vnd.stardivision.calc") - ,("sdd","application/vnd.stardivision.impress") ,("sdf","application/vnd.stardivision.math") ,("sds","application/vnd.stardivision.chart") - ,("sdw","application/vnd.stardivision.writer") - ,("ser","application/java-serialized-object") ,("sgf","application/x-go-sgf") - ,("sgl","application/vnd.stardivision.writer-global") - ,("sh","application/x-sh") - ,("shar","application/x-shar") - ,("shtml","text/html") 
,("sid","audio/prs.sid") ,("sik","application/x-trash") - ,("silo","model/mesh") - ,("sis","application/vnd.symbian.install") - ,("sisx","x-epoc/x-sisx-app") - ,("sit","application/x-stuffit") - ,("sitx","application/x-stuffit") - ,("skd","application/x-koan") - ,("skm","application/x-koan") - ,("skp","application/x-koan") - ,("skt","application/x-koan") - ,("smi","application/smil") - ,("smil","application/smil") - ,("snd","audio/basic") ,("spc","chemical/x-galactic-spc") - ,("spl","application/futuresplash") - ,("spx","audio/ogg") - ,("src","application/x-wais-source") - ,("stc","application/vnd.sun.xml.calc.template") - ,("std","application/vnd.sun.xml.draw.template") - ,("sti","application/vnd.sun.xml.impress.template") - ,("stl","application/vnd.ms-pki.stl") - ,("stw","application/vnd.sun.xml.writer.template") - ,("sty","text/x-tex") - ,("sv4cpio","application/x-sv4cpio") - ,("sv4crc","application/x-sv4crc") - ,("svg","image/svg+xml") - -- removed for now, since it causes problems with - -- extensionFromMimeType: see #2183. 
- -- ,("svgz","image/svg+xml") ,("sw","chemical/x-swissprot") - ,("swf","application/x-shockwave-flash") ,("swfl","application/x-shockwave-flash") - ,("sxc","application/vnd.sun.xml.calc") - ,("sxd","application/vnd.sun.xml.draw") - ,("sxg","application/vnd.sun.xml.writer.global") - ,("sxi","application/vnd.sun.xml.impress") - ,("sxm","application/vnd.sun.xml.math") - ,("sxw","application/vnd.sun.xml.writer") - ,("t","application/x-troff") - ,("tar","application/x-tar") ,("taz","application/x-gtar") - ,("tcl","application/x-tcl") - ,("tex","text/x-tex") - ,("texi","application/x-texinfo") - ,("texinfo","application/x-texinfo") - ,("text","text/plain") ,("tgf","chemical/x-mdl-tgf") - ,("tgz","application/x-gtar") - ,("tif","image/tiff") - ,("tiff","image/tiff") - ,("tk","text/x-tcl") ,("tm","text/texmacs") - ,("torrent","application/x-bittorrent") - ,("tr","application/x-troff") ,("ts","text/texmacs") ,("tsp","application/dsptype") - ,("tsv","text/tab-separated-values") - ,("ttf","font/ttf") - ,("txt","text/plain") - ,("udeb","application/x-debian-package") - ,("uls","text/iuls") - ,("ustar","application/x-ustar") ,("val","chemical/x-ncbi-asn1-binary") - ,("vcd","application/x-cdlink") - ,("vcf","text/x-vcard") - ,("vcs","text/x-vcalendar") ,("vmd","chemical/x-vmd") ,("vms","chemical/x-vamas-iso14976") ,("vrm","x-world/x-vrml") - ,("vrml","model/vrml") ,("vs","text/plain") - ,("vsd","application/vnd.visio") - ,("vtt","text/vtt") - ,("wad","application/x-doom") - ,("wav","audio/x-wav") - ,("wax","audio/x-ms-wax") - ,("wbmp","image/vnd.wap.wbmp") - ,("wbxml","application/vnd.wap.wbxml") - ,("webm","video/webm") - ,("webp","image/webp") ,("wk","application/x-123") - ,("wm","video/x-ms-wm") - ,("wma","audio/x-ms-wma") - ,("wmd","application/x-ms-wmd") ,("wmf","image/x-wmf") - ,("wml","text/vnd.wap.wml") - ,("wmlc","application/vnd.wap.wmlc") - ,("wmls","text/vnd.wap.wmlscript") - ,("wmlsc","application/vnd.wap.wmlscriptc") - ,("wmv","video/x-ms-wmv") - 
,("wmx","video/x-ms-wmx") ,("wmz","application/x-ms-wmz") - ,("woff","font/woff") - ,("woff2","font/woff2") ,("wp5","application/wordperfect5.1") - ,("wpd","application/wordperfect") - ,("wrl","model/vrml") ,("wsc","text/scriptlet") - ,("wvx","video/x-ms-wvx") ,("wz","application/x-wingz") - ,("xbm","image/x-xbitmap") - ,("xcf","image/x-xcf") - ,("xht","application/xhtml+xml") - ,("xhtml","application/xhtml+xml") ,("xlb","application/vnd.ms-excel") - ,("xls","application/vnd.ms-excel") - ,("xlt","application/vnd.ms-excel") - ,("xml","application/xml") - ,("xpi","application/x-xpinstall") - ,("xpm","image/x-xpixmap") - ,("xsl","application/xml") ,("xtel","chemical/x-xtel") - ,("xul","application/vnd.mozilla.xul+xml") - ,("xwd","image/x-xwindowdump") - ,("xyz","chemical/x-xyz") - ,("zip","application/zip") ,("zmt","chemical/x-mopac-input") ] diff --git a/src/Text/Pandoc/Network/HTTP.hs b/src/Text/Pandoc/Network/HTTP.hs deleted file mode 100644 index 7c31fdb2485a..000000000000 --- a/src/Text/Pandoc/Network/HTTP.hs +++ /dev/null @@ -1,18 +0,0 @@ -{- | - Module : Text.Pandoc.Writers.Markdown.Inline - Copyright : Copyright (C) 2006-2022 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable --} -module Text.Pandoc.Network.HTTP ( - urlEncode - ) where -import qualified Network.HTTP.Types as HTTP -import qualified Text.Pandoc.UTF8 as UTF8 -import qualified Data.Text as T - -urlEncode :: T.Text -> T.Text -urlEncode = UTF8.toText . HTTP.urlEncode True . 
UTF8.fromText diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 92b471827267..d7040a97c4bd 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -312,6 +312,7 @@ data WriterOptions = WriterOptions , writerHighlightStyle :: Maybe Style -- ^ Style to use for highlighting -- (Nothing = no highlighting) , writerSetextHeaders :: Bool -- ^ Use setext headers for levels 1-2 in markdown + , writerListTables :: Bool -- ^ Use list tables for RST tables , writerEpubSubdirectory :: Text -- ^ Subdir for epub in OCF , writerEpubMetadata :: Maybe Text -- ^ Metadata to include in EPUB , writerEpubFonts :: [FilePath] -- ^ Paths to fonts to embed @@ -347,6 +348,7 @@ instance Default WriterOptions where , writerListings = False , writerHighlightStyle = Just pygments , writerSetextHeaders = False + , writerListTables = False , writerEpubSubdirectory = "EPUB" , writerEpubMetadata = Nothing , writerEpubFonts = [] diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index bdcdbedddcdd..419798f7e949 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -61,6 +61,7 @@ import Text.Pandoc.Class (fillMediaBag, getVerbosity, readFileLazy, readFileStrict, fileExists, report, extractMedia, PandocMonad) import Text.Pandoc.Logging +import Text.DocTemplates ( FromContext(lookupContext) ) #ifdef _WINDOWS changePathSeparators :: FilePath -> FilePath @@ -89,8 +90,16 @@ makePDF program pdfargs writer opts doc = liftIO $ html2pdf verbosity program pdfargs source "pdfroff" -> do source <- writer opts doc + let paperargs = + case lookupContext "papersize" (writerVariables opts) of + Just s + | T.takeEnd 1 s == "l" -> ["-P-p" <> + T.unpack (T.dropEnd 1 s), "-P-l"] + | otherwise -> ["-P-p" <> T.unpack s] + Nothing -> [] let args = ["-ms", "-mpdfmark", "-mspdf", - "-e", "-t", "-k", "-KUTF-8", "-i"] ++ pdfargs + "-e", "-t", "-k", "-KUTF-8", "-i"] ++ + paperargs ++ pdfargs generic2pdf program args source baseProg -> do withTempDir "tex2pdf." 
$ \tmpdir' -> do diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index ca2e86713e0e..6284bb0f3a7c 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -47,6 +47,7 @@ module Text.Pandoc.Parsing ( module Text.Pandoc.Sources, mathDisplay, withHorizDisplacement, withRaw, + fromParsecError, escaped, characterReference, upperRoman, @@ -104,10 +105,9 @@ module Text.Pandoc.Parsing ( module Text.Pandoc.Sources, doubleCloseQuote, ellipses, dash, - nested, citeKey, - Parser, - ParserT, + Parsec, + ParsecT, Future(..), runF, askF, @@ -119,7 +119,7 @@ module Text.Pandoc.Parsing ( module Text.Pandoc.Sources, extractIdClass, insertIncludedFile, -- * Re-exports from Text.Parsec - Stream, + Stream(..), runParser, runParserT, parse, @@ -154,6 +154,8 @@ module Text.Pandoc.Parsing ( module Text.Pandoc.Sources, setState, updateState, SourcePos, + SourceName, + updatePosString, getPosition, setPosition, sourceName, @@ -168,13 +170,19 @@ module Text.Pandoc.Parsing ( module Text.Pandoc.Sources, initialPos, Line, Column, - ParseError + ParseError, + errorMessages, + messageString ) where import Text.Pandoc.Sources import Text.Parsec - ( setSourceName, + ( Parsec, + ParsecT, + SourcePos, + SourceName, + setSourceName, Column, Line, incSourceLine, @@ -221,9 +229,11 @@ import Text.Parsec runParserT, runParser, ParseError, - SourcePos, Stream(..) 
) -import Text.Parsec.Pos (initialPos, newPos) +import Text.Parsec.Error ( + errorMessages, + messageString ) +import Text.Parsec.Pos (initialPos, newPos, updatePosString) import Text.Pandoc.Parsing.Capabilities ( guardDisabled, guardEnabled, @@ -268,7 +278,6 @@ import Text.Pandoc.Parsing.General manyTillChar, manyUntil, manyUntilChar, - nested, nonspaceChar, notFollowedBy', oneOfStrings, @@ -287,7 +296,8 @@ import Text.Pandoc.Parsing.General trimInlinesF, uri, withHorizDisplacement, - withRaw ) + withRaw, + fromParsecError ) import Text.Pandoc.Parsing.GridTable ( gridTableWith, gridTableWith', @@ -329,5 +339,5 @@ import Text.Pandoc.Parsing.State ParserContext(..), ParserState(..), SubstTable ) -import Text.Pandoc.Parsing.Types - ( ParserT, askF, asksF, returnF, runF, Future(..), Parser ) +import Text.Pandoc.Parsing.Future + ( askF, asksF, returnF, runF, Future(..) ) diff --git a/src/Text/Pandoc/Parsing/Capabilities.hs b/src/Text/Pandoc/Parsing/Capabilities.hs index 770b307a345b..4a2b9d2ca8d4 100644 --- a/src/Text/Pandoc/Parsing/Capabilities.hs +++ b/src/Text/Pandoc/Parsing/Capabilities.hs @@ -43,7 +43,8 @@ where import Control.Monad (guard, when) import Data.Text (Text) -import Text.Parsec (ParsecT, SourcePos, Stream, getPosition, getState, updateState) +import Text.Parsec (SourcePos, Stream, ParsecT, + getPosition, getState, updateState) import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import Text.Pandoc.Logging (LogMessage) import Text.Pandoc.Options @@ -51,15 +52,14 @@ import Text.Pandoc.Options , ReaderOptions(readerExtensions) , extensionEnabled ) -import Text.Pandoc.Parsing.Types -import Text.Pandoc.Readers.LaTeX.Types (Macro) +import Text.Pandoc.TeX (Macro) import qualified Data.Map as M import qualified Data.Set as Set class HasReaderOptions st where extractReaderOptions :: st -> ReaderOptions - getOption :: (Stream s m t) => (ReaderOptions -> b) -> ParserT s st m b + getOption :: (Stream s m t) => (ReaderOptions -> b) -> ParsecT s st m b -- 
default getOption f = f . extractReaderOptions <$> getState @@ -69,7 +69,7 @@ class HasQuoteContext st m where failIfInQuoteContext :: (HasQuoteContext st m, Stream s m t) => QuoteContext - -> ParserT s st m () + -> ParsecT s st m () failIfInQuoteContext context = do context' <- getQuoteContext when (context' == context) $ Prelude.fail "already inside quotes" @@ -97,34 +97,34 @@ class HasIncludeFiles st where -- | Add a log message. logMessage :: (Stream s m a, HasLogMessages st) - => LogMessage -> ParserT s st m () + => LogMessage -> ParsecT s st m () logMessage msg = updateState (addLogMessage msg) -- | Report all the accumulated log messages, according to verbosity level. -reportLogMessages :: (PandocMonad m, HasLogMessages st) => ParserT s st m () +reportLogMessages :: (PandocMonad m, HasLogMessages st) => ParsecT s st m () reportLogMessages = do msgs <- getLogMessages <$> getState mapM_ report msgs -- | Succeed only if the extension is enabled. guardEnabled :: (Stream s m a, HasReaderOptions st) - => Extension -> ParserT s st m () + => Extension -> ParsecT s st m () guardEnabled ext = getOption readerExtensions >>= guard . extensionEnabled ext -- | Succeed only if the extension is disabled. guardDisabled :: (Stream s m a, HasReaderOptions st) - => Extension -> ParserT s st m () + => Extension -> ParsecT s st m () guardDisabled ext = getOption readerExtensions >>= guard . not . extensionEnabled ext -- | Update the position on which the last string ended. updateLastStrPos :: (Stream s m a, HasLastStrPosition st) - => ParserT s st m () + => ParsecT s st m () updateLastStrPos = getPosition >>= updateState . setLastStrPos . Just -- | Whether we are right after the end of a string. 
-notAfterString :: (Stream s m a, HasLastStrPosition st) => ParserT s st m Bool +notAfterString :: (Stream s m a, HasLastStrPosition st) => ParsecT s st m Bool notAfterString = do pos <- getPosition st <- getState diff --git a/src/Text/Pandoc/Parsing/Citations.hs b/src/Text/Pandoc/Parsing/Citations.hs index dfad454fdc41..f3c62c03cf40 100644 --- a/src/Text/Pandoc/Parsing/Citations.hs +++ b/src/Text/Pandoc/Parsing/Citations.hs @@ -21,6 +21,7 @@ import Text.Pandoc.Sources import Text.Parsec ( (<|>) , Stream(..) + , ParsecT , lookAhead , many , option @@ -28,25 +29,25 @@ import Text.Parsec ) import Text.Pandoc.Parsing.Capabilities (HasLastStrPosition, notAfterString) import Text.Pandoc.Parsing.General -import Text.Pandoc.Parsing.Types (ParserT) import qualified Data.Text as T citeKey :: (Stream s m Char, UpdateSourcePos s Char, HasLastStrPosition st) => Bool -- ^ If True, allow expanded @{..} syntax. - -> ParserT s st m (Bool, Text) + -> ParsecT s st m (Bool, Text) citeKey allowBraced = try $ do guard =<< notAfterString suppress_author <- option False (True <$ char '-') char '@' key <- simpleCiteIdentifier <|> if allowBraced - then charsInBalanced '{' '}' (satisfy (not . isSpace)) + then charsInBalanced '{' '}' + (T.singleton <$> (satisfy (not . 
isSpace))) else mzero return (suppress_author, key) simpleCiteIdentifier :: (Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m Text + => ParsecT s st m Text simpleCiteIdentifier = do firstChar <- alphaNum <|> char '_' <|> char '*' -- @* for wildcard in nocite let regchar = satisfy (\c -> isAlphaNum c || c == '_') @@ -54,4 +55,3 @@ simpleCiteIdentifier = do rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") <|> try (oneOf ":/" <* lookAhead (char '/')) return $ T.pack $ firstChar:rest - diff --git a/src/Text/Pandoc/Parsing/Types.hs b/src/Text/Pandoc/Parsing/Future.hs similarity index 76% rename from src/Text/Pandoc/Parsing/Types.hs rename to src/Text/Pandoc/Parsing/Future.hs index f3745270b459..041ee0a785b1 100644 --- a/src/Text/Pandoc/Parsing/Types.hs +++ b/src/Text/Pandoc/Parsing/Future.hs @@ -1,17 +1,15 @@ {-# LANGUAGE GeneralizedNewtypeDeriving #-} {- | - Module : Text.Pandoc.Parsing + Module : Text.Pandoc.Parsing.Future Copyright : Copyright (C) 2006-2022 John MacFarlane License : GPL-2.0-or-later Maintainer : John MacFarlane -Types and type-related functions for parsers. +Future type for parsing. -} -module Text.Pandoc.Parsing.Types - ( Parser - , ParserT - , Future (..) +module Text.Pandoc.Parsing.Future + ( Future (..) , runF , askF , asksF @@ -23,13 +21,6 @@ import Prelude hiding (Applicative(..)) import Control.Applicative (Applicative(..)) import Control.Monad.Reader ( asks, runReader, MonadReader(ask), Reader, ReaderT(ReaderT) ) -import Text.Parsec ( Parsec , ParsecT ) - --- | Generic parser type used by many pandoc readers. -type Parser t s = Parsec t s - --- | Generic parser transformer used by many pandoc readers. -type ParserT = ParsecT -- | Reader monad wrapping the parser state. 
This is used to possibly -- delay evaluation until all relevant information has been parsed and diff --git a/src/Text/Pandoc/Parsing/General.hs b/src/Text/Pandoc/Parsing/General.hs index c03bda117266..9decaef7dcf3 100644 --- a/src/Text/Pandoc/Parsing/General.hs +++ b/src/Text/Pandoc/Parsing/General.hs @@ -37,7 +37,6 @@ module Text.Pandoc.Parsing.General , manyTillChar , manyUntil , manyUntilChar - , nested , nonspaceChar , notFollowedBy' , oneOfStrings @@ -58,19 +57,20 @@ module Text.Pandoc.Parsing.General , uri , withHorizDisplacement , withRaw + , fromParsecError ) where import Control.Monad - ( guard - , join + ( join , liftM , unless , void , when + , MonadPlus(mzero) ) import Control.Monad.Except ( MonadError(throwError) ) -import Control.Monad.Identity ( Identity(..), MonadPlus(mzero) ) +import Control.Monad.Identity ( Identity(..) ) import Data.Char ( chr , isAlphaNum @@ -82,10 +82,10 @@ import Data.Char , toUpper ) import Data.Functor (($>)) -import Data.List (intercalate) +import Data.List (intercalate, sortOn) +import Data.Ord (Down(..)) import Data.Maybe (fromMaybe) import Data.Text (Text) -import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc.Asciify (toAsciiText) import Text.Pandoc.Builder (Attr, Inline(Str), Inlines, trimInlines) import Text.Pandoc.Class.PandocMonad (PandocMonad, readFileFromDirs, report) @@ -95,13 +95,20 @@ import Text.Pandoc.Options ( extensionEnabled , Extension(Ext_auto_identifiers, Ext_ascii_identifiers) , ReaderOptions(readerTabStop, readerExtensions) ) -import Text.Pandoc.Shared (escapeURI, mapLeft, schemes, tshow, uniqueIdent) +import Text.Pandoc.Shared (tshow, uniqueIdent) +import Text.Pandoc.URI (schemes, escapeURI) import Text.Pandoc.Sources -import Text.Pandoc.XML (fromEntities) +import Text.Pandoc.XML (fromEntities, lookupEntity) import Text.Parsec ( (<|>) + , Parsec , ParsecT , SourcePos + , sourceLine + , sourceColumn + , sourceName + , ParseError + , errorPos , Stream(..) 
, between , choice @@ -128,15 +135,15 @@ import Text.Parsec ) import Text.Parsec.Pos (initialPos, newPos) import Text.Pandoc.Error - ( PandocError(PandocParseError, PandocParsecError) ) + ( PandocError(PandocParseError) ) import Text.Pandoc.Parsing.Capabilities import Text.Pandoc.Parsing.State -import Text.Pandoc.Parsing.Types ( Parser, ParserT, Future (..)) - +import Text.Pandoc.Parsing.Future (Future (..)) import qualified Data.Set as Set import qualified Data.Text as T import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) +import qualified Data.Bifunctor as Bifunctor -- | Remove whitespace from start and end; just like @'trimInlines'@, -- but lifted into the 'Future' type. @@ -158,7 +165,7 @@ textStr t = string (T.unpack t) $> t -- | Parse any line of text, returning the contents without the -- final newline. -anyLine :: Monad m => ParserT Sources st m Text +anyLine :: Monad m => ParsecT Sources st m Text anyLine = do -- This is much faster than: -- manyTill anyChar newline @@ -181,13 +188,13 @@ anyLine = do return this -- | Parse any line, include the final newline in the output -anyLineNewline :: Monad m => ParserT Sources st m Text +anyLineNewline :: Monad m => ParsecT Sources st m Text anyLineNewline = (<> "\n") <$> anyLine -- | Parse indent by specified number of spaces (or equiv. tabs) indentWith :: (Stream s m Char, UpdateSourcePos s Char) => HasReaderOptions st - => Int -> ParserT s st m Text + => Int -> ParsecT s st m Text indentWith num = do tabStop <- getOption readerTabStop if num < tabStop @@ -197,28 +204,28 @@ indentWith num = do -- | Like @many@, but packs its result. manyChar :: Stream s m t - => ParserT s st m Char - -> ParserT s st m Text + => ParsecT s st m Char + -> ParsecT s st m Text manyChar = fmap T.pack . many -- | Like @many1@, but packs its result. 
many1Char :: Stream s m t - => ParserT s st m Char - -> ParserT s st m Text + => ParsecT s st m Char + -> ParsecT s st m Text many1Char = fmap T.pack . many1 -- | Like @manyTill@, but packs its result. manyTillChar :: Stream s m t - => ParserT s st m Char - -> ParserT s st m a - -> ParserT s st m Text + => ParsecT s st m Char + -> ParsecT s st m a + -> ParsecT s st m Text manyTillChar p = fmap T.pack . manyTill p -- | Like @manyTill@, but reads at least one item. many1Till :: (Show end, Stream s m t) - => ParserT s st m a - -> ParserT s st m end - -> ParserT s st m [a] + => ParsecT s st m a + -> ParsecT s st m end + -> ParsecT s st m [a] many1Till p end = do notFollowedBy' end first <- p @@ -227,15 +234,15 @@ many1Till p end = do -- | Like @many1Till@, but packs its result many1TillChar :: (Show end, Stream s m t) - => ParserT s st m Char - -> ParserT s st m end - -> ParserT s st m Text + => ParsecT s st m Char + -> ParsecT s st m end + -> ParsecT s st m Text many1TillChar p = fmap T.pack . many1Till p -- | Like @manyTill@, but also returns the result of end parser. -manyUntil :: ParserT s u m a - -> ParserT s u m b - -> ParserT s u m ([a], b) +manyUntil :: ParsecT s u m a + -> ParsecT s u m b + -> ParsecT s u m ([a], b) manyUntil p end = scan where scan = (do e <- end @@ -246,9 +253,9 @@ manyUntil p end = scan return (x:xs, e)) -- | Like @manyUntil@, but also packs its result. -manyUntilChar :: ParserT s u m Char - -> ParserT s u m b - -> ParserT s u m (Text, b) +manyUntilChar :: ParsecT s u m Char + -> ParsecT s u m b + -> ParsecT s u m (Text, b) manyUntilChar p = fmap go . manyUntil p where go (x, y) = (T.pack x, y) @@ -263,7 +270,7 @@ sepBy1' p sep = (:) <$> p <*> many (try $ sep >> p) -- | A more general form of @notFollowedBy@. This one allows any -- type of parser to be specified, and succeeds only if that parser fails. -- It does not consume any input. 
-notFollowedBy' :: (Show b, Stream s m a) => ParserT s st m b -> ParserT s st m () +notFollowedBy' :: (Show b, Stream s m a) => ParsecT s st m b -> ParsecT s st m () notFollowedBy' p = try $ join $ do a <- try p return (unexpected (show a)) <|> @@ -271,12 +278,12 @@ notFollowedBy' p = try $ join $ do a <- try p -- (This version due to Andrew Pimlott on the Haskell mailing list.) oneOfStrings' :: (Stream s m Char, UpdateSourcePos s Char) - => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text + => (Char -> Char -> Bool) -> [Text] -> ParsecT s st m Text oneOfStrings' f = fmap T.pack . oneOfStrings'' f . fmap T.unpack -- TODO: This should be re-implemented in a Text-aware way oneOfStrings'' :: (Stream s m Char, UpdateSourcePos s Char) - => (Char -> Char -> Bool) -> [String] -> ParserT s st m String + => (Char -> Char -> Bool) -> [String] -> ParsecT s st m String oneOfStrings'' _ [] = Prelude.fail "no strings" oneOfStrings'' matches strs = try $ do c <- anyChar @@ -292,7 +299,7 @@ oneOfStrings'' matches strs = try $ do -- two strings one of which is a prefix of the other, the longer -- string will be matched if possible. oneOfStrings :: (Stream s m Char, UpdateSourcePos s Char) - => [Text] -> ParserT s st m Text + => [Text] -> ParsecT s st m Text oneOfStrings = oneOfStrings' (==) -- | Parses one of a list of strings (tried in order), case insensitive. @@ -300,7 +307,7 @@ oneOfStrings = oneOfStrings' (==) -- TODO: This will not be accurate with general Unicode (neither -- Text.toLower nor Text.toCaseFold can be implemented with a map) oneOfStringsCI :: (Stream s m Char, UpdateSourcePos s Char) - => [Text] -> ParserT s st m Text + => [Text] -> ParsecT s st m Text oneOfStringsCI = oneOfStrings' ciMatch where ciMatch x y = toLower' x == toLower' y -- this optimizes toLower by checking common ASCII case @@ -312,12 +319,12 @@ oneOfStringsCI = oneOfStrings' ciMatch -- | Parses a space or tab. 
spaceChar :: (Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m Char + => ParsecT s st m Char spaceChar = satisfy $ \c -> c == ' ' || c == '\t' -- | Parses a nonspace, nonnewline character. nonspaceChar :: (Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m Char + => ParsecT s st m Char nonspaceChar = satisfy (not . isSpaceChar) isSpaceChar :: Char -> Bool @@ -329,23 +336,23 @@ isSpaceChar _ = False -- | Skips zero or more spaces or tabs. skipSpaces :: (Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m () + => ParsecT s st m () skipSpaces = skipMany spaceChar -- | Skips zero or more spaces or tabs, then reads a newline. blankline :: (Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m Char + => ParsecT s st m Char blankline = try $ skipSpaces >> newline -- | Parses one or more blank lines and returns a string of newlines. blanklines :: (Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m Text + => ParsecT s st m Text blanklines = T.pack <$> many1 blankline -- | Gobble n spaces; if tabs are encountered, expand them -- and gobble some or all of their spaces, leaving the rest. gobbleSpaces :: (HasReaderOptions st, Monad m) - => Int -> ParserT Sources st m () + => Int -> ParsecT Sources st m () gobbleSpaces 0 = return () gobbleSpaces n | n < 0 = error "gobbleSpaces called with negative number" @@ -353,7 +360,7 @@ gobbleSpaces n char ' ' <|> eatOneSpaceOfTab gobbleSpaces (n - 1) -eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Sources st m Char +eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParsecT Sources st m Char eatOneSpaceOfTab = do lookAhead (char '\t') pos <- getPosition @@ -372,7 +379,7 @@ eatOneSpaceOfTab = do -- | Gobble up to n spaces; if tabs are encountered, expand them -- and gobble some or all of their spaces, leaving the rest. 
gobbleAtMostSpaces :: (HasReaderOptions st, Monad m) - => Int -> ParserT Sources st m Int + => Int -> ParsecT Sources st m Int gobbleAtMostSpaces 0 = return 0 gobbleAtMostSpaces n | n < 0 = error "gobbleAtMostSpaces called with negative number" @@ -382,20 +389,20 @@ gobbleAtMostSpaces n -- | Parses material enclosed between start and end parsers. enclosed :: (Show end, Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m t -- ^ start parser - -> ParserT s st m end -- ^ end parser - -> ParserT s st m a -- ^ content parser (to be used repeatedly) - -> ParserT s st m [a] + => ParsecT s st m t -- ^ start parser + -> ParsecT s st m end -- ^ end parser + -> ParsecT s st m a -- ^ content parser (to be used repeatedly) + -> ParsecT s st m [a] enclosed start end parser = try $ start >> notFollowedBy space >> many1Till parser end -- | Parse string, case insensitive. stringAnyCase :: (Stream s m Char, UpdateSourcePos s Char) - => Text -> ParserT s st m Text + => Text -> ParsecT s st m Text stringAnyCase = fmap T.pack . stringAnyCase' . T.unpack stringAnyCase' :: (Stream s m Char, UpdateSourcePos s Char) - => String -> ParserT s st m String + => String -> ParsecT s st m String stringAnyCase' [] = string "" stringAnyCase' (x:xs) = do firstChar <- char (toUpper x) <|> char (toLower x) @@ -405,9 +412,9 @@ stringAnyCase' (x:xs) = do -- TODO rewrite by just adding to Sources stream? -- | Parse contents of 'str' using 'parser' and return result. parseFromString :: Monad m - => ParserT Sources st m r + => ParsecT Sources st m r -> Text - -> ParserT Sources st m r + -> ParsecT Sources st m r parseFromString parser str = do oldPos <- getPosition oldInput <- getInput @@ -422,9 +429,9 @@ parseFromString parser str = do -- | Like 'parseFromString' but specialized for 'ParserState'. -- This resets 'stateLastStrPos', which is almost always what we want. 
parseFromString' :: (Monad m, HasLastStrPosition u) - => ParserT Sources u m a + => ParsecT Sources u m a -> Text - -> ParserT Sources u m a + -> ParsecT Sources u m a parseFromString' parser str = do oldLastStrPos <- getLastStrPos <$> getState updateState $ setLastStrPos Nothing @@ -433,7 +440,7 @@ parseFromString' parser str = do return res -- | Parse raw line block up to and including blank lines. -lineClump :: Monad m => ParserT Sources st m Text +lineClump :: Monad m => ParsecT Sources st m Text lineClump = blanklines <|> (T.unlines <$> many1 (notFollowedBy blankline >> anyLine)) @@ -442,27 +449,22 @@ lineClump = blanklines -- pairs of open and close, which must be different. For example, -- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))" -- and return "hello (there)". -charsInBalanced :: (Stream s m Char, UpdateSourcePos s Char) => Char -> Char -> ParserT s st m Char - -> ParserT s st m Text +charsInBalanced :: (Stream s m Char, UpdateSourcePos s Char) + => Char -> Char -> ParsecT s st m Text -> ParsecT s st m Text charsInBalanced open close parser = try $ do char open let isDelim c = c == open || c == close - raw <- many $ T.pack <$> many1 (notFollowedBy (satisfy isDelim) >> parser) + raw <- many $ mconcat <$> many1 (notFollowedBy (satisfy isDelim) >> parser) <|> (do res <- charsInBalanced open close parser return $ T.singleton open <> res <> T.singleton close) char close - return $ T.concat raw - --- old charsInBalanced would be: --- charsInBalanced open close (noneOf "\n" <|> char '\n' >> notFollowedBy blankline) --- old charsInBalanced' would be: --- charsInBalanced open close anyChar + return $ mconcat raw -- Parsers for email addresses and URIs -- | Parses an email address; returns original and corresponding -- escaped mailto: URI. 
-emailAddress :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text) +emailAddress :: (Stream s m Char, UpdateSourcePos s Char) => ParsecT s st m (Text, Text) emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain) where toResult mbox dom = let full = fromEntities $ T.pack $ mbox ++ '@':dom in (full, escapeURI $ "mailto:" <> full) @@ -485,11 +487,11 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain) isEmailPunct c = T.any (== c) "!\"#$%&'*+-/=?^_{|}~;" -uriScheme :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Text +uriScheme :: (Stream s m Char, UpdateSourcePos s Char) => ParsecT s st m Text uriScheme = oneOfStringsCI (Set.toList schemes) -- | Parses a URI. Returns pair of original and URI-escaped version. -uri :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text) +uri :: (Stream s m Char, UpdateSourcePos s Char) => ParsecT s st m (Text, Text) uri = try $ do scheme <- uriScheme char ':' @@ -523,7 +525,7 @@ uri = try $ do wordChar = satisfy isWordChar percentEscaped = try $ (:) <$> char '%' <*> many1 hexDigit - entity = try $ pure <$> characterReference + entity = try $ T.unpack <$> characterReference punct = try $ many1 (char ',') <|> fmap pure (satisfy (\c -> not (isSpace c) && c /= '<' && c /= '>')) uriChunk = many1 wordChar <|> percentEscaped @@ -537,8 +539,8 @@ uri = try $ do -- and the source column at the beginning). Vertical displacement -- (source row) is ignored. withHorizDisplacement :: (Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m a -- ^ Parser to apply - -> ParserT s st m (a, Int) -- ^ (result, displacement) + => ParsecT s st m a -- ^ Parsec to apply + -> ParsecT s st m (a, Int) -- ^ (result, displacement) withHorizDisplacement parser = do pos1 <- getPosition result <- parser @@ -573,28 +575,24 @@ sourcesDifference (Sources is1) (Sources is2) = go is1 is2 -- | Parses backslash, then applies character parser. 
escaped :: (Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m Char -- ^ Parser for character to escape - -> ParserT s st m Char + => ParsecT s st m Char -- ^ Parsec for character to escape + -> ParsecT s st m Char escaped parser = try $ char '\\' >> parser -- | Parse character entity. -characterReference :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char +characterReference :: (Stream s m Char, UpdateSourcePos s Char) => ParsecT s st m Text characterReference = try $ do char '&' - ent <- many1Till nonspaceChar (char ';') - let ent' = case ent of - '#':'X':xs -> '#':'x':xs -- workaround tagsoup bug - '#':_ -> ent - _ -> ent ++ ";" - case lookupEntity ent' of - Just (c : _) -> return c + ent <- many1TillChar nonspaceChar (char ';') + case lookupEntity (ent <> ";") of + Just t -> return t _ -> Prelude.fail "entity not found" -- | Parses a character reference and returns a Str element. -charRef :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inline -charRef = Str . T.singleton <$> characterReference +charRef :: (Stream s m Char, UpdateSourcePos s Char) => ParsecT s st m Inline +charRef = Str <$> characterReference -lineBlockLine :: Monad m => ParserT Sources st m Text +lineBlockLine :: Monad m => ParsecT Sources st m Text lineBlockLine = try $ do char '|' char ' ' @@ -604,11 +602,11 @@ lineBlockLine = try $ do continuations <- many (try $ char ' ' >> anyLine) return $ white <> T.unwords (line : continuations) -blankLineBlockLine :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char +blankLineBlockLine :: (Stream s m Char, UpdateSourcePos s Char) => ParsecT s st m Char blankLineBlockLine = try (char '|' >> blankline) -- | Parses an RST-style line block and returns a list of strings. 
-lineBlockLines :: Monad m => ParserT Sources st m [Text] +lineBlockLines :: Monad m => ParsecT Sources st m [Text] lineBlockLines = try $ do lines' <- many1 (lineBlockLine <|> (T.singleton <$> blankLineBlockLine)) skipMany blankline @@ -617,19 +615,20 @@ lineBlockLines = try $ do -- | Removes the ParsecT layer from the monad transformer stack readWithM :: (Monad m, ToSources t) - => ParserT Sources st m a -- ^ parser + => ParsecT Sources st m a -- ^ parser -> st -- ^ initial state -> t -- ^ input -> m (Either PandocError a) readWithM parser state input = - mapLeft (PandocParsecError sources) + Bifunctor.first (fromParsecError sources) <$> runParserT parser state (initialSourceName sources) sources where sources = toSources input + -- | Parse a string with a given parser and state readWith :: ToSources t - => Parser Sources st a + => Parsec Sources st a -> st -> t -> Either PandocError a @@ -637,7 +636,7 @@ readWith p t inp = runIdentity $ readWithM p t inp -- | Parse a string with @parser@ (for testing). testStringWith :: Show a - => ParserT Sources ParserState Identity a + => ParsecT Sources ParserState Identity a -> Text -> IO () testStringWith parser str = UTF8.putStrLn $ tshow $ @@ -652,7 +651,7 @@ testStringWith parser str = UTF8.putStrLn $ tshow $ -- (explicit or automatically generated). 
registerHeader :: (Stream s m a, HasReaderOptions st, HasLogMessages st, HasIdentifierList st) - => Attr -> Inlines -> ParserT s st m Attr + => Attr -> Inlines -> ParsecT s st m Attr registerHeader (ident,classes,kvs) header' = do ids <- extractIdentifierList <$> getState exts <- getOption readerExtensions @@ -673,19 +672,6 @@ registerHeader (ident,classes,kvs) header' = do updateState $ updateIdentifierList $ Set.insert ident return (ident,classes,kvs) --- This is used to prevent exponential blowups for things like: --- a**a*a**a*a**a*a**a*a**a*a**a*a** -nested :: Stream s m a - => ParserT s ParserState m a - -> ParserT s ParserState m a -nested p = do - nestlevel <- stateMaxNestingLevel <$> getState - guard $ nestlevel > 0 - updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 } - res <- p - updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } - return res - token :: (Stream s m t) => (t -> Text) -> (t -> SourcePos) @@ -694,7 +680,7 @@ token :: (Stream s m t) token pp pos match = tokenPrim (T.unpack . pp) (\_ t _ -> pos t) match infixr 5 <+?> -(<+?>) :: (Monoid a) => ParserT s st m a -> ParserT s st m a -> ParserT s st m a +(<+?>) :: (Monoid a) => ParsecT s st m a -> ParsecT s st m a -> ParsecT s st m a a <+?> b = a >>= flip fmap (try b <|> return mempty) . 
mappend extractIdClass :: Attr -> Attr @@ -705,13 +691,13 @@ extractIdClass (ident, cls, kvs) = (ident', cls', kvs') kvs' = filter (\(k,_) -> k /= "id" || k /= "class") kvs insertIncludedFile :: (PandocMonad m, HasIncludeFiles st) - => ParserT a st m b -- ^ parser to apply + => ParsecT a st m b -- ^ parser to apply -> (Text -> a) -- ^ convert Text to stream type -> [FilePath] -- ^ search path (directories) -> FilePath -- ^ path of file to include -> Maybe Int -- ^ start line (negative counts from end) -> Maybe Int -- ^ end line (negative counts from end) - -> ParserT a st m b + -> ParsecT a st m b insertIncludedFile parser toStream dirs f mbstartline mbendline = do oldPos <- getPosition oldInput <- getInput @@ -749,3 +735,28 @@ exciseLines mbstartline mbendline t = Nothing -> numLines Just x | x >= 0 -> x | otherwise -> numLines + x -- negative from end + +fromParsecError :: Sources -> ParseError -> PandocError +fromParsecError (Sources inputs) err' = PandocParseError msg + where + msg = "Error at " <> tshow err' <> errorContext + errPos = errorPos err' + errLine = sourceLine errPos + errColumn = sourceColumn errPos + errFile = sourceName errPos + errorContext = + case sortOn (Down . sourceLine . fst) + [ (pos,t) + | (pos,t) <- inputs + , sourceName pos == errFile + , sourceLine pos <= errLine + ] of + [] -> "" + ((pos,txt):_) -> + let ls = T.lines txt <> [""] + ln = (errLine - sourceLine pos) + 1 + in if length ls > ln && ln >= 1 + then T.concat ["\n", ls !! 
(ln - 1) + ,"\n", T.replicate (errColumn - 1) " " + ,"^"] + else "" diff --git a/src/Text/Pandoc/Parsing/GridTable.hs b/src/Text/Pandoc/Parsing/GridTable.hs index 58ab1494bdb0..cec8653f7ade 100644 --- a/src/Text/Pandoc/Parsing/GridTable.hs +++ b/src/Text/Pandoc/Parsing/GridTable.hs @@ -31,9 +31,8 @@ import Text.Pandoc.Builder (Blocks) import Text.Pandoc.Definition import Text.Pandoc.Parsing.Capabilities import Text.Pandoc.Parsing.General -import Text.Pandoc.Parsing.Types import Text.Pandoc.Sources -import Text.Parsec (Stream (..), optional, sepEndBy1, try) +import Text.Parsec (Stream (..), ParsecT, optional, sepEndBy1, try) import qualified Data.Text as T import qualified Text.GridTable as GT @@ -103,8 +102,8 @@ data TableNormalization -- blank lines, and ending with a footer (dashed line followed by blank -- line). gridTableWith :: (Monad m, Monad mf, HasLastStrPosition st, HasReaderOptions st) - => ParserT Sources st m (mf Blocks) -- ^ Block list parser - -> ParserT Sources st m (mf Blocks) + => ParsecT Sources st m (mf Blocks) -- ^ Block list parser + -> ParsecT Sources st m (mf Blocks) gridTableWith blocks = fmap tableFromComponents <$> gridTableWith' NoNormalization blocks @@ -113,8 +112,8 @@ gridTableWith blocks = fmap tableFromComponents <$> gridTableWith' :: (Monad m, Monad mf, HasReaderOptions st, HasLastStrPosition st) => TableNormalization - -> ParserT Sources st m (mf Blocks) -- ^ Block list parser - -> ParserT Sources st m (mf TableComponents) + -> ParsecT Sources st m (mf Blocks) -- ^ Block list parser + -> ParsecT Sources st m (mf TableComponents) gridTableWith' normalization blocks = do tbl <- GT.gridTable <* optional blanklines let blkTbl = GT.mapCells @@ -135,9 +134,13 @@ gridTableWith' normalization blocks = do let caption = B.emptyCaption return $ do rows'' <- mapM sequence rows' - let (hRows, bRows) = - splitAt (maybe 0 GT.fromRowIndex $ GT.arrayTableHead tbl) - (map (B.Row B.nullAttr) rows'') + let headLen = maybe 0 GT.fromRowIndex $ 
GT.arrayTableHead tbl + let (hRows, bRows') = + splitAt headLen (map (B.Row B.nullAttr) rows'') + let (bRows, fRows) = + case GT.arrayTableFoot tbl of + Just fIdx -> splitAt (GT.fromRowIndex fIdx - headLen - 1) bRows' + Nothing -> (bRows', []) let thead = B.TableHead B.nullAttr $ case (hRows, normalization) of -- normalize header if necessary: remove header if it contains -- only a single row in which all cells are empty. @@ -151,7 +154,7 @@ gridTableWith' normalization blocks = do in [B.Row nullAttr cells | not (null cells) && not (all simple cells)] _ -> hRows - let tfoot = B.TableFoot B.nullAttr [] + let tfoot = B.TableFoot B.nullAttr fRows let tbody = B.TableBody B.nullAttr 0 [] bRows return $ TableComponents nullAttr caption colspecs thead [tbody] tfoot @@ -177,7 +180,7 @@ convAlign GT.AlignDefault = B.AlignDefault fractionalColumnWidths :: GT.ArrayTable a -> Int -> [Double] fractionalColumnWidths gt charColumns = - let widths = map ((+1) . snd) $ -- include width of separator + let widths = map ((+1) . snd) -- include width of separator (elems $ GT.arrayTableColSpecs gt) norm = fromIntegral $ max (sum widths + length widths - 2) charColumns in map (\w -> fromIntegral w / norm) widths @@ -188,22 +191,22 @@ fractionalColumnWidths gt charColumns = -- 'lineParser', and 'footerParser'. 
tableWith :: (Stream s m Char, UpdateSourcePos s Char, HasReaderOptions st, Monad mf) - => ParserT s st m (mf [Blocks], [Alignment], [Int]) -- ^ header parser - -> ([Int] -> ParserT s st m (mf [Blocks])) -- ^ row parser - -> ParserT s st m sep -- ^ line parser - -> ParserT s st m end -- ^ footer parser - -> ParserT s st m (mf Blocks) + => ParsecT s st m (mf [Blocks], [Alignment], [Int]) -- ^ header parser + -> ([Int] -> ParsecT s st m (mf [Blocks])) -- ^ row parser + -> ParsecT s st m sep -- ^ line parser + -> ParsecT s st m end -- ^ footer parser + -> ParsecT s st m (mf Blocks) tableWith hp rp lp fp = fmap tableFromComponents <$> tableWith' NoNormalization hp rp lp fp tableWith' :: (Stream s m Char, UpdateSourcePos s Char, HasReaderOptions st, Monad mf) => TableNormalization - -> ParserT s st m (mf [Blocks], [Alignment], [Int]) -- ^ header parser - -> ([Int] -> ParserT s st m (mf [Blocks])) -- ^ row parser - -> ParserT s st m sep -- ^ line parser - -> ParserT s st m end -- ^ footer parser - -> ParserT s st m (mf TableComponents) + -> ParsecT s st m (mf [Blocks], [Alignment], [Int]) -- ^ header parser + -> ([Int] -> ParsecT s st m (mf [Blocks])) -- ^ row parser + -> ParsecT s st m sep -- ^ line parser + -> ParsecT s st m end -- ^ footer parser + -> ParsecT s st m (mf TableComponents) tableWith' n11n headerParser rowParser lineParser footerParser = try $ do (heads, aligns, indices) <- headerParser lines' <- sequence <$> rowParser indices `sepEndBy1` lineParser diff --git a/src/Text/Pandoc/Parsing/Lists.hs b/src/Text/Pandoc/Parsing/Lists.hs index 6f0c47ce23b1..bd12c6ac2da0 100644 --- a/src/Text/Pandoc/Parsing/Lists.hs +++ b/src/Text/Pandoc/Parsing/Lists.hs @@ -37,6 +37,7 @@ import Text.Pandoc.Shared (safeRead) import Text.Pandoc.Sources import Text.Parsec ( (<|>) + , ParsecT , Stream(..) 
, choice , getState @@ -48,7 +49,6 @@ import Text.Parsec , updateState ) import Text.Pandoc.Parsing.State -import Text.Pandoc.Parsing.Types (ParserT) import qualified Data.Map as M import qualified Data.Text as T @@ -56,7 +56,7 @@ import qualified Data.Text as T -- | Parses a roman numeral (uppercase or lowercase), returns number. romanNumeral :: (Stream s m Char, UpdateSourcePos s Char) => Bool -- ^ Uppercase if true - -> ParserT s st m Int + -> ParsecT s st m Int romanNumeral upperCase = do let rchar uc = char $ if upperCase then uc else toLower uc let one = rchar 'I' @@ -88,19 +88,19 @@ romanNumeral upperCase = do else return total -- | Parses an uppercase roman numeral and returns (UpperRoman, number). -upperRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) +upperRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParsecT s st m (ListNumberStyle, Int) upperRoman = do num <- romanNumeral True return (UpperRoman, num) -- | Parses a lowercase roman numeral and returns (LowerRoman, number). -lowerRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) +lowerRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParsecT s st m (ListNumberStyle, Int) lowerRoman = do num <- romanNumeral False return (LowerRoman, num) -- | Parses a decimal numeral and returns (Decimal, number). -decimal :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) +decimal :: (Stream s m Char, UpdateSourcePos s Char) => ParsecT s st m (ListNumberStyle, Int) decimal = do num <- many1 digit return (Decimal, fromMaybe 1 $ safeRead $ T.pack num) @@ -110,7 +110,7 @@ decimal = do -- example number is incremented in parser state, and the label -- (if present) is added to the label table. exampleNum :: (Stream s m Char, UpdateSourcePos s Char) - => ParserT s ParserState m (ListNumberStyle, Int) + => ParsecT s ParserState m (ListNumberStyle, Int) exampleNum = do char '@' lab <- mconcat . 
map T.pack <$> @@ -128,30 +128,30 @@ exampleNum = do return (Example, num) -- | Parses a '#' returns (DefaultStyle, 1). -defaultNum :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) +defaultNum :: (Stream s m Char, UpdateSourcePos s Char) => ParsecT s st m (ListNumberStyle, Int) defaultNum = do char '#' return (DefaultStyle, 1) -- | Parses a lowercase letter and returns (LowerAlpha, number). -lowerAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) +lowerAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParsecT s st m (ListNumberStyle, Int) lowerAlpha = do ch <- satisfy isAsciiLower return (LowerAlpha, ord ch - ord 'a' + 1) -- | Parses an uppercase letter and returns (UpperAlpha, number). -upperAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) +upperAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParsecT s st m (ListNumberStyle, Int) upperAlpha = do ch <- satisfy isAsciiUpper return (UpperAlpha, ord ch - ord 'A' + 1) -- | Parses a roman numeral i or I -romanOne :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) +romanOne :: (Stream s m Char, UpdateSourcePos s Char) => ParsecT s st m (ListNumberStyle, Int) romanOne = (char 'i' >> return (LowerRoman, 1)) <|> (char 'I' >> return (UpperRoman, 1)) -- | Parses an ordered list marker and returns list attributes. -anyOrderedListMarker :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s ParserState m ListAttributes +anyOrderedListMarker :: (Stream s m Char, UpdateSourcePos s Char) => ParsecT s ParserState m ListAttributes anyOrderedListMarker = choice [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens], numParser <- [decimal, exampleNum, defaultNum, romanOne, @@ -159,8 +159,8 @@ anyOrderedListMarker = choice -- | Parses a list number (num) followed by a period, returns list attributes. 
inPeriod :: (Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m (ListNumberStyle, Int) - -> ParserT s st m ListAttributes + => ParsecT s st m (ListNumberStyle, Int) + -> ParsecT s st m ListAttributes inPeriod num = try $ do (style, start) <- num char '.' @@ -171,8 +171,8 @@ inPeriod num = try $ do -- | Parses a list number (num) followed by a paren, returns list attributes. inOneParen :: (Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m (ListNumberStyle, Int) - -> ParserT s st m ListAttributes + => ParsecT s st m (ListNumberStyle, Int) + -> ParsecT s st m ListAttributes inOneParen num = try $ do (style, start) <- num char ')' @@ -180,8 +180,8 @@ inOneParen num = try $ do -- | Parses a list number (num) enclosed in parens, returns list attributes. inTwoParens :: (Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m (ListNumberStyle, Int) - -> ParserT s st m ListAttributes + => ParsecT s st m (ListNumberStyle, Int) + -> ParsecT s st m ListAttributes inTwoParens num = try $ do char '(' (style, start) <- num @@ -193,7 +193,7 @@ inTwoParens num = try $ do orderedListMarker :: (Stream s m Char, UpdateSourcePos s Char) => ListNumberStyle -> ListNumberDelim - -> ParserT s ParserState m Int + -> ParsecT s ParserState m Int orderedListMarker style delim = do let num = defaultNum <|> -- # can continue any kind of list case style of diff --git a/src/Text/Pandoc/Parsing/Math.hs b/src/Text/Pandoc/Parsing/Math.hs index a2cfa1a0700a..d001dc82ae10 100644 --- a/src/Text/Pandoc/Parsing/Math.hs +++ b/src/Text/Pandoc/Parsing/Math.hs @@ -17,20 +17,19 @@ where import Control.Monad (mzero, when) import Data.Text (Text) -import Text.Parsec ((<|>), Stream(..), notFollowedBy, skipMany, try) +import Text.Parsec ((<|>), ParsecT, Stream(..), notFollowedBy, skipMany, try) import Text.Pandoc.Options ( Extension(Ext_tex_math_dollars, Ext_tex_math_single_backslash, Ext_tex_math_double_backslash) ) import Text.Pandoc.Parsing.Capabilities (HasReaderOptions, guardEnabled) 
import Text.Pandoc.Parsing.General -import Text.Pandoc.Parsing.Types (ParserT) import Text.Pandoc.Shared (trimMath) import Text.Pandoc.Sources (UpdateSourcePos, anyChar, char, digit, newline, satisfy, space, string) import qualified Data.Text as T -mathInlineWith :: (Stream s m Char, UpdateSourcePos s Char) => Text -> Text -> ParserT s st m Text +mathInlineWith :: (Stream s m Char, UpdateSourcePos s Char) => Text -> Text -> ParsecT s st m Text mathInlineWith op cl = try $ do textStr op when (op == "$") $ notFollowedBy space @@ -51,10 +50,10 @@ mathInlineWith op cl = try $ do notFollowedBy digit -- to prevent capture of $5 return $ trimMath $ T.concat words' where - inBalancedBraces :: (Stream s m Char, UpdateSourcePos s Char) => Int -> Text -> ParserT s st m Text + inBalancedBraces :: (Stream s m Char, UpdateSourcePos s Char) => Int -> Text -> ParsecT s st m Text inBalancedBraces n = fmap T.pack . inBalancedBraces' n . T.unpack - inBalancedBraces' :: (Stream s m Char, UpdateSourcePos s Char) => Int -> String -> ParserT s st m String + inBalancedBraces' :: (Stream s m Char, UpdateSourcePos s Char) => Int -> String -> ParsecT s st m String inBalancedBraces' 0 "" = do c <- anyChar if c == '{' @@ -71,14 +70,14 @@ mathInlineWith op cl = try $ do '{' -> inBalancedBraces' (numOpen + 1) (c:xs) _ -> inBalancedBraces' numOpen (c:xs) -mathDisplayWith :: (Stream s m Char, UpdateSourcePos s Char) => Text -> Text -> ParserT s st m Text +mathDisplayWith :: (Stream s m Char, UpdateSourcePos s Char) => Text -> Text -> ParsecT s st m Text mathDisplayWith op cl = try $ fmap T.pack $ do textStr op many1Till (satisfy (/= '\n') <|> (newline <* notFollowedBy' blankline)) (try $ textStr cl) mathDisplay :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m Text + => ParsecT s st m Text mathDisplay = (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$") <|> (guardEnabled Ext_tex_math_single_backslash >> @@ -87,7 +86,7 @@ mathDisplay = 
mathDisplayWith "\\\\[" "\\\\]") mathInline :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m Text + => ParsecT s st m Text mathInline = (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$") <|> (guardEnabled Ext_tex_math_single_backslash >> diff --git a/src/Text/Pandoc/Parsing/Smart.hs b/src/Text/Pandoc/Parsing/Smart.hs index 52ad2711970f..e9fe9f7a7217 100644 --- a/src/Text/Pandoc/Parsing/Smart.hs +++ b/src/Text/Pandoc/Parsing/Smart.hs @@ -33,17 +33,17 @@ import Text.Pandoc.Options import Text.Pandoc.Sources import Text.Pandoc.Parsing.Capabilities import Text.Pandoc.Parsing.General -import Text.Pandoc.Parsing.Types (ParserT) import Text.Parsec ( (<|>) , Stream(..) + , ParsecT , choice , lookAhead , manyTill , notFollowedBy , try ) - +import qualified Data.Text as T import qualified Text.Pandoc.Builder as B -- | Parses various ASCII punctuation, quotes, and apostrophe in a smart @@ -53,8 +53,8 @@ import qualified Text.Pandoc.Builder as B smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m Inlines - -> ParserT s st m Inlines + => ParsecT s st m Inlines + -> ParsecT s st m Inlines smartPunctuation inlineParser = do guardEnabled Ext_smart choice [ quoted inlineParser, apostrophe, doubleCloseQuote, dash, ellipses ] @@ -63,16 +63,16 @@ smartPunctuation inlineParser = do -- quoting conventions. quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m Inlines - -> ParserT s st m Inlines + => ParsecT s st m Inlines + -> ParsecT s st m Inlines quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser -- | Parses inline text in single quotes, assumes English quoting -- conventions. 
singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m Inlines - -> ParserT s st m Inlines + => ParsecT s st m Inlines + -> ParsecT s st m Inlines singleQuoted inlineParser = do singleQuoteStart (B.singleQuoted . mconcat <$> @@ -84,8 +84,8 @@ singleQuoted inlineParser = do -- conventions. doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st, Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m Inlines - -> ParserT s st m Inlines + => ParsecT s st m Inlines + -> ParsecT s st m Inlines doubleQuoted inlineParser = do doubleQuoteStart (B.doubleQuoted . mconcat <$> @@ -93,11 +93,12 @@ doubleQuoted inlineParser = do (withQuoteContext InDoubleQuote (manyTill inlineParser doubleQuoteEnd))) <|> pure (B.str "\8220") -charOrRef :: (Stream s m Char, UpdateSourcePos s Char) => [Char] -> ParserT s st m Char +charOrRef :: (Stream s m Char, UpdateSourcePos s Char) => [Char] -> ParsecT s st m Char charOrRef cs = - oneOf cs <|> try (do c <- characterReference - guard (c `elem` cs) - return c) + oneOf cs <|> try (do t <- characterReference + case T.unpack t of + [c] | c `elem` cs -> return c + _ -> fail "unexpected character reference") -- | Succeeds if the parser is -- @@ -109,7 +110,7 @@ charOrRef cs = -- Gobbles the quote character on success. singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m () + => ParsecT s st m () singleQuoteStart = do failIfInQuoteContext InSingleQuote -- single quote start can't be right after str @@ -119,7 +120,7 @@ singleQuoteStart = do void $ lookAhead (satisfy (not . 
isSpaceChar)) singleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m () + => ParsecT s st m () singleQuoteEnd = try $ do charOrRef "'\8217\146" notFollowedBy alphaNum @@ -137,7 +138,7 @@ singleQuoteEnd = try $ do doubleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m () + => ParsecT s st m () doubleQuoteStart = do failIfInQuoteContext InDoubleQuote guard =<< notAfterString @@ -146,24 +147,24 @@ doubleQuoteStart = do -- | Parses a closing quote character. doubleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m () + => ParsecT s st m () doubleQuoteEnd = void (charOrRef "\"\8221\148") -- | Parses an ASCII apostrophe (@'@) or right single quotation mark and -- returns a RIGHT SINGLE QUOtatiON MARK character. apostrophe :: (Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m Inlines + => ParsecT s st m Inlines apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\8217") -- | Parses an ASCII quotation mark character and returns a RIGHT DOUBLE -- QUOTATION MARK. doubleCloseQuote :: (Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m Inlines + => ParsecT s st m Inlines doubleCloseQuote = B.str "\8221" <$ char '"' -- | Parses three dots as HORIZONTAL ELLIPSIS. ellipses :: (Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m Inlines + => ParsecT s st m Inlines ellipses = try (string "..." >> return (B.str "\8230")) -- | Parses two hyphens as EN DASH and three as EM DASH. @@ -172,7 +173,7 @@ ellipses = try (string "..." >> return (B.str "\8230")) -- parsed as EM DASH, and one hyphen is parsed as EN DASH if it is -- followed by a digit. 
dash :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m Inlines + => ParsecT s st m Inlines dash = try $ do oldDashes <- extensionEnabled Ext_old_dashes <$> getOption readerExtensions if oldDashes diff --git a/src/Text/Pandoc/Parsing/State.hs b/src/Text/Pandoc/Parsing/State.hs index 93eca0c50daf..66432fa1edec 100644 --- a/src/Text/Pandoc/Parsing/State.hs +++ b/src/Text/Pandoc/Parsing/State.hs @@ -32,8 +32,8 @@ import Text.Pandoc.Definition (Attr, Meta, Target, nullMeta) import Text.Pandoc.Logging (LogMessage) import Text.Pandoc.Options (ReaderOptions) import Text.Pandoc.Parsing.Capabilities -import Text.Pandoc.Parsing.Types -import Text.Pandoc.Readers.LaTeX.Types (Macro) +import Text.Pandoc.Parsing.Future +import Text.Pandoc.TeX (Macro) import qualified Data.Map as M import qualified Data.Set as Set @@ -46,7 +46,6 @@ data ParserState = ParserState , stateQuoteContext :: QuoteContext -- ^ Inside quoted environment? , stateAllowLinks :: Bool -- ^ Allow parsing of links , stateAllowLineBreaks :: Bool -- ^ Allow parsing of line breaks - , stateMaxNestingLevel :: Int -- ^ Max # of nested Strong/Emph , stateLastStrPos :: Maybe SourcePos -- ^ Position after last str parsed , stateKeys :: KeyTable -- ^ List of reference keys , stateHeaderKeys :: KeyTable -- ^ List of implicit header ref keys @@ -141,7 +140,6 @@ defaultParserState = ParserState , stateQuoteContext = NoQuote , stateAllowLinks = True , stateAllowLineBreaks = True - , stateMaxNestingLevel = 6 , stateLastStrPos = Nothing , stateKeys = M.empty , stateHeaderKeys = M.empty diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs index 7abd1d0244b8..2c9dee416cd0 100644 --- a/src/Text/Pandoc/Readers.hs +++ b/src/Text/Pandoc/Readers.hs @@ -1,7 +1,8 @@ +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE MonoLocalBinds #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TupleSections #-} {- | Module : 
Text.Pandoc.Readers Copyright : Copyright (C) 2006-2022 John MacFarlane @@ -25,7 +26,7 @@ module Text.Pandoc.Readers Reader (..) , readers , readDocx - , readOdt + , readODT , readMarkdown , readCommonMark , readCreole @@ -64,17 +65,16 @@ module Text.Pandoc.Readers , getDefaultExtensions ) where -import Control.Monad (unless) import Control.Monad.Except (throwError) import Data.Aeson import qualified Data.ByteString.Lazy as BL import Data.Text (Text) import qualified Data.Text as T -import Text.Pandoc.Shared (tshow) import Text.Pandoc.Class import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Extensions +import qualified Text.Pandoc.Format as Format import Text.Pandoc.Options import Text.Pandoc.Readers.CommonMark import Text.Pandoc.Readers.Markdown @@ -93,7 +93,7 @@ import Text.Pandoc.Readers.LaTeX import Text.Pandoc.Readers.MediaWiki import Text.Pandoc.Readers.Muse import Text.Pandoc.Readers.Native -import Text.Pandoc.Readers.Odt +import Text.Pandoc.Readers.ODT import Text.Pandoc.Readers.OPML import Text.Pandoc.Readers.Org import Text.Pandoc.Readers.RST @@ -145,7 +145,7 @@ readers = [("native" , TextReader readNative) ,("twiki" , TextReader readTWiki) ,("tikiwiki" , TextReader readTikiWiki) ,("docx" , ByteStringReader readDocx) - ,("odt" , ByteStringReader readOdt) + ,("odt" , ByteStringReader readODT) ,("t2t" , TextReader readTxt2Tags) ,("epub" , ByteStringReader readEPUB) ,("muse" , TextReader readMuse) @@ -162,29 +162,14 @@ readers = [("native" , TextReader readNative) ,("rtf" , TextReader readRTF) ] --- | Retrieve reader, extensions based on formatSpec (format+extensions). 
-getReader :: PandocMonad m => Text -> m (Reader m, Extensions) -getReader s = - case parseFormatSpec s of - Left e -> throwError $ PandocAppError $ - "Error parsing reader format " <> tshow s <> ": " <> tshow e - Right (readerName, extsToEnable, extsToDisable) -> - case lookup readerName readers of - Nothing -> throwError $ PandocUnknownReaderError - readerName - Just r -> do - let allExts = getAllExtensions readerName - let exts = foldr disableExtension - (foldr enableExtension - (getDefaultExtensions readerName) - extsToEnable) extsToDisable - mapM_ (\ext -> - unless (extensionEnabled ext allExts) $ - throwError $ - PandocUnsupportedExtensionError - (T.drop 4 $ T.pack $ show ext) readerName) - (extsToEnable ++ extsToDisable) - return (r, exts) +-- | Retrieve reader, extensions based on format spec (format+extensions). +getReader :: PandocMonad m => Format.FlavoredFormat -> m (Reader m, Extensions) +getReader flvrd = do + let readerName = Format.formatName flvrd + case lookup readerName readers of + Nothing -> throwError $ PandocUnknownReaderError readerName + Just r -> (r,) <$> + Format.applyExtensionsDiff (Format.getExtensionsConfig readerName) flvrd -- | Read pandoc document from JSON format. 
readJSON :: (PandocMonad m, ToSources a) diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs index 63151c742039..6d64e1adfa78 100644 --- a/src/Text/Pandoc/Readers/BibTeX.hs +++ b/src/Text/Pandoc/Readers/BibTeX.hs @@ -23,6 +23,7 @@ where import Text.Pandoc.Options import Text.Pandoc.Definition import Text.Pandoc.Builder (setMeta, cite, str) +import Text.Pandoc.Parsing (fromParsecError) import Citeproc (Lang(..), parseLang) import Citeproc.Locale (getLocale) import Text.Pandoc.Error (PandocError(..)) @@ -63,7 +64,7 @@ readBibTeX' variant _opts t = do Left _ -> throwError $ PandocCiteprocError e Right l -> return l case BibTeX.readBibtexString variant locale (const True) t of - Left e -> throwError $ PandocParsecError (toSources t) e + Left e -> throwError $ fromParsecError (toSources t) e Right refs -> return $ setMeta "references" (map referenceToMetaValue refs) . setMeta "nocite" diff --git a/src/Text/Pandoc/Readers/CSV.hs b/src/Text/Pandoc/Readers/CSV.hs index 23e0f7448aec..fd15a5510811 100644 --- a/src/Text/Pandoc/Readers/CSV.hs +++ b/src/Text/Pandoc/Readers/CSV.hs @@ -21,11 +21,11 @@ import Text.Pandoc.CSV (parseCSV, defaultCSVOptions, CSVOptions(..)) import Text.Pandoc.Definition import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad) -import Text.Pandoc.Error import Text.Pandoc.Sources (ToSources(..), sourcesToText) import Text.Pandoc.Options (ReaderOptions) import Control.Monad.Except (throwError) import Data.Text (Text) +import Text.Pandoc.Parsing (fromParsecError) readCSV :: (PandocMonad m, ToSources a) => ReaderOptions -- ^ Reader options @@ -68,4 +68,4 @@ readCSVWith csvopts txt = do aligns = replicate numcols AlignDefault widths = replicate numcols ColWidthDefault Right [] -> return $ B.doc mempty - Left e -> throwError $ PandocParsecError (toSources [("",txt)]) e + Left e -> throwError $ fromParsecError (toSources [("",txt)]) e diff --git a/src/Text/Pandoc/Readers/CommonMark.hs 
b/src/Text/Pandoc/Readers/CommonMark.hs index 528d84dbf5a1..ec62765f15d8 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -25,15 +25,16 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Builder as B import Text.Pandoc.Options -import Text.Pandoc.Error import Text.Pandoc.Readers.Metadata (yamlMetaBlock) -import Control.Monad.Except +import Control.Monad (MonadPlus(mzero)) +import Control.Monad.Except ( MonadError(throwError) ) import Data.Functor.Identity (runIdentity) import Data.Typeable import Text.Pandoc.Parsing (runParserT, getInput, getPosition, runF, defaultParserState, option, many1, anyChar, - Sources(..), ToSources(..), ParserT, Future, - sourceName, sourceLine, incSourceLine) + Sources(..), ToSources(..), ParsecT, Future, + sourceName, sourceLine, incSourceLine, + fromParsecError) import Text.Pandoc.Walk (walk) import qualified Data.Text as T import qualified Data.Attoparsec.Text as A @@ -80,7 +81,7 @@ sourceToToks (pos, s) = map adjust $ tokenize (sourceName pos) s metaValueParser :: Monad m - => ReaderOptions -> ParserT Sources st m (Future st MetaValue) + => ReaderOptions -> ParsecT Sources st m (Future st MetaValue) metaValueParser opts = do inp <- option "" $ T.pack <$> many1 anyChar let toks = concatMap sourceToToks (unSources (toSources inp)) @@ -95,10 +96,10 @@ readCommonMarkBody opts s toks = else id) <$> if isEnabled Ext_sourcepos opts then case runIdentity (parseCommonmarkWith (specFor opts) toks) of - Left err -> throwError $ PandocParsecError s err + Left err -> throwError $ fromParsecError s err Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls else case runIdentity (parseCommonmarkWith (specFor opts) toks) of - Left err -> throwError $ PandocParsecError s err + Left err -> throwError $ fromParsecError s err Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls stripBlockComments :: Block -> Block @@ -158,4 +159,3 @@ specFor opts 
= foldr ($) defaultSyntaxSpec exts [ (taskListSpec <>) | isEnabled Ext_task_lists opts ] ++ [ (rebaseRelativePathsSpec <>) | isEnabled Ext_rebase_relative_paths opts ] - diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs index ad848ada78c9..63a5b52d0684 100644 --- a/src/Text/Pandoc/Readers/Creole.hs +++ b/src/Text/Pandoc/Readers/Creole.hs @@ -13,7 +13,8 @@ Conversion of creole text to 'Pandoc' document. module Text.Pandoc.Readers.Creole ( readCreole ) where -import Control.Monad.Except (guard, liftM2, throwError) +import Control.Monad +import Control.Monad.Except (throwError) import qualified Data.Foldable as F import Data.Maybe (fromMaybe) import Data.Text (Text) @@ -36,7 +37,7 @@ readCreole opts s = do Left e -> throwError e Right d -> return d -type CRLParser = ParserT Sources ParserState +type CRLParser = ParsecT Sources ParserState -- -- Utility functions diff --git a/src/Text/Pandoc/Readers/Custom.hs b/src/Text/Pandoc/Readers/Custom.hs deleted file mode 100644 index 37959574e9ff..000000000000 --- a/src/Text/Pandoc/Readers/Custom.hs +++ /dev/null @@ -1,83 +0,0 @@ -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE OverloadedStrings #-} -{- | - Module : Text.Pandoc.Readers.Custom - Copyright : Copyright (C) 2021-2022 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Supports custom parsers written in Lua which produce a Pandoc AST. 
--} -module Text.Pandoc.Readers.Custom ( readCustom ) where -import Control.Exception -import Control.Monad (when) -import Control.Monad.IO.Class (MonadIO) -import Data.Maybe (fromMaybe) -import HsLua as Lua hiding (Operation (Div)) -import Text.Pandoc.Definition -import Text.Pandoc.Class (PandocMonad, findFileWithDataFallback, report) -import Text.Pandoc.Logging -import Text.Pandoc.Lua (Global (..), runLua, setGlobals) -import Text.Pandoc.Lua.PandocLua -import Text.Pandoc.Lua.Marshal.Pandoc (peekPandoc) -import Text.Pandoc.Options -import Text.Pandoc.Sources (ToSources(..), sourcesToText) -import qualified Data.Text as T - --- | Convert custom markup to Pandoc. -readCustom :: (PandocMonad m, MonadIO m, ToSources s) - => FilePath -> ReaderOptions -> s -> m Pandoc -readCustom luaFile opts srcs = do - let globals = [ PANDOC_SCRIPT_FILE luaFile ] - luaFile' <- fromMaybe luaFile <$> findFileWithDataFallback "readers" luaFile - res <- runLua $ do - setGlobals globals - stat <- dofileTrace luaFile' - -- check for error in lua script (later we'll change the return type - -- to handle this more gracefully): - when (stat /= Lua.OK) - Lua.throwErrorAsException - parseCustom - case res of - Left msg -> throw msg - Right doc -> return doc - where - parseCustom = do - let input = toSources srcs - getglobal "Reader" - push input - push opts - pcallTrace 2 1 >>= \case - OK -> forcePeek $ peekPandoc top - ErrRun -> do - -- Caught a runtime error. Check if parsing might work if we - -- pass a string instead of a Sources list, then retry. - runPeek (peekText top) >>= \case - Failure {} -> - -- not a string error object. Bail! - throwErrorAsException - Success errmsg -> do - if "string expected, got pandoc Sources" `T.isInfixOf` errmsg - then do - pop 1 - _ <- unPandocLua $ do - report $ Deprecated "old Reader function signature" $ - T.unlines - [ "Reader functions should accept a sources list; " - , "functions expecting `string` input are deprecated. 
" - , "Use `tostring` to convert the first argument to a " - , "string." - ] - getglobal "Reader" - push $ sourcesToText input -- push sources as string - push opts - callTrace 2 1 - forcePeek $ peekPandoc top - else - -- nothing we can do here - throwErrorAsException - _ -> -- not a runtime error, we won't be able to recover from that - throwErrorAsException diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index b7a2b06ea99a..2e594bf735f7 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -13,7 +13,13 @@ Conversion of DocBook XML to 'Pandoc' document. -} module Text.Pandoc.Readers.DocBook ( readDocBook ) where +import Control.Monad (MonadPlus(mplus)) import Control.Monad.State.Strict + ( MonadTrans(lift), + StateT(runStateT), + MonadState(get), + gets, + modify ) import Data.ByteString (ByteString) import Data.FileEmbed import Data.Char (isSpace, isLetter, chr) @@ -31,7 +37,7 @@ import Data.Text.Encoding (decodeUtf8) import qualified Data.Text as T import qualified Data.Text.Lazy as TL import Control.Monad.Except (throwError) -import Text.HTML.TagSoup.Entity (lookupEntity) +import Text.Pandoc.XML (lookupEntity) import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad, report) @@ -1166,7 +1172,7 @@ attrValueAsOptionalAttr n e = case attrValue n e of parseInline :: PandocMonad m => Content -> DB m Inlines parseInline (Text (CData _ s _)) = return $ text s parseInline (CRef ref) = - return $ text $ maybe (T.toUpper ref) T.pack $ lookupEntity (T.unpack ref) + return $ text $ fromMaybe (T.toUpper ref) $ lookupEntity ref parseInline (Elem e) = case qName (elName e) of "anchor" -> do diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 2ac529b87c06..4e60f412c412 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -59,8 +59,17 @@ module Text.Pandoc.Readers.Docx ) where 
import Codec.Archive.Zip +import Control.Monad ( liftM, unless ) import Control.Monad.Reader + ( asks, + MonadReader(local), + MonadTrans(lift), + ReaderT(runReaderT) ) import Control.Monad.State.Strict + ( StateT, + gets, + modify, + evalStateT ) import Data.Bifunctor (bimap, first) import qualified Data.ByteString.Lazy as B import Data.Default (Default) @@ -117,6 +126,7 @@ data DState = DState { docxAnchorMap :: M.Map T.Text T.Text , docxAnchorSet :: Set.Set T.Text , docxImmedPrevAnchor :: Maybe T.Text , docxMediaBag :: MediaBag + , docxNumberedHeadings :: Bool , docxDropCap :: Inlines -- keep track of (numId, lvl) values for -- restarting @@ -131,6 +141,7 @@ instance Default DState where , docxAnchorSet = mempty , docxImmedPrevAnchor = Nothing , docxMediaBag = mempty + , docxNumberedHeadings = False , docxDropCap = mempty , docxListState = M.empty , docxPrevPara = mempty @@ -662,10 +673,17 @@ bodyPartToBlocks (Paragraph pPr parparts) T.concat $ map parPartToText parparts | Just (style, n) <- pHeading pPr = do - ils <-local (\s-> s{docxInHeaderBlock=True}) + ils <- local (\s-> s{docxInHeaderBlock=True}) (smushInlines <$> mapM parPartToInlines parparts) + let classes = map normalizeToClassName . delete style + $ getStyleNames (pStyle pPr) + + hasNumbering <- gets docxNumberedHeadings + let addNum = if hasNumbering && not (numbered pPr) + then (++ ["unnumbered"]) + else id makeHeaderAnchor $ - headerWith ("", map normalizeToClassName . delete style $ getStyleNames (pStyle pPr), []) n ils + headerWith ("", addNum classes, []) n ils | otherwise = do ils <- trimSps . 
smushInlines <$> mapM parPartToInlines parparts prevParaIls <- gets docxPrevPara @@ -812,6 +830,9 @@ bodyToOutput (Body bps) = do let (metabps, blkbps) = sepBodyParts bps meta <- bodyPartsToMeta metabps captions <- catMaybes <$> mapM bodyPartToTableCaption blkbps + let isNumberedPara (Paragraph pPr _) = numbered pPr + isNumberedPara _ = False + modify (\s -> s { docxNumberedHeadings = any isNumberedPara blkbps }) modify (\s -> s { docxTableCaptions = captions }) blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks diff --git a/src/Text/Pandoc/Readers/Docx/Fields.hs b/src/Text/Pandoc/Readers/Docx/Fields.hs index 4cb89ac0c5d9..79c4cca3f9dc 100644 --- a/src/Text/Pandoc/Readers/Docx/Fields.hs +++ b/src/Text/Pandoc/Readers/Docx/Fields.hs @@ -17,8 +17,7 @@ module Text.Pandoc.Readers.Docx.Fields ( FieldInfo(..) import Data.Functor (($>), void) import qualified Data.Text as T -import Text.Parsec -import Text.Parsec.Text (Parser) +import Text.Pandoc.Parsing type URL = T.Text type Anchor = T.Text @@ -33,6 +32,8 @@ data FieldInfo = HyperlinkField URL | UnknownField deriving (Show) +type Parser = Parsec T.Text () + parseFieldInfo :: T.Text -> Either ParseError FieldInfo parseFieldInfo = parse fieldInfo "" @@ -132,6 +133,6 @@ pageref = do farg <- fieldArgument switches <- spaces *> many pagerefSwitch let isLink = case switches of - ("\\h", _) : _ -> True + ("\\h", _) : _ -> True _ -> False return (farg, isLink) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index af8143facdbd..ad67d6170843 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -1,4 +1,5 @@ {-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE TupleSections #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {- | @@ -59,6 +60,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) 
import Text.Pandoc.Readers.Docx.Parse.Styles import Codec.Archive.Zip import Control.Applicative ((<|>)) +import Control.Monad import Control.Monad.Except import Control.Monad.Reader import Control.Monad.State.Strict @@ -219,6 +221,7 @@ data TrackedChange = TrackedChange ChangeType ChangeInfo data ParagraphStyle = ParagraphStyle { pStyle :: [ParStyle] , indentation :: Maybe ParIndentation + , numbered :: Bool , dropCap :: Bool , pChange :: Maybe TrackedChange , pBidi :: Maybe Bool @@ -228,6 +231,7 @@ data ParagraphStyle = ParagraphStyle { pStyle :: [ParStyle] defaultParagraphStyle :: ParagraphStyle defaultParagraphStyle = ParagraphStyle { pStyle = [] , indentation = Nothing + , numbered = False , dropCap = False , pChange = Nothing , pBidi = Just False @@ -278,12 +282,12 @@ rowsToRowspans rows = let -> Maybe Integer -- Number of columns left below -> Maybe [(Int, Cell)] -- (rowspan so far, cell) for the row below this one -> [(Int, Cell)] -- (rowspan so far, cell) for this row - g cells _ Nothing = zip (repeat 1) cells + g cells _ Nothing = map (1,) cells g cells columnsLeftBelow (Just rowBelow) = case cells of [] -> [] thisCell@(Cell thisGridSpan _ _) : restOfRow -> case rowBelow of - [] -> zip (repeat 1) cells + [] -> map (1,) cells (spanSoFarBelow, Cell gridSpanBelow vmerge _) : _ -> let spanSoFar = case vmerge of Restart -> 1 @@ -688,6 +692,11 @@ pHeading = getParStyleField headingLev . pStyle pNumInfo :: ParagraphStyle -> Maybe (T.Text, T.Text) pNumInfo = getParStyleField numInfo . pStyle +mkListItem :: ParagraphStyle -> Text -> Text -> [ParPart] -> D BodyPart +mkListItem parstyle numId lvl parparts = do + lvlInfo <- lookupLevel numId lvl <$> asks envNumbering + return $ ListItem parstyle numId lvl lvlInfo parparts + pStyleIndentation :: ParagraphStyle -> Maybe ParIndentation pStyleIndentation style = (getParStyleField indent . 
pStyle) style @@ -700,38 +709,43 @@ elemToBodyPart ns element elemToBodyPart ns element | isElem ns "w" "p" element , Just (numId, lvl) <- getNumInfo ns element = do - parstyle <- elemToParagraphStyle ns element <$> asks envParStyles + parstyle <- elemToParagraphStyle ns element + <$> asks envParStyles + <*> asks envNumbering parparts <- mconcat <$> mapD (elemToParPart ns) (elChildren element) - levelInfo <- lookupLevel numId lvl <$> asks envNumbering - return $ ListItem parstyle numId lvl levelInfo parparts + case pHeading parstyle of + Nothing -> mkListItem parstyle numId lvl parparts + Just _ -> do + return $ Paragraph parstyle parparts elemToBodyPart ns element | isElem ns "w" "p" element = do - parstyle <- elemToParagraphStyle ns element <$> asks envParStyles + parstyle <- elemToParagraphStyle ns element + <$> asks envParStyles + <*> asks envNumbering parparts' <- mconcat <$> mapD (elemToParPart ns) (elChildren element) fldCharState <- gets stateFldCharState modify $ \st -> st {stateFldCharState = emptyFldCharContents fldCharState} -- Word uses list enumeration for numbered headings, so we only -- want to infer a list from the styles if it is NOT a heading. 
- let parparts = parparts' ++ (openFldCharsToParParts fldCharState) in - case pHeading parstyle of - Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do - levelInfo <- lookupLevel numId lvl <$> asks envNumbering - return $ ListItem parstyle numId lvl levelInfo parparts - _ -> let - hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle) - - hasSimpleTableField = fromMaybe False $ do - fldSimple <- findChildByName ns "w" "fldSimple" element - instr <- findAttrByName ns "w" "instr" fldSimple - pure ("Table" `elem` T.words instr) - - hasComplexTableField = fromMaybe False $ do - instrText <- findElementByName ns "w" "instrText" element - pure ("Table" `elem` T.words (strContent instrText)) - - in if hasCaptionStyle && (hasSimpleTableField || hasComplexTableField) - then return $ TblCaption parstyle parparts - else return $ Paragraph parstyle parparts + let parparts = parparts' ++ (openFldCharsToParParts fldCharState) + case pHeading parstyle of + Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do + mkListItem parstyle numId lvl parparts + _ -> let + hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle) + + hasSimpleTableField = fromMaybe False $ do + fldSimple <- findChildByName ns "w" "fldSimple" element + instr <- findAttrByName ns "w" "instr" fldSimple + pure ("Table" `elem` T.words instr) + + hasComplexTableField = fromMaybe False $ do + instrText <- findElementByName ns "w" "instrText" element + pure ("Table" `elem` T.words (strContent instrText)) + + in if hasCaptionStyle && (hasSimpleTableField || hasComplexTableField) + then return $ TblCaption parstyle parparts + else return $ Paragraph parstyle parparts elemToBodyPart ns element | isElem ns "w" "tbl" element = do @@ -1115,15 +1129,22 @@ getTrackedChange ns element Just $ TrackedChange Deletion (ChangeInfo cId cAuthor mcDate) getTrackedChange _ _ = Nothing -elemToParagraphStyle :: NameSpaces -> Element -> ParStyleMap -> ParagraphStyle -elemToParagraphStyle ns element sty 
+elemToParagraphStyle :: NameSpaces -> Element + -> ParStyleMap + -> Numbering + -> ParagraphStyle +elemToParagraphStyle ns element sty numbering | Just pPr <- findChildByName ns "w" "pPr" element = let style = mapMaybe (fmap ParaStyleId . findAttrByName ns "w" "val") (findChildrenByName ns "w" "pStyle" pPr) + pStyle' = mapMaybe (`M.lookup` sty) style in ParagraphStyle - {pStyle = mapMaybe (`M.lookup` sty) style + {pStyle = pStyle' + , numbered = case getNumInfo ns element of + Just (numId, lvl) -> isJust $ lookupLevel numId lvl numbering + Nothing -> isJust $ getParStyleField numInfo pStyle' , indentation = getIndentation ns element , dropCap = @@ -1143,7 +1164,7 @@ elemToParagraphStyle ns element sty getTrackedChange ns , pBidi = checkOnOff ns pPr (elemName ns "w" "bidi") } -elemToParagraphStyle _ _ _ = defaultParagraphStyle + | otherwise = defaultParagraphStyle elemToRunStyleD :: NameSpaces -> Element -> D RunStyle elemToRunStyleD ns element diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs index c0f9182f4254..2e87fa946ccd 100644 --- a/src/Text/Pandoc/Readers/Docx/Util.hs +++ b/src/Text/Pandoc/Readers/Docx/Util.hs @@ -67,4 +67,3 @@ findAttrByName :: NameSpaces -> Text -> Text -> Element -> Maybe Text findAttrByName ns pref name el = let ns' = ns <> elemToNameSpaces el in findAttr (elemName ns' pref name) el - diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs index d1b673611636..1ca4cf696ca0 100644 --- a/src/Text/Pandoc/Readers/DokuWiki.hs +++ b/src/Text/Pandoc/Readers/DokuWiki.hs @@ -26,9 +26,8 @@ import qualified Data.Text as T import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) import Text.Pandoc.Definition -import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (enclosed, nested) +import Text.Pandoc.Parsing hiding (enclosed) import Text.Pandoc.Shared (trim, stringify, tshow) 
import Data.List (isPrefixOf, isSuffixOf) import qualified Safe @@ -43,26 +42,17 @@ readDokuWiki opts s = do res <- runParserT parseDokuWiki def {stateOptions = opts } (initialSourceName sources) sources case res of - Left e -> throwError $ PandocParsecError sources e + Left e -> throwError $ fromParsecError sources e Right d -> return d -type DWParser = ParserT Sources ParserState +type DWParser = ParsecT Sources ParserState -- * Utility functions -- | Parse end-of-line, which can be either a newline or end-of-file. -eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m () +eol :: (Stream s m Char, UpdateSourcePos s Char) => ParsecT s st m () eol = void newline <|> eof -nested :: PandocMonad m => DWParser m a -> DWParser m a -nested p = do - nestlevel <- stateMaxNestingLevel <$> getState - guard $ nestlevel > 0 - updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 } - res <- p - updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } - return res - guardColumnOne :: PandocMonad m => DWParser m () guardColumnOne = getPosition >>= \pos -> guard (sourceColumn pos == 1) @@ -164,7 +154,7 @@ nestedInlines :: (Show a, PandocMonad m) nestedInlines end = innerSpace <|> nestedInline where innerSpace = try $ whitespace <* notFollowedBy end - nestedInline = notFollowedBy whitespace >> nested inline + nestedInline = notFollowedBy whitespace >> inline bold :: PandocMonad m => DWParser m B.Inlines bold = try $ B.strong <$> enclosed (string "**") nestedInlines @@ -254,7 +244,7 @@ nocache :: PandocMonad m => DWParser m B.Inlines nocache = try $ mempty <$ string "~~NOCACHE~~" str :: PandocMonad m => DWParser m B.Inlines -str = B.str <$> (many1Char alphaNum <|> countChar 1 characterReference) +str = B.str <$> (many1Char alphaNum <|> characterReference) symbol :: PandocMonad m => DWParser m B.Inlines symbol = B.str <$> countChar 1 nonspaceChar diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 
eb8d2405d42d..cdf51b4298cf 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -40,7 +40,8 @@ import Text.Pandoc.Extensions (Extension (Ext_raw_html), enableExtension) import Text.Pandoc.MIME (MimeType) import Text.Pandoc.Options (ReaderOptions (..)) import Text.Pandoc.Readers.HTML (readHtml) -import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI, tshow) +import Text.Pandoc.Shared (addMetaField, collapseFilePath, tshow) +import Text.Pandoc.URI (escapeURI) import qualified Text.Pandoc.UTF8 as UTF8 (toTextLazy) import Text.Pandoc.Walk (query, walk) import Text.Pandoc.XML.Light diff --git a/src/Text/Pandoc/Readers/EndNote.hs b/src/Text/Pandoc/Readers/EndNote.hs index 586dcf451dfc..cdfbe6a4e1b4 100644 --- a/src/Text/Pandoc/Readers/EndNote.hs +++ b/src/Text/Pandoc/Readers/EndNote.hs @@ -30,7 +30,7 @@ import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue) import Text.Pandoc.Sources (Sources(..), ToSources(..), sourcesToText) -import Text.Pandoc.Citeproc.BibTeX (toName) +import Text.Pandoc.Citeproc.Name (toName, NameOpts(..)) import Control.Applicative ((<|>)) import Control.Monad.Except (throwError) import Control.Monad (mzero, unless) @@ -140,7 +140,9 @@ recordToReference e = filterChildrenName (name "contributors") e >>= filterChildrenName (name "authors") >>= filterChildrenName (name "author") >>= - toName [] . B.toList . B.text . T.strip . getText + toName NameOpts{ nameOptsPrefixIsNonDroppingParticle = False + , nameOptsUseJuniorComma = False } + . B.toList . B.text . T.strip . 
getText titles = do x <- filterChildrenName (name "titles") e (key, name') <- [("title", "title"), diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs index 1cdc2e25c25f..2844c9266696 100644 --- a/src/Text/Pandoc/Readers/FB2.hs +++ b/src/Text/Pandoc/Readers/FB2.hs @@ -34,7 +34,7 @@ import qualified Data.Text as T import qualified Data.Text.Lazy as TL import Data.Default import Data.Maybe -import Text.HTML.TagSoup.Entity (lookupEntity) +import Text.Pandoc.XML (lookupEntity) import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad, insertMedia, report) import Text.Pandoc.Error @@ -88,7 +88,7 @@ removeHash t = case T.uncons t of _ -> t convertEntity :: Text -> Text -convertEntity e = maybe (T.toUpper e) T.pack $ lookupEntity (T.unpack e) +convertEntity e = fromMaybe (T.toUpper e) $ lookupEntity e parseInline :: PandocMonad m => Content -> FB2 m Inlines parseInline (Elem e) = diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 4fb6028e46f4..286839dcd975 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -21,6 +21,7 @@ module Text.Pandoc.Readers.HTML ( readHtml , isBlockTag , isTextTag , isCommentTag + , toAttr ) where import Control.Applicative ((<|>)) @@ -62,10 +63,10 @@ import Text.Pandoc.Options ( extensionEnabled) import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Shared ( - addMetaField, blocksToInlines', escapeURI, extractSpaces, + addMetaField, blocksToInlines', extractSpaces, htmlSpanLikeElements, renderTags', safeRead, tshow, formatCode) +import Text.Pandoc.URI (escapeURI) import Text.Pandoc.Walk -import Text.Parsec.Error import Text.TeXMath (readMathML, writeTeX) -- | Convert HTML-formatted string to 'Pandoc' document. 
@@ -661,6 +662,7 @@ inline = pTagText <|> do "img" -> pImage "svg" -> pSvg "bdo" -> pBdo + "tt" -> pCode "code" -> pCode "samp" -> pCodeWithClass "samp" "sample" "var" -> pCodeWithClass "var" "variable" @@ -1026,7 +1028,7 @@ isCommentTag = tagComment (const True) -- | Matches a stretch of HTML in balanced tags. htmlInBalanced :: Monad m => (Tag Text -> Bool) - -> ParserT Sources st m Text + -> ParsecT Sources st m Text htmlInBalanced f = try $ do lookAhead (char '<') sources <- getInput @@ -1075,7 +1077,7 @@ hasTagWarning _ = False -- | Matches a tag meeting a certain condition. htmlTag :: (HasReaderOptions st, Monad m) => (Tag Text -> Bool) - -> ParserT Sources st m (Tag Text, Text) + -> ParsecT Sources st m (Tag Text, Text) htmlTag f = try $ do lookAhead (char '<') startpos <- getPosition diff --git a/src/Text/Pandoc/Readers/HTML/Types.hs b/src/Text/Pandoc/Readers/HTML/Types.hs index b70e81939b6a..cc26319a1c39 100644 --- a/src/Text/Pandoc/Readers/HTML/Types.hs +++ b/src/Text/Pandoc/Readers/HTML/Types.hs @@ -33,12 +33,12 @@ import Text.Pandoc.Options (ReaderOptions) import Text.Pandoc.Parsing ( HasIdentifierList (..), HasLastStrPosition (..), HasLogMessages (..) , HasMacros (..), HasQuoteContext (..), HasReaderOptions (..) - , ParserT, ParserState, QuoteContext (NoQuote) + , ParsecT, ParserState, QuoteContext (NoQuote) ) -import Text.Pandoc.Readers.LaTeX.Types (Macro) +import Text.Pandoc.TeX (Macro) -- | HTML parser type -type HTMLParser m s = ParserT s HTMLState (ReaderT HTMLLocal m) +type HTMLParser m s = ParsecT s HTMLState (ReaderT HTMLLocal m) -- | HTML parser, expecting @Tag Text@ as tokens. 
type TagParser m = HTMLParser m [Tag Text] diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs index a82877242427..7385cc4ba5d0 100644 --- a/src/Text/Pandoc/Readers/Ipynb.hs +++ b/src/Text/Pandoc/Readers/Ipynb.hs @@ -81,13 +81,13 @@ cellToBlocks opts lang c = do Nothing -> mempty Just (MimeAttachments m) -> M.toList m let ident = fromMaybe mempty $ cellId c - mapM_ addAttachment attachments + mapM_ (addAttachment (cellId c)) attachments case cellType c of Ipynb.Markdown -> do bs <- if isEnabled Ext_raw_markdown opts then return [RawBlock (Format "markdown") source] else do - Pandoc _ bs <- walk fixImage <$> readMarkdown opts source + Pandoc _ bs <- walk (fixImage (cellId c)) <$> readMarkdown opts source return bs return $ B.divWith (ident,["cell","markdown"],kvs) $ B.fromList bs @@ -121,14 +121,17 @@ cellToBlocks opts lang c = do <> outputBlocks -- Remove attachment: prefix from images... -fixImage :: Inline -> Inline -fixImage (Image attr lab (src,tit)) - | "attachment:" `T.isPrefixOf` src = Image attr lab (T.drop 11 src, tit) -fixImage x = x - -addAttachment :: PandocMonad m => (Text, MimeBundle) -> m () -addAttachment (fname, mimeBundle) = do - let fp = T.unpack fname +fixImage :: Maybe Text -> Inline -> Inline +fixImage mbident (Image attr lab (src,tit)) + | "attachment:" `T.isPrefixOf` src = + let src' = T.drop 11 src + qualifiedSrc = maybe src' (<> ("-" <> src')) mbident + in Image attr lab (qualifiedSrc, tit) +fixImage _ x = x + +addAttachment :: PandocMonad m => Maybe Text -> (Text, MimeBundle) -> m () +addAttachment mbident (fname, mimeBundle) = do + let fp = T.unpack $ maybe fname (<> ("-" <> fname)) mbident case M.toList (unMimeBundle mimeBundle) of (mimeType, BinaryData bs):_ -> insertMedia fp (Just mimeType) (BL.fromStrict bs) diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index a86cc0e4cb5a..643c92242e66 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ 
-14,7 +14,8 @@ Conversion of JATS XML to 'Pandoc' document. -} module Text.Pandoc.Readers.JATS ( readJATS ) where -import Control.Monad.State.Strict +import Control.Monad.State.Strict ( StateT(runStateT), gets, modify ) +import Control.Monad (forM_, when, unless, MonadPlus(mplus)) import Control.Monad.Except (throwError) import Text.Pandoc.Error (PandocError(..)) import Data.Char (isDigit, isSpace) @@ -26,7 +27,7 @@ import Data.Maybe (maybeToList, fromMaybe, catMaybes) import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Lazy as TL -import Text.HTML.TagSoup.Entity (lookupEntity) +import Text.Pandoc.XML (lookupEntity) import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Options @@ -444,6 +445,7 @@ parseRef e = do "issue" -> Just . ("issue",) . toMetaValue <$> getInlines el "isbn" -> Just . ("ISBN",) . toMetaValue <$> getInlines el "issn" -> Just . ("ISSN",) . toMetaValue <$> getInlines el + "uri" -> Just . ("url",) . toMetaValue <$> getInlines el "fpage" -> case filterChild (named "lpage") c of Just lp -> Just . ("page",) . toMetaValue <$> @@ -494,8 +496,8 @@ elementToStr x = x parseInline :: PandocMonad m => Content -> JATS m Inlines parseInline (Text (CData _ s _)) = return $ text s -parseInline (CRef ref) = return $ maybe (text $ T.toUpper ref) (text . T.pack) - $ lookupEntity (T.unpack ref) +parseInline (CRef ref) = return $ maybe (text $ T.toUpper ref) text + $ lookupEntity ref parseInline (Elem e) = case qName (elName e) of "italic" -> innerInlines emph diff --git a/src/Text/Pandoc/Readers/Jira.hs b/src/Text/Pandoc/Readers/Jira.hs index 74918130d34e..0362704156be 100644 --- a/src/Text/Pandoc/Readers/Jira.hs +++ b/src/Text/Pandoc/Readers/Jira.hs @@ -12,8 +12,8 @@ Conversion of jira wiki formatted plain text to 'Pandoc' document. 
module Text.Pandoc.Readers.Jira ( readJira ) where import Control.Monad.Except (throwError) -import Data.Text (Text, append, pack, singleton, unpack) -import Text.HTML.TagSoup.Entity (lookupEntity) +import Data.Text (Text, append, pack, singleton) +import Text.Pandoc.XML (lookupEntity) import Text.Jira.Parser (parse) import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) import Text.Pandoc.Builder hiding (cell) @@ -137,9 +137,9 @@ jiraToPandocInlines = \case Jira.Styled style inlns -> fromStyle style $ fromInlines inlns where fromInlines = foldMap jiraToPandocInlines - fromEntity e = case lookupEntity (unpack e ++ ";") of + fromEntity e = case lookupEntity (e <> ";") of Nothing -> "&" `append` e `append` ";" - Just cs -> pack cs + Just t ->t fromStyle = \case Jira.Emphasis -> emph diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index f3aa2949067b..cfe6d1299ba7 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -24,6 +24,7 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX, import Control.Applicative (many, optional, (<|>)) import Control.Monad import Control.Monad.Except (throwError) +import Data.Containers.ListUtils (nubOrd) import Data.Char (isDigit, isLetter, isAlphaNum, toUpper, chr) import Data.Default import Data.List (intercalate) @@ -40,14 +41,14 @@ import Text.Pandoc.Class (PandocPure, PandocMonad (..), getResourcePath, readFileFromDirs, report, setResourcePath, getZonedTime) import Data.Time (ZonedTime(..), LocalTime(..), showGregorian) -import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError)) +import Text.Pandoc.Error (PandocError (PandocParseError)) import Text.Pandoc.Highlighting (languagesByExtension) import Text.Pandoc.ImageSize (numUnit, showFl) import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline, optional, space, spaces, withRaw, (<|>)) -import Text.Pandoc.Readers.LaTeX.Types (Tok 
(..), TokType (..)) +import Text.Pandoc.TeX (Tok (..), TokType (..)) import Text.Pandoc.Readers.LaTeX.Parsing import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites) import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments, @@ -87,7 +88,7 @@ readLaTeX opts ltx = do (TokStream False (tokenizeSources sources)) case parsed of Right result -> return result - Left e -> throwError $ PandocParsecError sources e + Left e -> throwError $ fromParsecError sources e parseLaTeX :: PandocMonad m => LP m Pandoc parseLaTeX = do @@ -134,7 +135,7 @@ resolveRefs _ x = x rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => ParserT Sources s m Text + => ParsecT Sources s m Text rawLaTeXBlock = do lookAhead (try (char '\\' >> letter)) toks <- getInputTokens @@ -165,7 +166,7 @@ beginOrEndCommand = try $ do (txt <> untokenize rawargs) rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => ParserT Sources s m Text + => ParsecT Sources s m Text rawLaTeXInline = do lookAhead (try (char '\\' >> letter)) toks <- getInputTokens @@ -179,7 +180,7 @@ rawLaTeXInline = do finalbraces <- mconcat <$> many (try (string "{}")) -- see #5439 return $ raw <> T.pack finalbraces -inlineCommand :: PandocMonad m => ParserT Sources ParserState m Inlines +inlineCommand :: PandocMonad m => ParsecT Sources ParserState m Inlines inlineCommand = do lookAhead (try (char '\\' >> letter)) toks <- getInputTokens @@ -301,7 +302,7 @@ inlineCommand' = try $ do else pure "" overlay <- option "" overlaySpecification let name' = name <> star <> overlay - let names = ordNub [name', name] -- check non-starred as fallback + let names = nubOrd [name', name] -- check non-starred as fallback let raw = do guard $ isInlineCommand name || not (isBlockCommand name) rawcommand <- getRawCommand name (cmd <> star) @@ -315,7 +316,7 @@ tok = tokWith inline unescapeURL :: Text -> Text unescapeURL = T.concat . go . 
T.splitOn "\\" where - isEscapable c = c `elemText` "#$%&~_^\\{}" + isEscapable c = T.any (== c) "#$%&~_^\\{}" go (x:xs) = x : map unescapeInterior xs go [] = [] unescapeInterior t @@ -669,7 +670,7 @@ opt = do (TokStream False toks) case parsed of Right result -> return result - Left e -> throwError $ PandocParsecError (toSources toks) e + Left e -> throwError $ fromParsecError (toSources toks) e -- block elements: @@ -755,8 +756,11 @@ readFileFromTexinputs fp = do case M.lookup (T.pack fp) fileContentsMap of Just t -> return (Just t) Nothing -> do - dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." - <$> lookupEnv "TEXINPUTS" + dirs <- map (\t -> if T.null t + then "." + else T.unpack t) + . T.split (==':') . fromMaybe "" + <$> lookupEnv "TEXINPUTS" readFileFromDirs dirs fp ensureExtension :: (FilePath -> Bool) -> FilePath -> FilePath -> FilePath @@ -840,7 +844,7 @@ blockCommand = try $ do guard $ name /= "begin" && name /= "end" && name /= "and" star <- option "" ("*" <$ symbol '*' <* sp) let name' = name <> star - let names = ordNub [name', name] + let names = nubOrd [name', name] let rawDefiniteBlock = do guard $ isBlockCommand name rawcontents <- getRawCommand name (txt <> star) @@ -1170,7 +1174,7 @@ addImageCaption = walkM go st <- getState case sCaption st of Nothing -> return p - Just figureCaption -> do + Just (Caption _mbshort bs) -> do let mblabel = sLastLabel st let attr' = case mblabel of Just lab -> (lab, cls, kvs) @@ -1185,7 +1189,8 @@ addImageCaption = walkM go [Str (renderDottedNum num)] (sLabels st) } return $ SimpleFigure attr' - (maybe id removeLabel mblabel (B.toList figureCaption)) + (maybe id removeLabel mblabel + (blocksToInlines bs)) (src, tit) go x = return x @@ -1324,4 +1329,3 @@ block = do blocks :: PandocMonad m => LP m Blocks blocks = mconcat <$> many block - diff --git a/src/Text/Pandoc/Readers/LaTeX/Citation.hs b/src/Text/Pandoc/Readers/LaTeX/Citation.hs index 90fa1bc16948..f26288c6ef80 100644 --- 
a/src/Text/Pandoc/Readers/LaTeX/Citation.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Citation.hs @@ -15,7 +15,6 @@ import Control.Applicative ((<|>), optional, many) import Control.Monad (mzero) import Control.Monad.Trans (lift) import Control.Monad.Except (throwError) -import Text.Pandoc.Error (PandocError(PandocParsecError)) import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline, optional, space, spaces, withRaw, (<|>)) @@ -121,7 +120,7 @@ simpleCiteArgs inline = try $ do (TokStream False toks) case parsed of Right result -> return result - Left e -> throwError $ PandocParsecError (toSources toks) e + Left e -> throwError $ fromParsecError (toSources toks) e @@ -208,4 +207,3 @@ complexNatbibCitation inline mode = try $ do inNote :: Inlines -> Inlines inNote ils = note $ para $ ils <> str "." - diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs index 53a3c1597c59..f8805268dd32 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Inline.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs @@ -27,10 +27,11 @@ import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Builder import Text.Pandoc.Shared (toRomanNumeral, safeRead) -import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..)) +import Text.Pandoc.TeX (Tok (..), TokType (..)) import Control.Applicative (optional, (<|>)) import Control.Monad (guard, mzero, mplus, unless) -import Text.Pandoc.Class.PandocMonad (PandocMonad (..), translateTerm) +import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) +import Text.Pandoc.Translations (translateTerm) import Text.Pandoc.Readers.LaTeX.Parsing import Text.Pandoc.Extensions (extensionEnabled, Extension(..)) import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy, @@ -394,5 +395,3 @@ doAcronymPlural form = do return . 
mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro), ("acronym-form", "plural+" <> form)]) $ mconcat [str $ untokenize acro, plural]] - - diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs index 3c24bbd0022f..b80fc9242e63 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs @@ -24,7 +24,8 @@ import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Shared (extractSpaces) import Text.Collate.Lang (Lang(..), renderLang) -import Text.Pandoc.Class (PandocMonad(..), setTranslations) +import Text.Pandoc.Class (PandocMonad(..)) +import Text.Pandoc.Translations (setTranslations) import Text.Pandoc.Readers.LaTeX.Parsing import Text.Pandoc.Parsing (updateState, option, getState, QuoteContext(..), withQuoteContext) diff --git a/src/Text/Pandoc/Readers/LaTeX/Macro.hs b/src/Text/Pandoc/Readers/LaTeX/Macro.hs index 4756c5381d14..c640e2299081 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Macro.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Macro.hs @@ -6,7 +6,7 @@ where import Text.Pandoc.Extensions (Extension(..)) import Text.Pandoc.Logging (LogMessage(MacroAlreadyDefined)) import Text.Pandoc.Readers.LaTeX.Parsing -import Text.Pandoc.Readers.LaTeX.Types +import Text.Pandoc.TeX import Text.Pandoc.Class import Text.Pandoc.Shared (safeRead) import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline, diff --git a/src/Text/Pandoc/Readers/LaTeX/Math.hs b/src/Text/Pandoc/Readers/LaTeX/Math.hs index 92f41910b570..6eb57c178dc2 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Math.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Math.hs @@ -16,7 +16,7 @@ import Text.Pandoc.Walk (walk) import Text.Pandoc.Builder as B import qualified Data.Sequence as Seq import Text.Pandoc.Readers.LaTeX.Parsing -import Text.Pandoc.Readers.LaTeX.Types +import Text.Pandoc.TeX import Text.Pandoc.Class import Text.Pandoc.Shared (trimMath, stripTrailingNewlines) import Text.Pandoc.Parsing hiding (blankline, 
mathDisplay, mathInline, @@ -218,5 +218,3 @@ italicize x@(Para [Image{}]) = x -- see #6925 italicize (Para ils) = Para [Emph ils] italicize (Plain ils) = Plain [Emph ils] italicize x = x - - diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index 7528e8623b4a..c4102fbbb87b 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -34,7 +34,6 @@ module Text.Pandoc.Readers.LaTeX.Parsing , getInputTokens , untokenize , untoken - , toksToString , satisfyTok , peekTok , parseFromToks @@ -118,11 +117,9 @@ import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline, space, spaces, withRaw, (<|>)) -import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..), +import Text.Pandoc.TeX (ExpansionPoint (..), Macro (..), ArgSpec (..), Tok (..), TokType (..)) import Text.Pandoc.Shared -import Text.Parsec.Pos -import Text.Parsec (Stream(uncons)) import Text.Pandoc.Walk newtype DottedNum = DottedNum [Int] @@ -160,7 +157,7 @@ data LaTeXState = LaTeXState{ sOptions :: ReaderOptions , sLogMessages :: [LogMessage] , sIdentifiers :: Set.Set Text , sVerbatimMode :: Bool - , sCaption :: Maybe Inlines + , sCaption :: Maybe Caption , sInListItem :: Bool , sInTableCell :: Bool , sLastHeaderNum :: DottedNum @@ -264,7 +261,7 @@ instance Monad m => Stream TokStream m Tok where uncons (TokStream _ []) = return Nothing uncons (TokStream _ (t:ts)) = return $ Just (t, TokStream False ts) -type LP m = ParserT TokStream LaTeXState m +type LP m = ParsecT TokStream LaTeXState m withVerbatimMode :: PandocMonad m => LP m a -> LP m a withVerbatimMode parser = do @@ -279,7 +276,7 @@ withVerbatimMode parser = do rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s, Show a) => [Tok] -> LP m a -> LP m a - -> ParserT Sources s m (a, Text) + -> ParsecT Sources s m (a, Text) rawLaTeXParser toks parser valParser = do pstate <- 
getState let lstate = def{ sOptions = extractReaderOptions pstate } @@ -319,7 +316,7 @@ rawLaTeXParser toks parser valParser = do return (val, result') applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => Text -> ParserT Sources s m Text + => Text -> ParsecT Sources s m Text applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|> do let retokenize = untokenize <$> many anyTok pstate <- getState @@ -347,7 +344,7 @@ tokenizeSources = concatMap tokenizeSource . unSources -- Return tokens from input sources. Ensure that starting position is -- correct. -getInputTokens :: PandocMonad m => ParserT Sources s m [Tok] +getInputTokens :: PandocMonad m => ParsecT Sources s m [Tok] getInputTokens = do pos <- getPosition ss <- getInput @@ -485,9 +482,6 @@ untokenAccum (Tok _ _ t) accum = t <> accum untoken :: Tok -> Text untoken t = untokenAccum t mempty -toksToString :: [Tok] -> String -toksToString = T.unpack . untokenize - parseFromToks :: PandocMonad m => LP m a -> [Tok] -> LP m a parseFromToks parser toks = do oldInput <- getInput @@ -887,7 +881,7 @@ dimenarg = try $ do guard $ rest `elem` ["", "pt","pc","in","bp","cm","mm","dd","cc","sp"] return $ T.pack ['=' | ch] <> minus <> s -ignore :: (Monoid a, PandocMonad m) => Text -> ParserT s u m a +ignore :: (Monoid a, PandocMonad m) => Text -> ParsecT s u m a ignore raw = do pos <- getPosition report $ SkippedContent raw pos @@ -1078,10 +1072,11 @@ label = do setCaption :: PandocMonad m => LP m Inlines -> LP m () setCaption inline = try $ do - skipopts + mbshort <- Just . 
toList <$> bracketed inline <|> pure Nothing ils <- tokWith inline optional $ try $ spaces *> label - updateState $ \st -> st{ sCaption = Just ils } + updateState $ \st -> st{ sCaption = Just $ + Caption mbshort [Plain $ toList ils] } resetCaption :: PandocMonad m => LP m () resetCaption = updateState $ \st -> st{ sCaption = Nothing diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs index 6c22a4ceb233..ac13f6f0cfbc 100644 --- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs +++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs @@ -28,7 +28,7 @@ import Text.Pandoc.Readers.LaTeX.Parsing symbol, untokenize, LP ) -import Text.Pandoc.Readers.LaTeX.Types +import Text.Pandoc.TeX ( Tok(Tok), TokType(Word, CtrlSeq) ) import Text.Pandoc.Class.PandocMonad ( PandocMonad ) import Text.Pandoc.Parsing @@ -43,7 +43,7 @@ import Text.Pandoc.Parsing try, skipMany1, runParser, - Parser ) + Parsec ) import Control.Applicative ((<|>)) import Control.Monad (void) import qualified Data.Map as M @@ -122,7 +122,7 @@ doSIlist tok = do mconcat (intersperse (str "," <> space) (init xs)) <> text ", & " <> last xs -parseNum :: Parser Text () Inlines +parseNum :: Parsec Text () Inlines parseNum = (mconcat <$> many parseNumPart) <* eof minus :: Text @@ -132,7 +132,7 @@ hyphenToMinus :: Inline -> Inline hyphenToMinus (Str t) = Str (T.replace "-" minus t) hyphenToMinus x = x -parseNumPart :: Parser Text () Inlines +parseNumPart :: Parsec Text () Inlines parseNumPart = parseDecimalNum <|> parseComma <|> @@ -145,7 +145,7 @@ parseNumPart = where parseDecimalNum, parsePlusMinus, parsePM, parseComma, parseI, parseX, - parseExp, parseSpace :: Parser Text () Inlines + parseExp, parseSpace :: Parsec Text () Inlines parseDecimalNum = try $ do pref <- option mempty $ (mempty <$ char '+') <|> (minus <$ char '-') basenum' <- many1 (satisfy (\c -> isDigit c || c == '.')) @@ -469,5 +469,3 @@ siUnitMap = M.fromList , ("watt", str "W") , ("weber", str "Wb") ] - - diff --git 
a/src/Text/Pandoc/Readers/LaTeX/Table.hs b/src/Text/Pandoc/Readers/LaTeX/Table.hs index 6297737818a6..718ff2dbe9cf 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Table.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Table.hs @@ -7,7 +7,7 @@ where import Data.Functor (($>)) import Text.Pandoc.Class import Text.Pandoc.Readers.LaTeX.Parsing -import Text.Pandoc.Readers.LaTeX.Types +import Text.Pandoc.TeX import Text.Pandoc.Builder as B import qualified Data.Map as M import Data.Text (Text) @@ -355,18 +355,18 @@ addTableCaption :: PandocMonad m => Blocks -> LP m Blocks addTableCaption = walkM go where go (Table attr c spec th tb tf) = do st <- getState + let capt = fromMaybe c $ sCaption st let mblabel = sLastLabel st - capt <- case (sCaption st, mblabel) of - (Just ils, Nothing) -> return $ caption Nothing (plain ils) - (Just ils, Just lab) -> do + case mblabel of + Nothing -> return () + Just lab -> do num <- getNextNumber sLastTableNum setState st{ sLastTableNum = num , sLabels = M.insert lab [Str (renderDottedNum num)] (sLabels st) } - return $ caption Nothing (plain ils) -- add number?? - (Nothing, _) -> return c + -- add num to caption? 
let attr' = case (attr, mblabel) of ((_,classes,kvs), Just ident) -> (ident,classes,kvs) diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index 1141af66f6a3..0f88b77b2944 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -16,7 +16,7 @@ module Text.Pandoc.Readers.Man (readMan) where import Data.Char (toLower) import Data.Default (Default) -import Control.Monad (liftM, mzero, guard, void) +import Control.Monad (mzero, guard, void) import Control.Monad.Trans (lift) import Control.Monad.Except (throwError) import Data.Maybe (catMaybes, isJust) @@ -24,15 +24,12 @@ import Data.List (intersperse) import qualified Data.Text as T import Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocMonad (PandocMonad(..), report) -import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Walk (query) -import Text.Pandoc.Shared (mapLeft) import Text.Pandoc.Readers.Roff -- TODO explicit imports -import qualified Text.Parsec as Parsec -import Text.Parsec.Pos (updatePosString) +import qualified Text.Pandoc.Parsing as P import qualified Data.Foldable as Foldable data ManState = ManState { readerOptions :: ReaderOptions @@ -45,7 +42,7 @@ instance Default ManState where , metadata = nullMeta , tableCellsPlain = True } -type ManParser m = ParserT [RoffToken] ManState m +type ManParser m = P.ParsecT [RoffToken] ManState m -- | Read man (troff) from an input string and return a Pandoc document. @@ -57,21 +54,18 @@ readMan opts s = do let Sources inps = toSources s tokenz <- mconcat <$> mapM (uncurry lexRoff) inps let state = def {readerOptions = opts} :: ManState - let fixError (PandocParsecError _ e) = PandocParsecError (Sources inps) e - fixError e = e eitherdoc <- readWithMTokens parseMan state (Foldable.toList . unRoffTokens $ tokenz) - either (throwError . fixError) return eitherdoc + either (throwError . 
fromParsecError (Sources inps)) return eitherdoc readWithMTokens :: PandocMonad m - => ParserT [RoffToken] ManState m a -- ^ parser + => ParsecT [RoffToken] ManState m a -- ^ parser -> ManState -- ^ initial state -> [RoffToken] -- ^ input - -> m (Either PandocError a) + -> m (Either ParseError a) readWithMTokens parser state input = - let leftF = PandocParsecError mempty - in mapLeft leftF `liftM` runParserT parser state "source" input + runParserT parser state "source" input parseMan :: PandocMonad m => ManParser m Pandoc @@ -180,14 +174,16 @@ parseNewParagraph = do -- Parser: [RoffToken] -> Pandoc -- -msatisfy :: Monad m => (RoffToken -> Bool) -> ParserT [RoffToken] st m RoffToken -msatisfy predic = tokenPrim show nextPos testTok +msatisfy :: Monad m + => (RoffToken -> Bool) -> P.ParsecT [RoffToken] st m RoffToken +msatisfy predic = P.tokenPrim show nextPos testTok where testTok t = if predic t then Just t else Nothing nextPos _pos _x (ControlLine _ _ pos':_) = pos' - nextPos pos _x _xs = updatePosString - (setSourceColumn - (setSourceLine pos $ sourceLine pos + 1) 1) "" + nextPos pos _x _xs = P.updatePosString + (P.setSourceColumn + (P.setSourceLine pos $ + P.sourceLine pos + 1) 1) "" mtoken :: PandocMonad m => ManParser m RoffToken mtoken = msatisfy (const True) @@ -431,7 +427,7 @@ listItem mbListType = try $ do (arg1 : _) -> do let cs = linePartsToText arg1 let cs' = if not (T.any (== '.') cs || T.any (== ')') cs) then cs <> "." 
else cs - let lt = case Parsec.runParser anyOrderedListMarker defaultParserState + let lt = case P.runParser anyOrderedListMarker defaultParserState "list marker" cs' of Right (start, listtype, listdelim) | cs == cs' -> Ordered (start, listtype, listdelim) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 1e12a23144db..42df8b9859b1 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -51,11 +51,12 @@ import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag, isCommentTag, isInlineTag, isTextTag) import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline) import Text.Pandoc.Shared +import Text.Pandoc.URI (escapeURI, isURI) import Text.Pandoc.XML (fromEntities) import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs, yamlMetaBlock) -- import Debug.Trace (traceShowId) -type MarkdownParser m = ParserT Sources ParserState m +type MarkdownParser m = ParsecT Sources ParserState m type F = Future ParserState @@ -157,14 +158,14 @@ inList = do ctx <- stateParserContext <$> getState guard (ctx == ListItemState) -spnl :: PandocMonad m => ParserT Sources st m () +spnl :: PandocMonad m => ParsecT Sources st m () spnl = try $ do skipSpaces optional newline skipSpaces notFollowedBy (char '\n') -spnl' :: PandocMonad m => ParserT Sources st m Text +spnl' :: PandocMonad m => ParsecT Sources st m Text spnl' = try $ do xs <- many spaceChar ys <- option "" $ try $ (:) <$> newline @@ -188,11 +189,11 @@ skipNonindentSpaces = do tabStop <- getOption readerTabStop gobbleAtMostSpaces (tabStop - 1) <* notFollowedBy spaceChar -litChar :: PandocMonad m => MarkdownParser m Char -litChar = escapedChar' +litChar :: PandocMonad m => MarkdownParser m Text +litChar = T.singleton <$> escapedChar' <|> characterReference - <|> noneOf "\n" - <|> try (newline >> notFollowedBy blankline >> return ' ') + <|> T.singleton <$> noneOf "\n" + <|> try (newline >> notFollowedBy blankline >> 
return " ") -- | Parse a sequence of inline elements between square brackets, -- including inlines between balanced pairs of square brackets. @@ -355,8 +356,9 @@ referenceKey = try $ do notFollowedBy' $ guardEnabled Ext_mmd_link_attributes >> try (spnl <* keyValAttr) notFollowedBy' (() <$ reference) - many1Char $ notFollowedBy space >> litChar - let betweenAngles = try $ char '<' >> manyTillChar litChar (char '>') + mconcat <$> many1 (notFollowedBy space *> litChar) + let betweenAngles = try $ char '<' >> + mconcat <$> (manyTill litChar (char '>')) rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths) src <- (if rebase then rebasePath pos else id) <$> (try betweenAngles <|> sourceURL) @@ -394,7 +396,7 @@ quotedTitle c = try $ do char c notFollowedBy spaces let pEnder = try $ char c >> notFollowedBy (satisfy isAlphaNum) - let regChunk = many1Char (noneOf ['\\','\n','&',c]) <|> countChar 1 litChar + let regChunk = many1Char (noneOf ['\\','\n','&',c]) <|> litChar let nestedChunk = (\x -> T.singleton c <> x <> T.singleton c) <$> quotedTitle c T.unwords . T.words . 
T.concat <$> manyTill (nestedChunk <|> regChunk) pEnder @@ -582,15 +584,16 @@ registerImplicitHeader raw attr@(ident, _, _) | T.null raw = return () | otherwise = do let key = toKey $ "[" <> raw <> "]" - updateState $ \s -> - s { stateHeaderKeys = M.insert key (("#" <> ident,""), attr) + updateState $ \s -> -- don't override existing headers + s { stateHeaderKeys = M.insertWith (\_new old -> old) + key (("#" <> ident,""), attr) (stateHeaderKeys s) } -- -- hrule block -- -hrule :: PandocMonad m => ParserT Sources st m (F Blocks) +hrule :: PandocMonad m => ParsecT Sources st m (F Blocks) hrule = try $ do skipSpaces start <- satisfy isHruleChar @@ -610,7 +613,7 @@ indentedLine = indentSpaces >> anyLineNewline blockDelimiter :: PandocMonad m => (Char -> Bool) -> Maybe Int - -> ParserT Sources ParserState m Int + -> ParsecT Sources ParserState m Int blockDelimiter f len = try $ do skipNonindentSpaces c <- lookAhead (satisfy f) @@ -651,8 +654,8 @@ keyValAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) keyValAttr = try $ do key <- identifier char '=' - val <- T.pack <$> enclosed (char '"') (char '"') litChar - <|> T.pack <$> enclosed (char '\'') (char '\'') litChar + val <- mconcat <$> enclosed (char '"') (char '"') litChar + <|> mconcat <$> enclosed (char '\'') (char '\'') litChar <|> ("" <$ try (string "\"\"")) <|> ("" <$ try (string "''")) <|> manyChar (escapedChar' <|> noneOf " \t\n\r}") @@ -688,9 +691,13 @@ codeBlockFenced = try $ do rawattr <- (Left <$> (guardEnabled Ext_raw_attribute >> try rawAttribute)) <|> - (Right <$> option ("",[],[]) - ((guardEnabled Ext_fenced_code_attributes >> try attributes) - <|> ((\x -> ("",[toLanguageId x],[])) <$> many1Char nonspaceChar))) + (Right <$> (do + languageId <- option Nothing (Just . 
toLanguageId <$> try (many1Char $ satisfy (\x -> x `notElem` ['`', '{', '}'] && not (isSpace x)))) + skipMany spaceChar + maybeAttr <- option Nothing (Just <$> (guardEnabled Ext_fenced_code_attributes >> try attributes)) + return $ case maybeAttr of + Nothing -> ("", maybeToList languageId, []) + Just (elementId, classes, attrs) -> (elementId, maybe classes (: classes) languageId, attrs))) blankline contents <- T.intercalate "\n" <$> manyTill (gobbleAtMostSpaces indentLevel >> anyLine) @@ -754,7 +761,7 @@ lhsCodeBlockBirdWith c = try $ do blanklines return $ T.intercalate "\n" lns' -birdTrackLine :: PandocMonad m => Char -> ParserT Sources st m Text +birdTrackLine :: PandocMonad m => Char -> ParsecT Sources st m Text birdTrackLine c = try $ do char c -- allow html tags on left margin: @@ -1199,7 +1206,7 @@ lineBlock = do -- and the length including trailing space. dashedLine :: PandocMonad m => Char - -> ParserT Sources st m (Int, Int) + -> ParsecT Sources st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many spaceChar @@ -1295,7 +1302,8 @@ tableCaption = do guardEnabled Ext_table_captions try $ do skipNonindentSpaces - (string ":" <* notFollowedBy (satisfy isPunctuation)) <|> string "Table:" + (string ":" <* notFollowedBy (satisfy isPunctuation)) <|> + (oneOf ['T','t'] >> string "able:") trimInlinesF <$> inlines1 <* blanklines -- Parse a simple table with '---' header and one line per row. @@ -1428,7 +1436,7 @@ pipeTableCell = return $ B.plain <$> result) <|> return mempty -pipeTableHeaderPart :: PandocMonad m => ParserT Sources st m (Alignment, Int) +pipeTableHeaderPart :: PandocMonad m => ParsecT Sources st m (Alignment, Int) pipeTableHeaderPart = try $ do skipMany spaceChar left <- optionMaybe (char ':') @@ -1444,7 +1452,7 @@ pipeTableHeaderPart = try $ do (Just _,Just _) -> AlignCenter, len) -- Succeed only if current line contains a pipe. 
-scanForPipe :: PandocMonad m => ParserT Sources st m () +scanForPipe :: PandocMonad m => ParsecT Sources st m () scanForPipe = do Sources inps <- getInput let ln = case inps of @@ -1576,7 +1584,7 @@ symbol = do <|> try (do lookAhead $ char '\\' notFollowedBy' (() <$ rawTeXBlock) char '\\') - return $ return $ B.str $ T.singleton result + return $ return $ B.str $! T.singleton result -- parses inline code, between n `s and n `s code :: PandocMonad m => MarkdownParser m (F Inlines) @@ -1601,8 +1609,8 @@ code = try $ do (guardEnabled Ext_inline_code_attributes >> try attributes)) return $ return $ case rawattr of - Left syn -> B.rawInline syn result - Right attr -> B.codeWith attr result + Left syn -> B.rawInline syn $! result + Right attr -> B.codeWith attr $! result math :: PandocMonad m => MarkdownParser m (F Inlines) math = (return . B.displayMath <$> (mathDisplay >>= applyMacros)) @@ -1721,7 +1729,7 @@ whitespace = spaceChar >> return <$> (lb <|> regsp) "whitespace" where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak) regsp = skipMany spaceChar >> return B.space -nonEndline :: PandocMonad m => ParserT Sources st m Char +nonEndline :: PandocMonad m => ParsecT Sources st m Char nonEndline = satisfy (/='\n') str :: PandocMonad m => MarkdownParser m (F Inlines) @@ -1790,11 +1798,11 @@ source = do skipSpaces let urlChunk = try parenthesizedChars - <|> (notFollowedBy (oneOf " )") >> countChar 1 litChar) + <|> (notFollowedBy (oneOf " )") >> litChar) <|> try (many1Char spaceChar <* notFollowedBy (oneOf "\"')")) let sourceURL = T.unwords . T.words . 
T.concat <$> many urlChunk let betweenAngles = try $ - char '<' >> manyTillChar litChar (char '>') + char '<' >> mconcat <$> (manyTill litChar (char '>')) src <- try betweenAngles <|> sourceURL tit <- option "" $ try $ spnl >> linkTitle skipSpaces @@ -2005,7 +2013,7 @@ rawLaTeXInline' = do s <- rawLaTeXInline return $ return $ B.rawInline "tex" s -- "tex" because it might be context -rawConTeXtEnvironment :: PandocMonad m => ParserT Sources st m Text +rawConTeXtEnvironment :: PandocMonad m => ParsecT Sources st m Text rawConTeXtEnvironment = try $ do string "\\start" completion <- inBrackets (letter <|> digit <|> spaceChar) @@ -2014,7 +2022,7 @@ rawConTeXtEnvironment = try $ do (try $ string "\\stop" >> textStr completion) return $ "\\start" <> completion <> T.concat contents <> "\\stop" <> completion -inBrackets :: PandocMonad m => ParserT Sources st m Char -> ParserT Sources st m Text +inBrackets :: PandocMonad m => ParsecT Sources st m Char -> ParsecT Sources st m Text inBrackets parser = do char '[' contents <- manyChar parser diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 0fdfa8b84a69..e7fae679c513 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -34,8 +34,8 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) import Text.Pandoc.Definition import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (nested, tableCaption) -import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag) +import Text.Pandoc.Parsing hiding (tableCaption) +import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag, toAttr) import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines, trim, splitTextBy, tshow, formatCode) import Text.Pandoc.XML (fromEntities) @@ -69,7 +69,7 @@ data MWState = MWState { mwOptions :: ReaderOptions , mwInTT :: Bool } -type MWParser m = ParserT Sources MWState m +type MWParser m = ParsecT Sources MWState m 
instance HasReaderOptions MWState where extractReaderOptions = mwOptions @@ -86,17 +86,6 @@ instance HasLogMessages MWState where -- auxiliary functions -- --- This is used to prevent exponential blowups for things like: --- ''a'''a''a'''a''a'''a''a'''a -nested :: PandocMonad m => MWParser m a -> MWParser m a -nested p = do - nestlevel <- mwMaxNestingLevel `fmap` getState - guard $ nestlevel > 0 - updateState $ \st -> st{ mwMaxNestingLevel = mwMaxNestingLevel st - 1 } - res <- p - updateState $ \st -> st{ mwMaxNestingLevel = nestlevel } - return res - specialChars :: [Char] specialChars = "'[]<=&*{}|\":\\" @@ -222,22 +211,21 @@ table = do optional rowsep hasheader <- option False $ True <$ lookAhead (skipSpaces *> char '!') (cellspecs',hdr) <- unzip <$> tableRow - let widths = map ((tableWidth *) . snd) cellspecs' + let widths = map (tableWidth *) cellspecs' let restwidth = tableWidth - sum widths let zerocols = length $ filter (==0.0) widths let defaultwidth = if zerocols == 0 || zerocols == length widths then ColWidthDefault else ColWidth $ restwidth / fromIntegral zerocols let widths' = map (\w -> if w > 0 then ColWidth w else defaultwidth) widths - let cellspecs = zip (map fst cellspecs') widths' + let cellspecs = zip (calculateAlignments hdr) widths' rows' <- many $ try $ rowsep *> (map snd <$> tableRow) optional blanklines tableEnd - let cols = length hdr let (headers,rows) = if hasheader then (hdr, rows') - else (replicate cols mempty, hdr:rows') - let toRow = Row nullAttr . 
map B.simpleCell + else ([], hdr:rows') + let toRow = Row nullAttr toHeaderRow l = [toRow l | not (null l)] return $ B.table (B.simpleCaption $ B.plain caption) cellspecs @@ -245,6 +233,12 @@ table = do [TableBody nullAttr 0 [] $ map toRow rows] (TableFoot nullAttr []) +calculateAlignments :: [Cell] -> [Alignment] +calculateAlignments = map cellAligns + where + cellAligns :: Cell -> Alignment + cellAligns (Cell _ align _ _ _) = align + parseAttrs :: PandocMonad m => MWParser m [(Text,Text)] parseAttrs = many1 parseAttr @@ -252,7 +246,9 @@ parseAttr :: PandocMonad m => MWParser m (Text, Text) parseAttr = try $ do skipMany spaceChar k <- many1Char letter + skipMany spaceChar char '=' + skipMany spaceChar v <- (char '"' >> many1TillChar (satisfy (/='\n')) (char '"')) <|> many1Char (satisfy $ \c -> not (isSpace c) && c /= '|') return (k,v) @@ -289,6 +285,7 @@ cellsep = try $ do tableCaption :: PandocMonad m => MWParser m Inlines tableCaption = try $ do + optional rowsep guardColumnOne skipSpaces sym "|+" @@ -296,14 +293,14 @@ tableCaption = try $ do trimInlines . 
mconcat <$> many (notFollowedBy (cellsep <|> rowsep) *> inline) -tableRow :: PandocMonad m => MWParser m [((Alignment, Double), Blocks)] +tableRow :: PandocMonad m => MWParser m [(Double, Cell)] tableRow = try $ skipMany htmlComment *> many tableCell -tableCell :: PandocMonad m => MWParser m ((Alignment, Double), Blocks) +tableCell :: PandocMonad m => MWParser m (Double, Cell) tableCell = try $ do cellsep skipMany spaceChar - attrs <- option [] $ try $ parseAttrs <* skipSpaces <* char '|' <* + attribs <- option [] $ try $ parseAttrs <* skipSpaces <* char '|' <* notFollowedBy (char '|') skipMany spaceChar pos' <- getPosition @@ -311,15 +308,23 @@ tableCell = try $ do ((snd <$> withRaw table) <|> countChar 1 anyChar)) bs <- parseFromString (do setPosition pos' mconcat <$> many block) ls - let align = case lookup "align" attrs of + let align = case lookup "align" attribs of Just "left" -> AlignLeft Just "right" -> AlignRight Just "center" -> AlignCenter _ -> AlignDefault - let width = case lookup "width" attrs of + let width = case lookup "width" attribs of Just xs -> fromMaybe 0.0 $ parseWidth xs Nothing -> 0.0 - return ((align, width), bs) + let rowspan = RowSpan . fromMaybe 1 $ + safeRead =<< lookup "rowspan" attribs + let colspan = ColSpan . fromMaybe 1 $ + safeRead =<< lookup "colspan" attribs + let handledAttribs = ["align", "colspan", "rowspan"] + attribs' = [ (k, v) | (k, v) <- attribs + , k `notElem` handledAttribs + ] + return (width, B.cellWith (toAttr attribs') align rowspan colspan bs) parseWidth :: Text -> Maybe Double parseWidth s = @@ -690,12 +695,12 @@ inlinesBetween start end = trimInlines . 
mconcat <$> try (start >> many1Till inline end) emph :: PandocMonad m => MWParser m Inlines -emph = B.emph <$> nested (inlinesBetween start end) +emph = B.emph <$> inlinesBetween start end where start = sym "''" end = try $ notFollowedBy' (() <$ strong) >> sym "''" strong :: PandocMonad m => MWParser m Inlines -strong = B.strong <$> nested (inlinesBetween start end) +strong = B.strong <$> inlinesBetween start end where start = sym "'''" end = sym "'''" @@ -704,6 +709,6 @@ doubleQuotes = do guardEnabled Ext_smart inTT <- mwInTT <$> getState guard (not inTT) - B.doubleQuoted <$> nested (inlinesBetween openDoubleQuote closeDoubleQuote) + B.doubleQuoted <$> inlinesBetween openDoubleQuote closeDoubleQuote where openDoubleQuote = sym "\"" >> lookAhead nonspaceChar closeDoubleQuote = try $ sym "\"" diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs index 15f981c2533e..209c3ec6f5f7 100644 --- a/src/Text/Pandoc/Readers/Metadata.hs +++ b/src/Text/Pandoc/Readers/Metadata.hs @@ -35,9 +35,9 @@ import Text.Pandoc.Parsing hiding (tableWith, parse) import qualified Text.Pandoc.UTF8 as UTF8 yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st) - => ParserT Sources st m (Future st MetaValue) + => ParsecT Sources st m (Future st MetaValue) -> B.ByteString - -> ParserT Sources st m (Future st Meta) + -> ParsecT Sources st m (Future st Meta) yamlBsToMeta pMetaValue bstr = do case Yaml.decodeAllEither' bstr of Right (Object o:_) -> fmap Meta <$> yamlMap pMetaValue o @@ -50,10 +50,10 @@ yamlBsToMeta pMetaValue bstr = do -- Returns filtered list of references. 
yamlBsToRefs :: (PandocMonad m, HasLastStrPosition st) - => ParserT Sources st m (Future st MetaValue) + => ParsecT Sources st m (Future st MetaValue) -> (Text -> Bool) -- ^ Filter for id -> B.ByteString - -> ParserT Sources st m (Future st [MetaValue]) + -> ParsecT Sources st m (Future st [MetaValue]) yamlBsToRefs pMetaValue idpred bstr = case Yaml.decodeAllEither' bstr of Right (Object m : _) -> do @@ -74,27 +74,31 @@ yamlBsToRefs pMetaValue idpred bstr = $ T.pack $ Yaml.prettyPrintParseException err' normalizeMetaValue :: (PandocMonad m, HasLastStrPosition st) - => ParserT Sources st m (Future st MetaValue) + => ParsecT Sources st m (Future st MetaValue) -> Text - -> ParserT Sources st m (Future st MetaValue) + -> ParsecT Sources st m (Future st MetaValue) normalizeMetaValue pMetaValue x = -- Note: a standard quoted or unquoted YAML value will -- not end in a newline, but a "block" set off with -- `|` or `>` will. if "\n" `T.isSuffixOf` (T.dropWhileEnd isSpaceChar x) -- see #6823 - then parseFromString' pMetaValue (x <> "\n") - else parseFromString' asInlines x + then parseFromString' pMetaValue (x <> "\n\n") + else parseFromString' asInlines (T.dropWhile isSpaceOrNlChar x) + -- see #8358 where asInlines = fmap b2i <$> pMetaValue b2i (MetaBlocks bs) = MetaInlines (blocksToInlines bs) b2i y = y isSpaceChar ' ' = True isSpaceChar '\t' = True isSpaceChar _ = False + isSpaceOrNlChar '\r' = True + isSpaceOrNlChar '\n' = True + isSpaceOrNlChar c = isSpaceChar c yamlToMetaValue :: (PandocMonad m, HasLastStrPosition st) - => ParserT Sources st m (Future st MetaValue) + => ParsecT Sources st m (Future st MetaValue) -> Value - -> ParserT Sources st m (Future st MetaValue) + -> ParsecT Sources st m (Future st MetaValue) yamlToMetaValue pMetaValue v = case v of String t -> normalizeMetaValue pMetaValue t @@ -112,9 +116,9 @@ yamlToMetaValue pMetaValue v = Object o -> fmap MetaMap <$> yamlMap pMetaValue o yamlMap :: (PandocMonad m, HasLastStrPosition st) - => ParserT Sources 
st m (Future st MetaValue) + => ParsecT Sources st m (Future st MetaValue) -> Object - -> ParserT Sources st m (Future st (M.Map Text MetaValue)) + -> ParsecT Sources st m (Future st (M.Map Text MetaValue)) yamlMap pMetaValue o = do case fromJSON (Object o) of Error err' -> throwError $ PandocParseError $ T.pack err' @@ -131,8 +135,8 @@ yamlMap pMetaValue o = do -- | Parse a YAML metadata block using the supplied 'MetaValue' parser. yamlMetaBlock :: (HasLastStrPosition st, PandocMonad m) - => ParserT Sources st m (Future st MetaValue) - -> ParserT Sources st m (Future st Meta) + => ParsecT Sources st m (Future st MetaValue) + -> ParsecT Sources st m (Future st Meta) yamlMetaBlock parser = try $ do string "---" blankline @@ -143,5 +147,5 @@ yamlMetaBlock parser = try $ do optional blanklines yamlBsToMeta parser $ UTF8.fromText rawYaml -stopLine :: Monad m => ParserT Sources st m () +stopLine :: Monad m => ParsecT Sources st m () stopLine = try $ (string "---" <|> string "...") >> blankline >> return () diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index a0d4534f1d75..c185897284ff 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -23,7 +23,7 @@ import Control.Monad.Reader import Control.Monad.Except (throwError) import Data.Bifunctor import Data.Default -import Data.List (transpose, uncons) +import Data.List (transpose) import qualified Data.Map as M import qualified Data.Set as Set import Data.Maybe (fromMaybe, isNothing, maybeToList) @@ -33,7 +33,6 @@ import Text.Pandoc.Builder (Blocks, Inlines, underline) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) import Text.Pandoc.Definition -import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing @@ -49,7 +48,7 @@ readMuse opts s = do res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } 
(initialSourceName sources) sources case res of - Left e -> throwError $ PandocParsecError sources e + Left e -> throwError $ fromParsecError sources e Right d -> return d type F = Future MuseState @@ -83,7 +82,7 @@ instance Default MuseEnv where , museInPara = False } -type MuseParser m = ParserT Sources MuseState (ReaderT MuseEnv m) +type MuseParser m = ParsecT Sources MuseState (ReaderT MuseEnv m) instance HasReaderOptions MuseState where extractReaderOptions = museOptions @@ -156,7 +155,7 @@ firstColumn = getPosition >>= \pos -> guard (sourceColumn pos == 1) -- * Parsers -- | Parse end-of-line, which can be either a newline or end-of-file. -eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m () +eol :: (Stream s m Char, UpdateSourcePos s Char) => ParsecT s st m () eol = void newline <|> eof getIndent :: PandocMonad m @@ -652,7 +651,10 @@ museToPandocTable (MuseTable caption headers body footers) = [TableBody nullAttr 0 [] $ map toRow $ rows ++ body ++ footers] (TableFoot nullAttr []) where attrs = (AlignDefault, ColWidthDefault) <$ transpose (headers ++ body ++ footers) - (headRow, rows) = fromMaybe ([], []) $ uncons headers + (headRow, rows) = + case headers of + (r:rs) -> (r, rs) + [] -> ([], []) toRow = Row nullAttr . map B.simpleCell toHeaderRow l = [toRow l | not (null l)] diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/ODT.hs similarity index 75% rename from src/Text/Pandoc/Readers/Odt.hs rename to src/Text/Pandoc/Readers/ODT.hs index c274b6fd492a..4139c35be1db 100644 --- a/src/Text/Pandoc/Readers/Odt.hs +++ b/src/Text/Pandoc/Readers/ODT.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- | - Module : Text.Pandoc.Reader.Odt + Module : Text.Pandoc.Reader.ODT Copyright : Copyright (C) 2015 Martin Linnemann License : GNU GPL, version 2 or above @@ -11,7 +11,7 @@ Entry point to the odt reader. 
-} -module Text.Pandoc.Readers.Odt ( readOdt ) where +module Text.Pandoc.Readers.ODT ( readODT ) where import Codec.Archive.Zip import Text.Pandoc.XML.Light @@ -32,41 +32,41 @@ import Text.Pandoc.MediaBag import Text.Pandoc.Options import qualified Text.Pandoc.UTF8 as UTF8 -import Text.Pandoc.Readers.Odt.ContentReader -import Text.Pandoc.Readers.Odt.StyleReader +import Text.Pandoc.Readers.ODT.ContentReader +import Text.Pandoc.Readers.ODT.StyleReader -import Text.Pandoc.Readers.Odt.Generic.Fallible -import Text.Pandoc.Readers.Odt.Generic.XMLConverter +import Text.Pandoc.Readers.ODT.Generic.Fallible +import Text.Pandoc.Readers.ODT.Generic.XMLConverter import Text.Pandoc.Shared (filteredFilesFromArchive) -readOdt :: PandocMonad m +readODT :: PandocMonad m => ReaderOptions -> B.ByteString -> m Pandoc -readOdt opts bytes = case readOdt' opts bytes of +readODT opts bytes = case readODT' opts bytes of Right (doc, mb) -> do P.setMediaBag mb return doc Left e -> throwError e -- -readOdt' :: ReaderOptions +readODT' :: ReaderOptions -> B.ByteString -> Either PandocError (Pandoc, MediaBag) -readOdt' _ bytes = bytesToOdt bytes-- of +readODT' _ bytes = bytesToODT bytes-- of -- Right (pandoc, mediaBag) -> Right (pandoc , mediaBag) -- Left err -> Left err -- -bytesToOdt :: B.ByteString -> Either PandocError (Pandoc, MediaBag) -bytesToOdt bytes = case toArchiveOrFail bytes of - Right archive -> archiveToOdt archive +bytesToODT :: B.ByteString -> Either PandocError (Pandoc, MediaBag) +bytesToODT bytes = case toArchiveOrFail bytes of + Right archive -> archiveToODT archive Left err -> Left $ PandocParseError $ "Could not unzip ODT: " <> T.pack err -- -archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag) -archiveToOdt archive = do +archiveToODT :: Archive -> Either PandocError (Pandoc, MediaBag) +archiveToODT archive = do let onFailure msg Nothing = Left $ PandocParseError msg onFailure _ (Just x) = Right x contentEntry <- onFailure "Could not find content.xml" @@ -79,11 
+79,11 @@ archiveToOdt archive = do (\_ -> Left $ PandocParseError "Could not read styles") Right (chooseMax (readStylesAt stylesElem ) (readStylesAt contentElem)) - let filePathIsOdtMedia :: FilePath -> Bool - filePathIsOdtMedia fp = + let filePathIsODTMedia :: FilePath -> Bool + filePathIsODTMedia fp = let (dir, name) = splitFileName fp in (dir == "Pictures/") || (dir /= "./" && name == "content.xml") - let media = filteredFilesFromArchive archive filePathIsOdtMedia + let media = filteredFilesFromArchive archive filePathIsODTMedia let startState = readerState styles media either (\_ -> Left $ PandocParseError "Could not convert opendocument") Right (runConverter' read_body startState contentElem) diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/ODT/Arrows/State.hs similarity index 96% rename from src/Text/Pandoc/Readers/Odt/Arrows/State.hs rename to src/Text/Pandoc/Readers/ODT/Arrows/State.hs index 96515bf56506..742f6e9ee5b2 100644 --- a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs +++ b/src/Text/Pandoc/Readers/ODT/Arrows/State.hs @@ -1,7 +1,7 @@ {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE TupleSections #-} {- | - Module : Text.Pandoc.Readers.Odt.Arrows.State + Module : Text.Pandoc.Readers.ODT.Arrows.State Copyright : Copyright (C) 2015 Martin Linnemann License : GNU GPL, version 2 or above @@ -17,14 +17,14 @@ Most of these might be implemented without access to innards, but it's much faster and easier to implement this way. 
-} -module Text.Pandoc.Readers.Odt.Arrows.State where +module Text.Pandoc.Readers.ODT.Arrows.State where import Control.Arrow import qualified Control.Category as Cat import Control.Monad import Data.List (foldl') -import Text.Pandoc.Readers.Odt.Arrows.Utils -import Text.Pandoc.Readers.Odt.Generic.Fallible +import Text.Pandoc.Readers.ODT.Arrows.Utils +import Text.Pandoc.Readers.ODT.Generic.Fallible newtype ArrowState state a b = ArrowState diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs b/src/Text/Pandoc/Readers/ODT/Arrows/Utils.hs similarity index 94% rename from src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs rename to src/Text/Pandoc/Readers/ODT/Arrows/Utils.hs index a067895ec4a4..339bff1cb3f4 100644 --- a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs +++ b/src/Text/Pandoc/Readers/ODT/Arrows/Utils.hs @@ -1,5 +1,5 @@ {- | - Module : Text.Pandoc.Readers.Odt.Arrows.Utils + Module : Text.Pandoc.Readers.ODT.Arrows.Utils Copyright : Copyright (C) 2015 Martin Linnemann License : GNU GPL, version 2 or above @@ -19,14 +19,14 @@ with an equivalent return value. -} -- We export everything -module Text.Pandoc.Readers.Odt.Arrows.Utils where +module Text.Pandoc.Readers.ODT.Arrows.Utils where import Prelude hiding (Applicative(..)) import Control.Arrow import Control.Monad (join) -import Text.Pandoc.Readers.Odt.Generic.Fallible -import Text.Pandoc.Readers.Odt.Generic.Utils +import Text.Pandoc.Readers.ODT.Generic.Fallible +import Text.Pandoc.Readers.ODT.Generic.Utils and2 :: (Arrow a) => a b c -> a b c' -> a b (c,c') and2 = (&&&) @@ -184,12 +184,6 @@ a >>?! f = a >>> right f -> FallibleArrow a x f c a >>?% f = a >>?^ uncurry f ---- -(^>>?%) :: (ArrowChoice a) - => (x -> Either f (b,b')) - -> (b -> b' -> c) - -> FallibleArrow a x f c -a ^>>?% f = arr a >>?^ uncurry f --- (>>?%?) :: (ArrowChoice a) @@ -200,7 +194,7 @@ a >>?%? f = a >>?^? uncurry f infixr 1 >>?, >>?^, >>?^? infixr 1 ^>>?, >>?! -infixr 1 >>?%, ^>>?%, >>?%? +infixr 1 >>?%, >>?%? 
-- | An arrow version of a short-circuit (<|>) ifFailedDo :: (ArrowChoice a) diff --git a/src/Text/Pandoc/Readers/Odt/Base.hs b/src/Text/Pandoc/Readers/ODT/Base.hs similarity index 61% rename from src/Text/Pandoc/Readers/Odt/Base.hs rename to src/Text/Pandoc/Readers/ODT/Base.hs index 2c07b1c1129f..4a99f5ad6e4c 100644 --- a/src/Text/Pandoc/Readers/Odt/Base.hs +++ b/src/Text/Pandoc/Readers/ODT/Base.hs @@ -1,5 +1,5 @@ {- | - Module : Text.Pandoc.Readers.Odt.Base + Module : Text.Pandoc.Readers.ODT.Base Copyright : Copyright (C) 2015 Martin Linnemann License : GNU GPL, version 2 or above @@ -10,12 +10,12 @@ Core types of the odt reader. -} -module Text.Pandoc.Readers.Odt.Base where +module Text.Pandoc.Readers.ODT.Base where -import Text.Pandoc.Readers.Odt.Generic.XMLConverter -import Text.Pandoc.Readers.Odt.Namespaces +import Text.Pandoc.Readers.ODT.Generic.XMLConverter +import Text.Pandoc.Readers.ODT.Namespaces -type OdtConverterState s = XMLConverterState Namespace s +type ODTConverterState s = XMLConverterState Namespace s type XMLReader s a b = FallibleXMLConverter Namespace s a b diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/ODT/ContentReader.hs similarity index 90% rename from src/Text/Pandoc/Readers/Odt/ContentReader.hs rename to src/Text/Pandoc/Readers/ODT/ContentReader.hs index 25573549eedb..97f51d5fa98c 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/ODT/ContentReader.hs @@ -8,7 +8,7 @@ {-# LANGUAGE ViewPatterns #-} {-# LANGUAGE OverloadedStrings #-} {- | - Module : Text.Pandoc.Readers.Odt.ContentReader + Module : Text.Pandoc.Readers.ODT.ContentReader Copyright : Copyright (C) 2015 Martin Linnemann License : GNU GPL, version 2 or above @@ -19,7 +19,7 @@ The core of the odt reader that converts odt features into Pandoc types. 
-} -module Text.Pandoc.Readers.Odt.ContentReader +module Text.Pandoc.Readers.ODT.ContentReader ( readerState , read_body ) where @@ -46,16 +46,17 @@ import Text.Pandoc.Shared import Text.Pandoc.Extensions (extensionsFromList, Extension(..)) import qualified Text.Pandoc.UTF8 as UTF8 -import Text.Pandoc.Readers.Odt.Base -import Text.Pandoc.Readers.Odt.Namespaces -import Text.Pandoc.Readers.Odt.StyleReader +import Text.Pandoc.Readers.ODT.Base +import Text.Pandoc.Readers.ODT.Namespaces +import Text.Pandoc.Readers.ODT.StyleReader -import Text.Pandoc.Readers.Odt.Arrows.State (foldS) -import Text.Pandoc.Readers.Odt.Arrows.Utils -import Text.Pandoc.Readers.Odt.Generic.Fallible -import Text.Pandoc.Readers.Odt.Generic.Utils -import Text.Pandoc.Readers.Odt.Generic.XMLConverter +import Text.Pandoc.Readers.ODT.Arrows.State (foldS) +import Text.Pandoc.Readers.ODT.Arrows.Utils +import Text.Pandoc.Readers.ODT.Generic.Fallible +import Text.Pandoc.Readers.ODT.Generic.Utils +import Text.Pandoc.Readers.ODT.Generic.XMLConverter +import Network.URI (parseRelativeReference, URI(uriPath)) import qualified Data.Set as Set -------------------------------------------------------------------------------- @@ -145,37 +146,37 @@ insertMedia' (fp, bs) state@ReaderState{..} -- Reader type and associated tools -------------------------------------------------------------------------------- -type OdtReader a b = XMLReader ReaderState a b +type ODTReader a b = XMLReader ReaderState a b -type OdtReaderSafe a b = XMLReaderSafe ReaderState a b +type ODTReaderSafe a b = XMLReaderSafe ReaderState a b -- | Extract something from the styles -fromStyles :: (a -> Styles -> b) -> OdtReaderSafe a b +fromStyles :: (a -> Styles -> b) -> ODTReaderSafe a b fromStyles f = keepingTheValue (getExtraState >>^ styleSet) >>% f -- -getStyleByName :: OdtReader StyleName Style +getStyleByName :: ODTReader StyleName Style getStyleByName = fromStyles lookupStyle >>^ maybeToChoice -- -findStyleFamily :: OdtReader Style 
StyleFamily +findStyleFamily :: ODTReader Style StyleFamily findStyleFamily = fromStyles getStyleFamily >>^ maybeToChoice -- -lookupListStyle :: OdtReader StyleName ListStyle +lookupListStyle :: ODTReader StyleName ListStyle lookupListStyle = fromStyles lookupListStyleByName >>^ maybeToChoice -- -switchCurrentListStyle :: OdtReaderSafe (Maybe ListStyle) (Maybe ListStyle) +switchCurrentListStyle :: ODTReaderSafe (Maybe ListStyle) (Maybe ListStyle) switchCurrentListStyle = keepingTheValue getExtraState >>% swapCurrentListStyle >>> first setExtraState >>^ snd -- -pushStyle :: OdtReaderSafe Style Style +pushStyle :: ODTReaderSafe Style Style pushStyle = keepingTheValue ( ( keepingTheValue getExtraState >>% pushStyle' @@ -185,7 +186,7 @@ pushStyle = keepingTheValue ( >>^ fst -- -popStyle :: OdtReaderSafe x x +popStyle :: ODTReaderSafe x x popStyle = keepingTheValue ( getExtraState >>> arr popStyle' @@ -194,11 +195,11 @@ popStyle = keepingTheValue ( >>^ fst -- -getCurrentListLevel :: OdtReaderSafe _x ListLevel +getCurrentListLevel :: ODTReaderSafe _x ListLevel getCurrentListLevel = getExtraState >>^ currentListLevel -- -updateMediaWithResource :: OdtReaderSafe (FilePath, B.ByteString) (FilePath, B.ByteString) +updateMediaWithResource :: ODTReaderSafe (FilePath, B.ByteString) (FilePath, B.ByteString) updateMediaWithResource = keepingTheValue ( (keepingTheValue getExtraState >>% insertMedia' @@ -207,7 +208,7 @@ updateMediaWithResource = keepingTheValue ( ) >>^ fst -lookupResource :: OdtReaderSafe FilePath (FilePath, B.ByteString) +lookupResource :: ODTReaderSafe FilePath (FilePath, B.ByteString) lookupResource = proc target -> do state <- getExtraState -< () case lookup target (getMediaEnv state) of @@ -231,7 +232,7 @@ uniqueIdentFrom baseIdent usedIdents = -- | First argument: basis for a new "pretty" anchor if none exists yet -- Second argument: a key ("ugly" anchor) -- Returns: saved "pretty" anchor or created new one -getPrettyAnchor :: OdtReaderSafe (AnchorPrefix, 
Anchor) Anchor +getPrettyAnchor :: ODTReaderSafe (AnchorPrefix, Anchor) Anchor getPrettyAnchor = proc (baseIdent, uglyAnchor) -> do state <- getExtraState -< () case lookupPrettyAnchor uglyAnchor state of @@ -242,7 +243,7 @@ getPrettyAnchor = proc (baseIdent, uglyAnchor) -> do -- | Input: basis for a new header anchor -- Output: saved new anchor -getHeaderAnchor :: OdtReaderSafe Inlines Anchor +getHeaderAnchor :: ODTReaderSafe Inlines Anchor getHeaderAnchor = proc title -> do state <- getExtraState -< () let exts = extensionsFromList [Ext_auto_identifiers] @@ -256,7 +257,7 @@ getHeaderAnchor = proc title -> do -------------------------------------------------------------------------------- -- -readStyleByName :: OdtReader _x (StyleName, Style) +readStyleByName :: ODTReader _x (StyleName, Style) readStyleByName = findAttr NsText "style-name" >>? keepingTheValue getStyleByName >>^ liftE where @@ -265,11 +266,11 @@ readStyleByName = liftE (_, Left v) = Left v -- -isStyleToTrace :: OdtReader Style Bool +isStyleToTrace :: ODTReader Style Bool isStyleToTrace = findStyleFamily >>?^ (==FaText) -- -withNewStyle :: OdtReaderSafe x Inlines -> OdtReaderSafe x Inlines +withNewStyle :: ODTReaderSafe x Inlines -> ODTReaderSafe x Inlines withNewStyle a = proc x -> do fStyle <- readStyleByName -< () case fStyle of @@ -403,7 +404,7 @@ getParaModifier Style{..} | Just props <- paraProperties styleProperties = False -- -constructPara :: OdtReaderSafe Blocks Blocks -> OdtReaderSafe Blocks Blocks +constructPara :: ODTReaderSafe Blocks Blocks -> ODTReaderSafe Blocks Blocks constructPara reader = proc blocks -> do fStyle <- readStyleByName -< blocks case fStyle of @@ -458,7 +459,7 @@ getListConstructor ListLevelStyle{..} = -- state must be switched before and after the call to the child converter -- while in the latter the child converter can be called directly. -- If anything goes wrong, a default ordered-list-constructor is used. 
-constructList :: OdtReaderSafe x [Blocks] -> OdtReaderSafe x Blocks +constructList :: ODTReaderSafe x [Blocks] -> ODTReaderSafe x Blocks constructList reader = proc x -> do modifyExtraState (shiftListLevel 1) -< () listLevel <- getCurrentListLevel -< () @@ -501,7 +502,7 @@ constructList reader = proc x -> do -- Readers -------------------------------------------------------------------------------- -type ElementMatcher result = (Namespace, ElementName, OdtReader result result) +type ElementMatcher result = (Namespace, ElementName, ODTReader result result) type InlineMatcher = ElementMatcher Inlines @@ -516,7 +517,7 @@ firstMatch = FirstMatch . Alt . Just -- matchingElement :: (Monoid e) => Namespace -> ElementName - -> OdtReaderSafe e e + -> ODTReaderSafe e e -> ElementMatcher e matchingElement ns name reader = (ns, name, asResultAccumulator reader) where @@ -526,14 +527,14 @@ matchingElement ns name reader = (ns, name, asResultAccumulator reader) -- matchChildContent' :: (Monoid result) => [ElementMatcher result] - -> OdtReaderSafe _x result + -> ODTReaderSafe _x result matchChildContent' ls = returnV mempty >>> matchContent' ls -- matchChildContent :: (Monoid result) => [ElementMatcher result] - -> OdtReaderSafe (result, XML.Content) result - -> OdtReaderSafe _x result + -> ODTReaderSafe (result, XML.Content) result + -> ODTReaderSafe _x result matchChildContent ls fallback = returnV mempty >>> matchContent ls fallback -------------------------------------------- @@ -546,11 +547,11 @@ matchChildContent ls fallback = returnV mempty >>> matchContent ls fallback -- -- | Open Document allows several consecutive spaces if they are marked up -read_plain_text :: OdtReaderSafe (Inlines, XML.Content) Inlines +read_plain_text :: ODTReaderSafe (Inlines, XML.Content) Inlines read_plain_text = fst ^&&& read_plain_text' >>% recover where -- fallible version - read_plain_text' :: OdtReader (Inlines, XML.Content) Inlines + read_plain_text' :: ODTReader (Inlines, XML.Content) 
Inlines read_plain_text' = ( second ( arr extractText ) >>^ spreadChoice >>?! second text ) @@ -677,9 +678,24 @@ read_list_element listElement = matchingElement NsText listElement ( matchChildContent' [ read_paragraph , read_header , read_list + , read_section ] ) +---------------------- +-- Sections +---------------------- + +read_section :: ElementMatcher Blocks +read_section = matchingElement NsText "section" + $ liftA (divWith nullAttr) + $ matchChildContent' [ read_paragraph + , read_header + , read_list + , read_table + , read_section + ] + ---------------------- -- Links @@ -688,7 +704,8 @@ read_list_element listElement = matchingElement NsText listElement read_link :: InlineMatcher read_link = matchingElement NsText "a" $ liftA3 link - ( findAttrTextWithDefault NsXLink "href" "" ) + ( findAttrTextWithDefault NsXLink "href" "" + >>> arr fixRelativeLink ) ( findAttrTextWithDefault NsOffice "title" "" ) ( matchChildContent [ read_span , read_note @@ -700,6 +717,14 @@ read_link = matchingElement NsText "a" , read_reference_ref ] read_plain_text ) +fixRelativeLink :: T.Text -> T.Text +fixRelativeLink uri = + case parseRelativeReference (T.unpack uri) of + Nothing -> uri + Just u -> + case uriPath u of + '.':'.':'/':xs -> tshow $ u{ uriPath = xs } + _ -> uri ------------------------- -- Footnotes @@ -774,7 +799,7 @@ read_frame = matchingElement NsDraw "frame" >>> foldS read_frame_child >>> arr fold -read_frame_child :: OdtReaderSafe XML.Element (FirstMatch Inlines) +read_frame_child :: ODTReaderSafe XML.Element (FirstMatch Inlines) read_frame_child = proc child -> case elName child of "image" -> read_frame_img -< child @@ -782,7 +807,7 @@ read_frame_child = "text-box" -> read_frame_text_box -< child _ -> returnV mempty -< () -read_frame_img :: OdtReaderSafe XML.Element (FirstMatch Inlines) +read_frame_img :: ODTReaderSafe XML.Element (FirstMatch Inlines) read_frame_img = proc img -> do src <- executeIn (findAttr' NsXLink "href") -< img @@ -810,7 +835,7 @@ 
image_attributes x y = dim name (Just v) = [(name, v)] dim _ Nothing = [] -read_frame_mathml :: OdtReaderSafe XML.Element (FirstMatch Inlines) +read_frame_mathml :: ODTReaderSafe XML.Element (FirstMatch Inlines) read_frame_mathml = proc obj -> do src <- executeIn (findAttr' NsXLink "href") -< obj @@ -824,7 +849,7 @@ read_frame_mathml = Left _ -> returnV mempty -< () Right exps -> arr (firstMatch . displayMath . writeTeX) -< exps -read_frame_text_box :: OdtReaderSafe XML.Element (FirstMatch Inlines) +read_frame_text_box :: ODTReaderSafe XML.Element (FirstMatch Inlines) read_frame_text_box = proc box -> do paragraphs <- executeIn (matchChildContent' [ read_paragraph ]) -< box arr read_img_with_caption -< toList paragraphs @@ -847,19 +872,19 @@ _ANCHOR_PREFIX_ :: T.Text _ANCHOR_PREFIX_ = "anchor" -- -readAnchorAttr :: OdtReader _x Anchor +readAnchorAttr :: ODTReader _x Anchor readAnchorAttr = findAttrText NsText "name" -- | Beware: may fail -findAnchorName :: OdtReader AnchorPrefix Anchor +findAnchorName :: ODTReader AnchorPrefix Anchor findAnchorName = ( keepingTheValue readAnchorAttr >>^ spreadChoice ) >>?! getPrettyAnchor -- -maybeAddAnchorFrom :: OdtReader Inlines AnchorPrefix - -> OdtReaderSafe Inlines Inlines +maybeAddAnchorFrom :: ODTReader Inlines AnchorPrefix + -> ODTReaderSafe Inlines Inlines maybeAddAnchorFrom anchorReader = keepingTheValue (anchorReader >>? findAnchorName >>?^ toAnchorElem) >>> @@ -888,14 +913,14 @@ read_reference_start = matchingElement NsText "reference-mark-start" $ maybeAddAnchorFrom readAnchorAttr -- | Beware: may fail -findAnchorRef :: OdtReader _x Anchor +findAnchorRef :: ODTReader _x Anchor findAnchorRef = ( findAttrText NsText "ref-name" >>?^ (_ANCHOR_PREFIX_,) ) >>?! 
getPrettyAnchor -- -maybeInAnchorRef :: OdtReaderSafe Inlines Inlines +maybeInAnchorRef :: ODTReaderSafe Inlines Inlines maybeInAnchorRef = proc inlines -> do fRef <- findAnchorRef -< () case fRef of @@ -923,10 +948,11 @@ read_reference_ref = matchingElement NsText "reference-ref" -- Entry point ---------------------- -read_text :: OdtReaderSafe _x Pandoc +read_text :: ODTReaderSafe _x Pandoc read_text = matchChildContent' [ read_header , read_paragraph , read_list + , read_section , read_table ] >>^ doc @@ -940,7 +966,7 @@ post_process' (Table attr _ specs th tb tf : Div ("", ["caption"], _) blks : xs) = Table attr (Caption Nothing blks) specs th tb tf : post_process' xs post_process' bs = bs -read_body :: OdtReader _x (Pandoc, MediaBag) +read_body :: ODTReader _x (Pandoc, MediaBag) read_body = executeInSub NsOffice "body" $ executeInSub NsOffice "text" $ liftAsSuccess diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs b/src/Text/Pandoc/Readers/ODT/Generic/Fallible.hs similarity index 91% rename from src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs rename to src/Text/Pandoc/Readers/ODT/Generic/Fallible.hs index 2ec2174a620d..c6f45ced107b 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs +++ b/src/Text/Pandoc/Readers/ODT/Generic/Fallible.hs @@ -1,5 +1,5 @@ {- | - Module : Text.Pandoc.Readers.Odt.Generic.Fallible + Module : Text.Pandoc.Readers.ODT.Generic.Fallible Copyright : Copyright (C) 2015 Martin Linnemann License : GNU GPL, version 2 or above @@ -17,7 +17,7 @@ compatible instances of "ArrowChoice". -} -- We export everything -module Text.Pandoc.Readers.Odt.Generic.Fallible where +module Text.Pandoc.Readers.ODT.Generic.Fallible where -- | Default for now. Will probably become a class at some point. 
type Failure = () @@ -35,12 +35,7 @@ eitherToMaybe :: Either _l a -> Maybe a eitherToMaybe (Left _) = Nothing eitherToMaybe (Right a) = Just a --- | > fromLeft f === either f id -fromLeft :: (a -> b) -> Either a b -> b -fromLeft f (Left a) = f a -fromLeft _ (Right b) = b - --- | > recover a === fromLeft (const a) === either (const a) id +-- | > recover a === either (const a) id recover :: a -> Either _f a -> a recover a (Left _) = a recover _ (Right a) = a diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/ODT/Generic/Namespaces.hs similarity index 92% rename from src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs rename to src/Text/Pandoc/Readers/ODT/Generic/Namespaces.hs index 78a7fc0b2188..d7310d2e531c 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs +++ b/src/Text/Pandoc/Readers/ODT/Generic/Namespaces.hs @@ -1,5 +1,5 @@ {- | - Module : Text.Pandoc.Readers.Odt.Generic.Namespaces + Module : Text.Pandoc.Readers.ODT.Generic.Namespaces Copyright : Copyright (C) 2015 Martin Linnemann License : GNU GPL, version 2 or above @@ -11,7 +11,7 @@ A class containing a set of namespace identifiers. Used to convert between typesafe Haskell namespace identifiers and unsafe "real world" namespaces. 
-} -module Text.Pandoc.Readers.Odt.Generic.Namespaces where +module Text.Pandoc.Readers.ODT.Generic.Namespaces where import qualified Data.Map as M import Data.Text (Text) diff --git a/src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs b/src/Text/Pandoc/Readers/ODT/Generic/SetMap.hs similarity index 87% rename from src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs rename to src/Text/Pandoc/Readers/ODT/Generic/SetMap.hs index 0e4fa0990fba..be586803bc15 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs +++ b/src/Text/Pandoc/Readers/ODT/Generic/SetMap.hs @@ -1,5 +1,5 @@ {- | - Module : Text.Pandoc.Readers.Odt.Generic.SetMap + Module : Text.Pandoc.Readers.ODT.Generic.SetMap Copyright : Copyright (C) 2015 Martin Linnemann License : GNU GPL, version 2 or above @@ -10,7 +10,7 @@ A map of values to sets of values. -} -module Text.Pandoc.Readers.Odt.Generic.SetMap where +module Text.Pandoc.Readers.ODT.Generic.SetMap where import qualified Data.Map as M import qualified Data.Set as S diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/ODT/Generic/Utils.hs similarity index 97% rename from src/Text/Pandoc/Readers/Odt/Generic/Utils.hs rename to src/Text/Pandoc/Readers/ODT/Generic/Utils.hs index edefe3c704e6..fe85ef6f22b9 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs +++ b/src/Text/Pandoc/Readers/ODT/Generic/Utils.hs @@ -1,6 +1,6 @@ {-# LANGUAGE ViewPatterns #-} {- | - Module : Text.Pandoc.Reader.Odt.Generic.Utils + Module : Text.Pandoc.Reader.ODT.Generic.Utils Copyright : Copyright (C) 2015 Martin Linnemann License : GNU GPL, version 2 or above @@ -11,7 +11,7 @@ General utility functions for the odt reader. 
-} -module Text.Pandoc.Readers.Odt.Generic.Utils +module Text.Pandoc.Readers.ODT.Generic.Utils ( uncurry3 , uncurry4 , uncurry5 diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/ODT/Generic/XMLConverter.hs similarity index 97% rename from src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs rename to src/Text/Pandoc/Readers/ODT/Generic/XMLConverter.hs index 6740d44ad134..fc0cd21e52ae 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs +++ b/src/Text/Pandoc/Readers/ODT/Generic/XMLConverter.hs @@ -4,7 +4,7 @@ {-# LANGUAGE LambdaCase #-} {-# LANGUAGE PatternGuards #-} {- | - Module : Text.Pandoc.Readers.Odt.Generic.XMLConverter + Module : Text.Pandoc.Readers.ODT.Generic.XMLConverter Copyright : Copyright (C) 2015 Martin Linnemann License : GNU GPL, version 2 or above @@ -17,7 +17,7 @@ It might be sufficient to define this reader as a comonad, but there is not a lot of use in trying. -} -module Text.Pandoc.Readers.Odt.Generic.XMLConverter +module Text.Pandoc.Readers.ODT.Generic.XMLConverter ( ElementName , XMLConverterState , XMLConverter @@ -40,7 +40,6 @@ module Text.Pandoc.Readers.Odt.Generic.XMLConverter , findAttrText' , findAttr , findAttrText -, findAttrWithDefault , findAttrTextWithDefault , readAttr , readAttr' @@ -69,11 +68,11 @@ import Data.List (foldl') import qualified Text.Pandoc.XML.Light as XML -import Text.Pandoc.Readers.Odt.Arrows.State -import Text.Pandoc.Readers.Odt.Arrows.Utils -import Text.Pandoc.Readers.Odt.Generic.Namespaces -import Text.Pandoc.Readers.Odt.Generic.Utils -import Text.Pandoc.Readers.Odt.Generic.Fallible +import Text.Pandoc.Readers.ODT.Arrows.State +import Text.Pandoc.Readers.ODT.Arrows.Utils +import Text.Pandoc.Readers.ODT.Generic.Namespaces +import Text.Pandoc.Readers.ODT.Generic.Utils +import Text.Pandoc.Readers.ODT.Generic.Fallible -------------------------------------------------------------------------------- -- Basis types for readability @@ -495,15 +494,6 @@ findAttrText 
nsID attrName = findAttr' nsID attrName >>> maybeToChoice --- | Return value as string or return provided default value -findAttrWithDefault :: (NameSpaceID nsID) - => nsID -> AttributeName - -> AttributeValue - -> XMLConverter nsID extraState x AttributeValue -findAttrWithDefault nsID attrName deflt - = findAttr' nsID attrName - >>^ fromMaybe deflt - -- | Return value as string or return provided default value findAttrTextWithDefault :: (NameSpaceID nsID) => nsID -> AttributeName diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/ODT/Namespaces.hs similarity index 96% rename from src/Text/Pandoc/Readers/Odt/Namespaces.hs rename to src/Text/Pandoc/Readers/ODT/Namespaces.hs index 70741c28d37f..77ca21165501 100644 --- a/src/Text/Pandoc/Readers/Odt/Namespaces.hs +++ b/src/Text/Pandoc/Readers/ODT/Namespaces.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- | - Module : Text.Pandoc.Reader.Odt.Namespaces + Module : Text.Pandoc.Reader.ODT.Namespaces Copyright : Copyright (C) 2015 Martin Linnemann License : GNU GPL, version 2 or above @@ -11,14 +11,14 @@ Namespaces used in odt files. -} -module Text.Pandoc.Readers.Odt.Namespaces ( Namespace (..) +module Text.Pandoc.Readers.ODT.Namespaces ( Namespace (..) 
) where import qualified Data.Map as M (empty, insert) import Data.Maybe (fromMaybe, listToMaybe) import Data.Text (Text) import qualified Data.Text as T -import Text.Pandoc.Readers.Odt.Generic.Namespaces +import Text.Pandoc.Readers.ODT.Generic.Namespaces instance NameSpaceID Namespace where diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/ODT/StyleReader.hs similarity index 98% rename from src/Text/Pandoc/Readers/Odt/StyleReader.hs rename to src/Text/Pandoc/Readers/ODT/StyleReader.hs index 7337194cb5e0..dadd37dccd9b 100644 --- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs +++ b/src/Text/Pandoc/Readers/ODT/StyleReader.hs @@ -4,7 +4,7 @@ {-# LANGUAGE TupleSections #-} {-# LANGUAGE OverloadedStrings #-} {- | - Module : Text.Pandoc.Readers.Odt.StyleReader + Module : Text.Pandoc.Readers.ODT.StyleReader Copyright : Copyright (C) 2015 Martin Linnemann License : GNU GPL, version 2 or above @@ -15,7 +15,7 @@ Reader for the style information in an odt document. -} -module Text.Pandoc.Readers.Odt.StyleReader +module Text.Pandoc.Readers.ODT.StyleReader ( Style (..) , StyleName , StyleFamily (..) 
@@ -56,15 +56,15 @@ import qualified Text.Pandoc.XML.Light as XML import Text.Pandoc.Shared (safeRead, tshow) -import Text.Pandoc.Readers.Odt.Arrows.Utils +import Text.Pandoc.Readers.ODT.Arrows.Utils -import Text.Pandoc.Readers.Odt.Generic.Fallible -import qualified Text.Pandoc.Readers.Odt.Generic.SetMap as SM -import Text.Pandoc.Readers.Odt.Generic.Utils -import Text.Pandoc.Readers.Odt.Generic.XMLConverter +import Text.Pandoc.Readers.ODT.Generic.Fallible +import qualified Text.Pandoc.Readers.ODT.Generic.SetMap as SM +import Text.Pandoc.Readers.ODT.Generic.Utils +import Text.Pandoc.Readers.ODT.Generic.XMLConverter -import Text.Pandoc.Readers.Odt.Base -import Text.Pandoc.Readers.Odt.Namespaces +import Text.Pandoc.Readers.ODT.Base +import Text.Pandoc.Readers.ODT.Namespaces readStylesAt :: XML.Element -> Fallible Styles readStylesAt e = runConverter' readAllStyles mempty e diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 17011f657d8a..176e0412fc69 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org Copyright : Copyright (C) 2014-2022 Albert Krewinkel diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 8b2b8318524c..9e7f7e6e5c0b 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -126,8 +126,12 @@ linebreak :: PandocMonad m => OrgParser m (F Inlines) linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline str :: PandocMonad m => OrgParser m (F Inlines) -str = return . B.str <$> many1Char (noneOf $ specialChars ++ "\n\r ") +str = return . B.str <$> + ( many1Char (noneOf $ specialChars ++ "\n\r ") >>= updatePositions' ) <* updateLastStrPos + where + updatePositions' str' = str' <$ + maybe mzero (updatePositions . 
snd) (T.unsnoc str') -- | An endline character that can be treated as a space, not a structural -- break. This should reflect the values of the Emacs variable @@ -380,8 +384,9 @@ orgRefCiteKey = endOfCitation = try $ do many $ satisfy isCiteKeySpecialChar satisfy $ not . isCiteKeyChar - in try $ satisfy isCiteKeyChar `many1TillChar` lookAhead endOfCitation - + in try $ do + optional (char '&') -- this is used in org-ref v3 + satisfy isCiteKeyChar `many1TillChar` lookAhead endOfCitation -- | Supported citation types. Only a small subset of org-ref types is -- supported for now. TODO: rewrite this, use LaTeX reader as template. @@ -679,7 +684,6 @@ rawMathBetween s e = try $ textStr s *> manyTillChar anyChar (try $ textStr e) emphasisStart :: PandocMonad m => Char -> OrgParser m Char emphasisStart c = try $ do guard =<< afterEmphasisPreChar - guard =<< notAfterString char c lookAhead (noneOf emphasisForbiddenBorderChars) pushToInlineCharStack c @@ -792,8 +796,8 @@ notAfterForbiddenBorderChar = do subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines) subOrSuperExpr = try $ simpleSubOrSuperText <|> - (choice [ charsInBalanced '{' '}' (noneOf "\n\r") - , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r") + (choice [ charsInBalanced '{' '}' (T.singleton <$> noneOf "\n\r") + , enclosing ('(', ')') <$> charsInBalanced '(' ')' (T.singleton <$> noneOf "\n\r") ] >>= parseFromString (mconcat <$> many inline)) where enclosing (left, right) s = T.cons left $ T.snoc s right diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index 7ee4e1fcce6e..039f063e0591 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -27,7 +27,7 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Shared (blocksToInlines, safeRead) -import Text.Pandoc.Network.HTTP (urlEncode) +import Text.Pandoc.URI (urlEncode) import 
Control.Monad (mzero, void) import Data.List (intercalate, intersperse) diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index cef2fbd40358..4f2d3cb938f2 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -56,7 +56,7 @@ import Text.Pandoc.Parsing (Future, HasIdentifierList (..), HasQuoteContext (..), HasReaderOptions (..), ParserContext (..), QuoteContext (..), SourcePos, askF, asksF, returnF, runF, trimInlinesF) -import Text.Pandoc.Readers.LaTeX.Types (Macro) +import Text.Pandoc.TeX (Macro) -- | This is used to delay evaluation until all relevant information has been -- parsed and made available in the parser state. diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index 21e42876cc1a..0821b6b6997c 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -114,7 +114,7 @@ import Control.Monad (guard) import Control.Monad.Reader (ReaderT) -- | The parser used to read org files. -type OrgParser m = ParserT Sources OrgParserState (ReaderT OrgParserLocal m) +type OrgParser m = ParsecT Sources OrgParserState (ReaderT OrgParserLocal m) -- -- Adaptions and specializations of parsing utilities @@ -163,7 +163,7 @@ inList = do -- | Parse in different context withContext :: Monad m => ParserContext -- ^ New parser context - -> OrgParser m a -- ^ Parser to run in that context + -> OrgParser m a -- ^ Parsec to run in that context -> OrgParser m a withContext context parser = do oldContext <- orgStateParserContext <$> getState @@ -173,7 +173,7 @@ withContext context parser = do return result -- --- Parser state functions +-- Parsec state functions -- -- | Get an export setting. 
diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs index f4164411604d..7e31db0934d9 100644 --- a/src/Text/Pandoc/Readers/Org/Shared.hs +++ b/src/Text/Pandoc/Readers/Org/Shared.hs @@ -23,7 +23,6 @@ import qualified Data.Text as T import System.FilePath (isValid, takeExtension) import qualified System.FilePath.Posix as Posix import qualified System.FilePath.Windows as Windows -import Text.Pandoc.Shared (elemText) -- | Check whether the given string looks like the path to of URL of an image. isImageFilename :: Text -> Bool @@ -58,7 +57,7 @@ cleanLinkText s isUrl :: Text -> Bool isUrl cs = let (scheme, path) = T.break (== ':') cs - in T.all (\c -> isAlphaNum c || c `elemText` ".-") scheme + in T.all (\c -> isAlphaNum c || T.any (== c) ".-") scheme && not (T.null path) -- | Creates an key-value pair marking the original language name specified for diff --git a/src/Text/Pandoc/Readers/RIS.hs b/src/Text/Pandoc/Readers/RIS.hs index f73df813568b..36335105ce87 100644 --- a/src/Text/Pandoc/Readers/RIS.hs +++ b/src/Text/Pandoc/Readers/RIS.hs @@ -29,7 +29,7 @@ import Citeproc (Reference(..), ItemId(..), Val(..), Date(..), DateParts(..), import Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue) -import Text.Pandoc.Citeproc.BibTeX (toName) +import Text.Pandoc.Citeproc.Name (toName, NameOpts(..)) import Control.Monad.Except (throwError) import qualified Data.Text as T import Data.Text (Text) @@ -59,7 +59,7 @@ readRIS _opts inp = do B.doc mempty Left e -> throwError e -type RISParser m = ParserT Sources () m +type RISParser m = ParsecT Sources () m risLine :: PandocMonad m => RISParser m (Text, Text) risLine = do @@ -140,7 +140,9 @@ risRecordToReference keys = addId $ foldr go defref keys M.insert (toVariable k) (FancyVal v) (referenceVariables r) } addName k v r = - let new = toName [] . B.toList . 
B.text $ v + let new = toName NameOpts{ nameOptsPrefixIsNonDroppingParticle = False + , nameOptsUseJuniorComma = False } + . B.toList . B.text $ v f Nothing = Just (NamesVal new) f (Just (NamesVal ns)) = Just (NamesVal (new ++ ns)) f (Just x) = Just x diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 0b824ad33d99..cd7d9e4cbabe 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -36,6 +36,7 @@ import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Shared +import Text.Pandoc.URI import qualified Text.Pandoc.UTF8 as UTF8 import Data.Time.Format import System.FilePath (takeDirectory) @@ -55,7 +56,7 @@ readRST opts s = do Right result -> return result Left e -> throwError e -type RSTParser m = ParserT Sources ParserState m +type RSTParser m = ParsecT Sources ParserState m -- -- Constants and data structure definitions @@ -355,7 +356,7 @@ singleHeader' = try $ do -- hrule block -- -hrule :: Monad m => ParserT Sources st m Blocks +hrule :: Monad m => ParsecT Sources st m Blocks hrule = try $ do chr <- oneOf underlineChars count 3 (char chr) @@ -370,7 +371,7 @@ hrule = try $ do -- read a line indented by a given string indentedLine :: (HasReaderOptions st, Monad m) - => Int -> ParserT Sources st m Text + => Int -> ParsecT Sources st m Text indentedLine indents = try $ do lookAhead spaceChar gobbleAtMostSpaces indents @@ -379,7 +380,7 @@ indentedLine indents = try $ do -- one or more indented lines, possibly separated by blank lines. -- any amount of indentation will work. 
indentedBlock :: (HasReaderOptions st, Monad m) - => ParserT Sources st m Text + => ParsecT Sources st m Text indentedBlock = try $ do indents <- length <$> lookAhead (many1 spaceChar) lns <- many1 $ try $ do b <- option "" blanklines @@ -388,20 +389,20 @@ indentedBlock = try $ do optional blanklines return $ T.unlines lns -quotedBlock :: Monad m => ParserT Sources st m Text +quotedBlock :: Monad m => ParsecT Sources st m Text quotedBlock = try $ do quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" lns <- many1 $ lookAhead (char quote) >> anyLine optional blanklines return $ T.unlines lns -codeBlockStart :: Monad m => ParserT Sources st m Char +codeBlockStart :: Monad m => ParsecT Sources st m Char codeBlockStart = string "::" >> blankline >> blankline -codeBlock :: Monad m => ParserT Sources ParserState m Blocks +codeBlock :: Monad m => ParsecT Sources ParserState m Blocks codeBlock = try $ codeBlockStart >> codeBlockBody -codeBlockBody :: Monad m => ParserT Sources ParserState m Blocks +codeBlockBody :: Monad m => ParsecT Sources ParserState m Blocks codeBlockBody = do lang <- stateRstHighlight <$> getState try $ B.codeBlockWith ("", maybeToList lang, []) . 
stripTrailingNewlines <$> @@ -417,14 +418,14 @@ lhsCodeBlock = try $ do return $ B.codeBlockWith ("", ["haskell","literate"], []) $ T.intercalate "\n" lns -latexCodeBlock :: Monad m => ParserT Sources st m [Text] +latexCodeBlock :: Monad m => ParsecT Sources st m [Text] latexCodeBlock = try $ do try (latexBlockLine "\\begin{code}") many1Till anyLine (try $ latexBlockLine "\\end{code}") where latexBlockLine s = skipMany spaceChar >> string s >> blankline -birdCodeBlock :: Monad m => ParserT Sources st m [Text] +birdCodeBlock :: Monad m => ParsecT Sources st m [Text] birdCodeBlock = filterSpace <$> many1 birdTrackLine where filterSpace lns = -- if (as is normal) there is always a space after >, drop it @@ -432,7 +433,7 @@ birdCodeBlock = filterSpace <$> many1 birdTrackLine then map (T.drop 1) lns else lns -birdTrackLine :: Monad m => ParserT Sources st m Text +birdTrackLine :: Monad m => ParsecT Sources st m Text birdTrackLine = char '>' >> anyLine -- @@ -509,7 +510,7 @@ definitionList :: PandocMonad m => RSTParser m Blocks definitionList = B.definitionList <$> many1 definitionListItem -- parses bullet list start and returns its length (inc. following whitespace) -bulletListStart :: Monad m => ParserT Sources st m Int +bulletListStart :: Monad m => ParsecT Sources st m Int bulletListStart = try $ do notFollowedBy' hrule -- because hrules start out just like lists marker <- oneOf bulletListMarkers @@ -871,7 +872,7 @@ csvTableDirective top fields rawcsv = do let res = parseCSV opts rawcsv' case (<>) <$> header' <*> res of Left e -> - throwError $ PandocParsecError "csv table" e + throwError $ fromParsecError (toSources rawcsv') e Right rawrows -> do let singleParaToPlain bs = case B.toList bs of @@ -1106,7 +1107,7 @@ quotedReferenceName = try $ do -- plus isolated (no two adjacent) internal hyphens, underscores, -- periods, colons and plus signs; no whitespace or other characters -- are allowed. 
-simpleReferenceName :: Monad m => ParserT Sources st m Text +simpleReferenceName :: Monad m => ParsecT Sources st m Text simpleReferenceName = do x <- alphaNum xs <- many $ alphaNum @@ -1125,7 +1126,7 @@ referenceKey = do -- return enough blanks to replace key return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n" -targetURI :: Monad m => ParserT Sources st m Text +targetURI :: Monad m => ParsecT Sources st m Text targetURI = do skipSpaces optional $ try $ newline >> notFollowedBy blankline @@ -1253,13 +1254,13 @@ headerBlock = do -- - ensure that rightmost column span does not need to reach end -- - require at least 2 columns -dashedLine :: Monad m => Char -> ParserT Sources st m (Int, Int) +dashedLine :: Monad m => Char -> ParsecT Sources st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many (char ' ') return (length dashes, length $ dashes ++ sp) -simpleDashedLines :: Monad m => Char -> ParserT Sources st m [(Int,Int)] +simpleDashedLines :: Monad m => Char -> ParsecT Sources st m [(Int,Int)] simpleDashedLines ch = try $ many1 (dashedLine ch) -- Parse a table row separator @@ -1383,7 +1384,7 @@ hyphens = do -- don't want to treat endline after hyphen or dash as a space return $ B.str result -escapedChar :: Monad m => ParserT Sources st m Inlines +escapedChar :: Monad m => ParsecT Sources st m Inlines escapedChar = do c <- escaped anyChar return $ if c == ' ' || c == '\n' || c == '\r' -- '\ ' is null in RST diff --git a/src/Text/Pandoc/Readers/RTF.hs b/src/Text/Pandoc/Readers/RTF.hs index c29a33b32f24..5d09ac26cd9e 100644 --- a/src/Text/Pandoc/Readers/RTF.hs +++ b/src/Text/Pandoc/Readers/RTF.hs @@ -184,7 +184,7 @@ instance Default Properties where , gInTable = False } -type RTFParser m = ParserT Sources RTFState m +type RTFParser m = ParsecT Sources RTFState m data ListType = Bullet | Ordered ListAttributes deriving (Show, Eq) @@ -251,7 +251,7 @@ tok = do hyph <- option False $ True <$ char '-' rest <- many digit if null rest - then 
return $! Nothing + then return Nothing else do let pstr = T.pack rest case TR.decimal pstr of @@ -259,7 +259,7 @@ tok = do return $! Just $! if hyph then (-1) * i else i - _ -> return $! Nothing + _ -> return Nothing hexVal = do char '\'' x <- hexDigit diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs index 47f16ef4b065..429b90ce16ca 100644 --- a/src/Text/Pandoc/Readers/Roff.hs +++ b/src/Text/Pandoc/Readers/Roff.hs @@ -121,16 +121,16 @@ instance Default RoffState where , afterConditional = False } -type RoffLexer m = ParserT Sources RoffState m +type RoffLexer m = ParsecT Sources RoffState m -- -- Lexer: T.Text -> RoffToken -- -eofline :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m () +eofline :: (Stream s m Char, UpdateSourcePos s Char) => ParsecT s u m () eofline = void newline <|> eof <|> () <$ lookAhead (string "\\}") -spacetab :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m Char +spacetab :: (Stream s m Char, UpdateSourcePos s Char) => ParsecT s u m Char spacetab = char ' ' <|> char '\t' characterCodeMap :: M.Map T.Text Char @@ -503,7 +503,7 @@ lexConditional mname = do expression :: PandocMonad m => RoffLexer m (Maybe Bool) expression = do - raw <- charsInBalanced '(' ')' (satisfy (/= '\n')) + raw <- charsInBalanced '(' ')' (T.singleton <$> (satisfy (/= '\n'))) <|> many1Char nonspaceChar returnValue $ case raw of diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index 7ce4e593c3e8..0fb54120b062 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -26,7 +26,7 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) import Text.Pandoc.Definition import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (enclosed, nested) +import Text.Pandoc.Parsing hiding (enclosed) import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) import Text.Pandoc.Shared (tshow) import Text.Pandoc.XML (fromEntities) 
@@ -43,7 +43,7 @@ readTWiki opts s = do Left e -> throwError e Right d -> return d -type TWParser = ParserT Sources ParserState +type TWParser = ParsecT Sources ParserState -- -- utility functions @@ -52,15 +52,6 @@ type TWParser = ParserT Sources ParserState tryMsg :: Text -> TWParser m a -> TWParser m a tryMsg msg p = try p T.unpack msg -nested :: PandocMonad m => TWParser m a -> TWParser m a -nested p = do - nestlevel <- stateMaxNestingLevel <$> getState - guard $ nestlevel > 0 - updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 } - res <- p - updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } - return res - htmlElement :: PandocMonad m => Text -> TWParser m (Attr, Text) htmlElement tag = tryMsg tag $ do (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) @@ -85,7 +76,7 @@ parseHtmlContentWithAttrs tag parser = do parsedContent <- try $ parseContent content return (attr, parsedContent) where - parseContent = parseFromString' $ nested $ manyTill parser endOfContent + parseContent = parseFromString' $ manyTill parser endOfContent endOfContent = try $ skipMany blankline >> skipSpaces >> eof parseCharHtmlContentWithAttrs :: PandocMonad m @@ -402,7 +393,7 @@ nestedInlines :: (Show a, PandocMonad m) nestedInlines end = innerSpace <|> nestedInline where innerSpace = try $ whitespace <* notFollowedBy end - nestedInline = notFollowedBy whitespace >> nested inline + nestedInline = notFollowedBy whitespace >> inline strong :: PandocMonad m => TWParser m B.Inlines strong = try $ B.strong <$> enclosed (char '*') nestedInlines @@ -456,7 +447,7 @@ autoLink = try $ do | otherwise = isAlphaNum c str :: PandocMonad m => TWParser m B.Inlines -str = B.str <$> (many1Char alphaNum <|> countChar 1 characterReference) +str = B.str <$> (many1Char alphaNum <|> characterReference) nop :: PandocMonad m => TWParser m B.Inlines nop = try $ (void exclamation <|> void nopTag) >> followContent diff --git a/src/Text/Pandoc/Readers/Textile.hs 
b/src/Text/Pandoc/Readers/Textile.hs index 45ef1e2607f4..8922d2b353cf 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -67,7 +67,7 @@ readTextile opts s = do Right result -> return result Left e -> throwError e -type TextileParser = ParserT Sources ParserState +type TextileParser = ParsecT Sources ParserState -- | Generate a Pandoc ADT from a textile document parseTextile :: PandocMonad m => TextileParser m Pandoc @@ -445,7 +445,7 @@ inlineParsers = [ str , link , image , mark - , B.str . T.singleton <$> characterReference + , B.str <$> characterReference , smartPunctuation inline , symbol ] @@ -681,9 +681,9 @@ langAttr = do -- | Parses material surrounded by a parser. surrounded :: (PandocMonad m, Show t) - => ParserT Sources st m t -- ^ surrounding parser - -> ParserT Sources st m a -- ^ content parser (to be used repeatedly) - -> ParserT Sources st m [a] + => ParsecT Sources st m t -- ^ surrounding parser + -> ParsecT Sources st m a -- ^ content parser (to be used repeatedly) + -> ParsecT Sources st m [a] surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border) @@ -713,5 +713,5 @@ groupedInlineMarkup = try $ do char ']' return $ sp1 <> result <> sp2 -eof' :: Monad m => ParserT Sources s m Char +eof' :: Monad m => ParsecT Sources s m Char eof' = '\n' <$ eof diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs index 2275646b9094..b4f8a214e46b 100644 --- a/src/Text/Pandoc/Readers/TikiWiki.hs +++ b/src/Text/Pandoc/Readers/TikiWiki.hs @@ -29,7 +29,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) import Text.Pandoc.Definition import Text.Pandoc.Logging (Verbosity (..)) import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (enclosed, nested) +import Text.Pandoc.Parsing hiding (enclosed) import Text.Pandoc.Shared (safeRead) import Text.Pandoc.XML (fromEntities) import Text.Printf (printf) @@ -46,7 +46,7 @@ readTikiWiki opts s = do Left e -> 
throwError e Right d -> return d -type TikiWikiParser = ParserT Sources ParserState +type TikiWikiParser = ParsecT Sources ParserState -- -- utility functions @@ -58,15 +58,6 @@ tryMsg msg p = try p T.unpack msg skip :: TikiWikiParser m a -> TikiWikiParser m () skip parser = Control.Monad.void parser -nested :: PandocMonad m => TikiWikiParser m a -> TikiWikiParser m a -nested p = do - nestlevel <- stateMaxNestingLevel <$> getState - guard $ nestlevel > 0 - updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 } - res <- p - updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } - return res - -- -- main parser -- @@ -450,7 +441,7 @@ nestedInlines :: (Show a, PandocMonad m) => TikiWikiParser m a -> TikiWikiParser nestedInlines end = innerSpace <|> nestedInline where innerSpace = try $ whitespace <* notFollowedBy end - nestedInline = notFollowedBy whitespace >> nested inline + nestedInline = notFollowedBy whitespace >> inline -- {img attId="39" imalign="right" link="http://info.tikiwiki.org" alt="Panama Hat"} -- @@ -594,7 +585,7 @@ noparse = try $ do return $ B.str $ T.pack body str :: PandocMonad m => TikiWikiParser m B.Inlines -str = fmap B.str (T.pack <$> many1 alphaNum <|> countChar 1 characterReference) +str = fmap B.str (T.pack <$> many1 alphaNum <|> characterReference) symbol :: PandocMonad m => TikiWikiParser m B.Inlines symbol = fmap B.str (countChar 1 nonspaceChar) diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index b5cf5a0f3588..6e399434256d 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -33,9 +33,10 @@ import Data.Time (defaultTimeLocale) import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (space, spaces, uri) -import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI) +import Text.Pandoc.Shared (compactify, compactifyDL) +import Text.Pandoc.URI (escapeURI) -type T2T = ParserT Sources ParserState 
(Reader T2TMeta) +type T2T = ParsecT Sources ParserState (Reader T2TMeta) -- | An object for the T2T macros meta information -- the contents of each field is simply substituted verbatim into the file @@ -402,7 +403,7 @@ tagged = do target <- getTarget inlineMarkup (T.singleton <$> anyChar) (B.rawInline target) '\'' id --- Parser for markup indicated by a double character. +-- Parsec for markup indicated by a double character. -- Inline markup is greedy and glued -- Greedy meaning ***a*** = Bold [Str "*a*"] -- Glued meaning that markup must be tight to content diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs index 794993ef48dd..5683de9a6960 100644 --- a/src/Text/Pandoc/Readers/Vimwiki.hs +++ b/src/Text/Pandoc/Readers/Vimwiki.hs @@ -70,19 +70,19 @@ import Text.Pandoc.Definition (Attr, Block (BulletList, OrderedList), ListNumberStyle (..), Pandoc (..), nullMeta) import Text.Pandoc.Options (ReaderOptions) -import Text.Pandoc.Parsing (ParserState, ParserT, blanklines, emailAddress, +import Text.Pandoc.Parsing (ParserState, ParsecT, blanklines, emailAddress, many1Till, orderedListMarker, readWithM, registerHeader, spaceChar, stateMeta, stateOptions, uri, manyTillChar, manyChar, textStr, many1Char, countChar, many1TillChar, alphaNum, anyChar, char, newline, noneOf, oneOf, - space, spaces, string) + space, spaces, string, choice, eof, lookAhead, + many1, many, manyTill, notFollowedBy, + skipMany1, try, option, + updateState, getState, (<|>)) import Text.Pandoc.Sources (ToSources(..), Sources) -import Text.Pandoc.Shared (splitTextBy, stringify, stripFirstAndLast, - isURI, tshow) -import Text.Parsec.Combinator (between, choice, eof, lookAhead, many1, - manyTill, notFollowedBy, option, skipMany1) -import Text.Parsec.Prim (getState, many, try, updateState, (<|>)) +import Text.Pandoc.Shared (splitTextBy, stringify, stripFirstAndLast, tshow) +import Text.Pandoc.URI (isURI) readVimwiki :: (PandocMonad m, ToSources a) => ReaderOptions @@ -95,7 
+95,7 @@ readVimwiki opts s = do Left e -> throwError e Right result -> return result -type VwParser = ParserT Sources ParserState +type VwParser = ParsecT Sources ParserState -- constants @@ -345,8 +345,9 @@ blocksThenInline = try $ do listTodoMarker :: PandocMonad m => VwParser m Inlines listTodoMarker = try $ do - x <- between (many spaceChar >> char '[') (char ']' >> spaceChar) - (oneOf " .oOX") + x <- (many spaceChar >> char '[') + *> oneOf " .oOX" + <* (char ']' >> spaceChar) return $ makeListMarkerSpan x makeListMarkerSpan :: Char -> Inlines @@ -506,7 +507,7 @@ bareURL = try $ do strong :: PandocMonad m => VwParser m Inlines strong = try $ do - s <- lookAhead $ between (char '*') (char '*') (many1 $ noneOf "*") + s <- lookAhead $ char '*' *> many1 (noneOf "*") <* char '*' guard $ (head s `notElem` spaceChars) && (last s `notElem` spaceChars) char '*' @@ -520,7 +521,7 @@ makeId i = T.concat (stringify <$> toList i) emph :: PandocMonad m => VwParser m Inlines emph = try $ do - s <- lookAhead $ between (char '_') (char '_') (many1 $ noneOf "_") + s <- lookAhead $ char '_' *> many1 (noneOf "_") <* char '_' guard $ (head s `notElem` spaceChars) && (last s `notElem` spaceChars) char '_' diff --git a/src/Text/Pandoc/Scripting.hs b/src/Text/Pandoc/Scripting.hs new file mode 100644 index 000000000000..29027dd6d6a2 --- /dev/null +++ b/src/Text/Pandoc/Scripting.hs @@ -0,0 +1,58 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ImpredicativeTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{- | +Module : Text.Pandoc.Scripting +Copyright : © 2022 Albert Krewinkel +License : GPL-2.0-or-later +Maintainer : Albert Krewinkel + +Central data structure for scripting engines. +-} +module Text.Pandoc.Scripting + ( ScriptingEngine (..) 
+ , noEngine + ) +where + +import Control.Monad.Except (throwError) +import Control.Monad.IO.Class (MonadIO) +import Data.Text (Text) +import Text.Pandoc.Class.PandocMonad (PandocMonad) +import Text.Pandoc.Definition (Pandoc) +import Text.Pandoc.Error (PandocError (PandocNoScriptingEngine)) +import Text.Pandoc.Filter.Environment (Environment) +import Text.Pandoc.Format (ExtensionsConfig) +import Text.Pandoc.Templates (Template) +import Text.Pandoc.Readers (Reader) +import Text.Pandoc.Writers (Writer) + +-- | Structure to define a scripting engine. +data ScriptingEngine = ScriptingEngine + { engineName :: Text -- ^ Name of the engine. + + , engineApplyFilter :: forall m. (PandocMonad m, MonadIO m) + => Environment -> [String] -> FilePath + -> Pandoc -> m Pandoc + -- ^ Use the scripting engine to run a filter. + + , engineReadCustom :: forall m. (PandocMonad m, MonadIO m) + => FilePath -> m (Reader m, ExtensionsConfig) + -- ^ Function to parse input into a 'Pandoc' document. + + , engineWriteCustom :: forall m. (PandocMonad m, MonadIO m) + => FilePath + -> m (Writer m, ExtensionsConfig, m (Template Text)) + -- ^ Invoke the given script file to convert to any custom format. 
+ } + +noEngine :: ScriptingEngine +noEngine = ScriptingEngine + { engineName = "none" + , engineApplyFilter = \_env _args _fp _doc -> + throwError PandocNoScriptingEngine + , engineReadCustom = \_fp -> + throwError PandocNoScriptingEngine + , engineWriteCustom = \_fp -> + throwError PandocNoScriptingEngine + } diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index a17f9338b6c3..721ad591a611 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -32,11 +32,12 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..), fetchItem, import Text.Pandoc.Logging import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.MIME (MimeType) -import Text.Pandoc.Shared (isURI, renderTags', trim, tshow) +import Text.Pandoc.Shared (renderTags', trim, tshow) +import Text.Pandoc.URI (isURI) import Text.Pandoc.UTF8 (toString, toText, fromText) -import Text.Parsec (ParsecT, runParserT) +import Text.Pandoc.Parsing (ParsecT, runParserT) +import qualified Text.Pandoc.Parsing as P import Control.Monad.Except (throwError, catchError) -import qualified Text.Parsec as P isOk :: Char -> Bool isOk c = isAscii c && isAlphaNum c diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 35a854bf6083..3efe8a538477 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -22,13 +22,9 @@ module Text.Pandoc.Shared ( splitBy, splitTextBy, splitTextByIndices, - ordNub, - findM, -- * Text processing inquotes, tshow, - elemText, - notElemText, stripTrailingNewlines, trim, triml, @@ -38,9 +34,7 @@ module Text.Pandoc.Shared ( camelCaseToHyphenated, camelCaseStrToHyphenated, toRomanNumeral, - escapeURI, tabFilter, - crFilter, -- * Date/time normalizeDate, -- * Pandoc block and inline list processing @@ -48,7 +42,6 @@ module Text.Pandoc.Shared ( extractSpaces, removeFormatting, deNote, - deLink, stringify, capitalize, compactify, @@ -57,6 +50,7 @@ module Text.Pandoc.Shared ( makeSections, uniqueIdent, 
inlineListToIdentifier, + textToIdentifier, isHeaderBlock, headerShift, stripEmptyParagraphs, @@ -66,7 +60,6 @@ module Text.Pandoc.Shared ( taskListItemToAscii, handleTaskListItem, addMetaField, - makeMeta, eastAsianLineBreakFilter, htmlSpanLikeElements, filterIpynbOutput, @@ -75,14 +68,9 @@ module Text.Pandoc.Shared ( renderTags', -- * File handling inDirectory, + makeCanonical, collapseFilePath, - uriPathToPath, filteredFilesFromArchive, - -- * URI handling - schemes, - isURI, - -- * Error handling - mapLeft, -- * for squashing blocks blocksToInlines, blocksToInlines', @@ -90,11 +78,7 @@ module Text.Pandoc.Shared ( defaultBlocksSeparator, -- * Safe read safeRead, - safeStrRead, - -- * User data directory - defaultUserDataDir, - -- * Version - pandocVersion + safeStrRead ) where import Codec.Archive.Zip @@ -102,11 +86,10 @@ import qualified Control.Exception as E import Control.Monad (MonadPlus (..), msum, unless) import qualified Control.Monad.State.Strict as S import qualified Data.ByteString.Lazy as BL -import qualified Data.Bifunctor as Bifunctor +import Data.Containers.ListUtils (nubOrd) import Data.Char (isAlpha, isLower, isSpace, isUpper, toLower, isAlphaNum, generalCategory, GeneralCategory(NonSpacingMark, SpacingCombiningMark, EnclosingMark, ConnectorPunctuation)) -import Data.Containers.ListUtils (nubOrd) import Data.List (find, intercalate, intersperse, sortOn, foldl', groupBy) import qualified Data.Map as M import Data.Maybe (mapMaybe, fromMaybe) @@ -114,9 +97,6 @@ import Data.Monoid (Any (..)) import Data.Sequence (ViewL (..), ViewR (..), viewl, viewr) import qualified Data.Set as Set import qualified Data.Text as T -import Data.Version (showVersion) -import Network.URI (URI (uriScheme), escapeURIString, parseURI) -import Paths_pandoc (version) import System.Directory import System.FilePath (isPathSeparator, splitDirectories) import qualified System.FilePath.Posix as Posix @@ -132,10 +112,6 @@ import Text.Pandoc.Generic (bottomUp) import 
Text.DocLayout (charWidth) import Text.Pandoc.Walk --- | Version number of pandoc library. -pandocVersion :: T.Text -pandocVersion = T.pack $ showVersion version - -- -- List processing -- @@ -176,23 +152,6 @@ splitAt' n xs | n <= 0 = ([],xs) splitAt' n (x:xs) = (x:ys,zs) where (ys,zs) = splitAt' (n - charWidth x) xs --- | Remove duplicates from a list. -ordNub :: (Ord a) => [a] -> [a] -ordNub = nubOrd -{-# INLINE ordNub #-} - --- | Returns the first element in a foldable structure for that the --- monadic predicate holds true, and @Nothing@ if no such element --- exists. -findM :: forall m t a. (Monad m, Foldable t) - => (a -> m Bool) -> t a -> m (Maybe a) -findM p = foldr go (pure Nothing) - where - go :: a -> m (Maybe a) -> m (Maybe a) - go x acc = do - b <- p x - if b then pure (Just x) else acc - -- -- Text processing -- @@ -205,14 +164,6 @@ inquotes txt = T.cons '\"' (T.snoc txt '\"') tshow :: Show a => a -> T.Text tshow = T.pack . show --- | @True@ exactly when the @Char@ appears in the @Text@. -elemText :: Char -> T.Text -> Bool -elemText c = T.any (== c) - --- | @True@ exactly when the @Char@ does not appear in the @Text@. -notElemText :: Char -> T.Text -> Bool -notElemText c = T.all (/= c) - -- | Strip trailing newlines from string. stripTrailingNewlines :: T.Text -> T.Text stripTrailingNewlines = T.dropWhileEnd (== '\n') @@ -295,12 +246,6 @@ toRomanNumeral x | x >= 1 = "I" <> toRomanNumeral (x - 1) | otherwise = "" --- | Escape whitespace and some punctuation characters in URI. -escapeURI :: T.Text -> T.Text -escapeURI = T.pack . escapeURIString (not . needsEscaping) . T.unpack - where needsEscaping c = isSpace c || c `elemText` "<>|\"{}[]^`" - - -- | Convert tabs to spaces. Tabs will be preserved if tab stop is set to 0. tabFilter :: Int -- ^ Tab stop -> T.Text -- ^ Input @@ -315,11 +260,6 @@ tabFilter tabStop = T.unlines . map go . 
T.lines (tabStop - (T.length s1 `mod` tabStop)) (T.pack " ") <> go (T.drop 1 s2) -{-# DEPRECATED crFilter "readers filter crs automatically" #-} --- | Strip out DOS line endings. -crFilter :: T.Text -> T.Text -crFilter = T.filter (/= '\r') - -- -- Date/time -- @@ -401,12 +341,6 @@ deNote :: Inline -> Inline deNote (Note _) = Str "" deNote x = x --- {- DEPRECATED deLink "deLink will be removed in a future version" -} --- | Turns links into spans, keeping just the link text. -deLink :: Inline -> Inline -deLink (Link _ ils _) = Span nullAttr ils -deLink x = x - -- | Convert pandoc structure to a string with formatting removed. -- Footnotes are skipped (since we don't want their contents in link -- labels). @@ -497,12 +431,10 @@ isPara :: Block -> Bool isPara (Para _) = True isPara _ = False --- | Convert Pandoc inline list to plain text identifier. HTML --- identifiers must start with a letter, and may contain only --- letters, digits, and the characters _-. +-- | Convert Pandoc inline list to plain text identifier. inlineListToIdentifier :: Extensions -> [Inline] -> T.Text inlineListToIdentifier exts = - dropNonLetter . filterAscii . toIdent . stringify . walk unEmojify + textToIdentifier exts . stringify . walk unEmojify where unEmojify :: [Inline] -> [Inline] unEmojify @@ -511,6 +443,12 @@ inlineListToIdentifier exts = | otherwise = id unEmoji (Span ("",["emoji"],[("data-emoji",ename)]) _) = Str ename unEmoji x = x + +-- | Convert string to plain text identifier. +textToIdentifier :: Extensions -> T.Text -> T.Text +textToIdentifier exts = + dropNonLetter . filterAscii . toIdent + where dropNonLetter | extensionEnabled Ext_gfm_auto_identifiers exts = id | otherwise = T.dropWhile (not . 
isAlpha) @@ -597,7 +535,7 @@ makeSections numbering mbBaseLevel bs = combineAttr :: Attr -> Attr -> Attr combineAttr (id1, classes1, kvs1) (id2, classes2, kvs2) = (if T.null id1 then id2 else id1, - ordNub (classes1 ++ classes2), + nubOrd (classes1 ++ classes2), foldr (\(k,v) kvs -> case lookup k kvs of Nothing -> (k,v):kvs Just _ -> kvs) mempty (kvs1 ++ kvs2)) @@ -715,14 +653,6 @@ addMetaField key val (Meta meta) = tolist (MetaList ys) = ys tolist y = [y] --- | Create 'Meta' from old-style title, authors, date. This is --- provided to ease the transition from the old API. -makeMeta :: [Inline] -> [[Inline]] -> [Inline] -> Meta -makeMeta title authors date = - addMetaField "title" (B.fromList title) - $ addMetaField "author" (map B.fromList authors) - $ addMetaField "date" (B.fromList date) nullMeta - -- | Remove soft breaks between East Asian characters. eastAsianLineBreakFilter :: Pandoc -> Pandoc eastAsianLineBreakFilter = bottomUp go @@ -825,12 +755,14 @@ inDirectory path action = E.bracket setCurrentDirectory (const $ setCurrentDirectory path >> action) --- --- Error reporting --- - -mapLeft :: (a -> b) -> Either a c -> Either b c -mapLeft = Bifunctor.first +-- | Canonicalizes a file path by removing redundant @.@ and @..@. +makeCanonical :: FilePath -> FilePath +makeCanonical = Posix.joinPath . transformPathParts . splitDirectories + where transformPathParts = reverse . foldl' go [] + go as "." = as + go ("..":as) ".." = ["..", ".."] <> as + go (_:as) ".." = as + go as x = x : as -- | Remove intermediate "." and ".." directories from a path. -- @@ -856,19 +788,6 @@ collapseFilePath = Posix.joinPath . reverse . foldl' go [] . splitDirectories isSingleton _ = Nothing checkPathSeperator = fmap isPathSeparator . isSingleton --- | Converts the path part of a file: URI to a regular path. --- On windows, @/c:/foo@ should be @c:/foo@. --- On linux, @/foo@ should be @/foo@. 
-uriPathToPath :: T.Text -> FilePath -uriPathToPath (T.unpack -> path) = -#ifdef _WINDOWS - case path of - '/':ps -> ps - ps -> ps -#else - path -#endif - -- -- File selection from the archive -- @@ -879,70 +798,6 @@ filteredFilesFromArchive zf f = fileAndBinary :: Archive -> FilePath -> Maybe (FilePath, BL.ByteString) fileAndBinary a fp = findEntryByPath fp a >>= \e -> Just (fp, fromEntry e) --- --- IANA URIs --- - --- | Schemes from http://www.iana.org/assignments/uri-schemes.html plus --- the unofficial schemes doi, javascript, isbn, pmid. -schemes :: Set.Set T.Text -schemes = Set.fromList - -- Official IANA schemes - [ "aaa", "aaas", "about", "acap", "acct", "acr", "adiumxtra", "afp", "afs" - , "aim", "appdata", "apt", "attachment", "aw", "barion", "beshare", "bitcoin" - , "blob", "bolo", "browserext", "callto", "cap", "chrome", "chrome-extension" - , "cid", "coap", "coaps", "com-eventbrite-attendee", "content", "crid", "cvs" - , "data", "dav", "dict", "dis", "dlna-playcontainer", "dlna-playsingle" - , "dns", "dntp", "dtn", "dvb", "ed2k", "example", "facetime", "fax", "feed" - , "feedready", "file", "filesystem", "finger", "fish", "ftp", "geo", "gg" - , "git", "gizmoproject", "go", "gopher", "graph", "gtalk", "h323", "ham" - , "hcp", "http", "https", "hxxp", "hxxps", "hydrazone", "iax", "icap", "icon" - , "im", "imap", "info", "iotdisco", "ipn", "ipp", "ipps", "irc", "irc6" - , "ircs", "iris", "iris.beep", "iris.lwz", "iris.xpc", "iris.xpcs" - , "isostore", "itms", "jabber", "jar", "jms", "keyparc", "lastfm", "ldap" - , "ldaps", "lvlt", "magnet", "mailserver", "mailto", "maps", "market" - , "message", "mid", "mms", "modem", "mongodb", "moz", "ms-access" - , "ms-browser-extension", "ms-drive-to", "ms-enrollment", "ms-excel" - , "ms-gamebarservices", "ms-getoffice", "ms-help", "ms-infopath" - , "ms-media-stream-id", "ms-officeapp", "ms-project", "ms-powerpoint" - , "ms-publisher", "ms-search-repair", "ms-secondary-screen-controller" - , 
"ms-secondary-screen-setup", "ms-settings", "ms-settings-airplanemode" - , "ms-settings-bluetooth", "ms-settings-camera", "ms-settings-cellular" - , "ms-settings-cloudstorage", "ms-settings-connectabledevices" - , "ms-settings-displays-topology", "ms-settings-emailandaccounts" - , "ms-settings-language", "ms-settings-location", "ms-settings-lock" - , "ms-settings-nfctransactions", "ms-settings-notifications" - , "ms-settings-power", "ms-settings-privacy", "ms-settings-proximity" - , "ms-settings-screenrotation", "ms-settings-wifi", "ms-settings-workplace" - , "ms-spd", "ms-sttoverlay", "ms-transit-to", "ms-virtualtouchpad" - , "ms-visio", "ms-walk-to", "ms-whiteboard", "ms-whiteboard-cmd", "ms-word" - , "msnim", "msrp", "msrps", "mtqp", "mumble", "mupdate", "mvn", "news", "nfs" - , "ni", "nih", "nntp", "notes", "ocf", "oid", "onenote", "onenote-cmd" - , "opaquelocktoken", "pack", "palm", "paparazzi", "pkcs11", "platform", "pop" - , "pres", "prospero", "proxy", "pwid", "psyc", "qb", "query", "redis" - , "rediss", "reload", "res", "resource", "rmi", "rsync", "rtmfp", "rtmp" - , "rtsp", "rtsps", "rtspu", "secondlife", "service", "session", "sftp", "sgn" - , "shttp", "sieve", "sip", "sips", "skype", "smb", "sms", "smtp", "snews" - , "snmp", "soap.beep", "soap.beeps", "soldat", "spotify", "ssh", "steam" - , "stun", "stuns", "submit", "svn", "tag", "teamspeak", "tel", "teliaeid" - , "telnet", "tftp", "things", "thismessage", "tip", "tn3270", "tool", "turn" - , "turns", "tv", "udp", "unreal", "urn", "ut2004", "v-event", "vemmi" - , "ventrilo", "videotex", "vnc", "view-source", "wais", "webcal", "wpid" - , "ws", "wss", "wtai", "wyciwyg", "xcon", "xcon-userid", "xfire" - , "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "xri", "ymsgr", "z39.50", "z39.50r" - , "z39.50s" - -- Unofficial schemes - , "doi", "isbn", "javascript", "pmid" - ] - --- | Check if the string is a valid URL with a IANA or frequently used but --- unofficial scheme (see @schemes@). 
-isURI :: T.Text -> Bool -isURI = maybe False hasKnownScheme . parseURI . T.unpack - where - hasKnownScheme = (`Set.member` schemes) . T.toLower . - T.filter (/= ':') . T.pack . uriScheme - --- --- Squash blocks into inlines --- @@ -1006,24 +861,3 @@ safeStrRead s = case reads s of (d,x):_ | all isSpace x -> return d _ -> mzero --- --- User data directory --- - --- | Return appropriate user data directory for platform. We use --- XDG_DATA_HOME (or its default value), but for backwards compatibility, --- we fall back to the legacy user data directory ($HOME/.pandoc on *nix) --- if the XDG_DATA_HOME is missing and this exists. If neither directory --- is present, we return the XDG data directory. If the XDG data directory --- is not defined (e.g. because we are in an environment where $HOME is --- not defined), we return the empty string. -defaultUserDataDir :: IO FilePath -defaultUserDataDir = do - xdgDir <- E.catch (getXdgDirectory XdgData "pandoc") - (\(_ :: E.SomeException) -> return mempty) - legacyDir <- getAppUserDataDirectory "pandoc" - xdgExists <- doesDirectoryExist xdgDir - legacyDirExists <- doesDirectoryExist legacyDir - if not xdgExists && legacyDirExists - then return legacyDir - else return xdgDir diff --git a/src/Text/Pandoc/Sources.hs b/src/Text/Pandoc/Sources.hs index f17e566c9597..69dc42812474 100644 --- a/src/Text/Pandoc/Sources.hs +++ b/src/Text/Pandoc/Sources.hs @@ -46,6 +46,8 @@ import Text.Parsec (Stream(..), ParsecT) import Text.Parsec.Pos as P import Data.Text (Text) import qualified Data.Text as T +import qualified Data.ByteString as BS +import qualified Data.ByteString.Lazy as BL import Data.Char (isSpace, isLetter, isAlphaNum, isDigit, isHexDigit) import Data.String (IsString(..)) import qualified Data.List.NonEmpty as NonEmpty @@ -112,6 +114,15 @@ class UpdateSourcePos s c where instance UpdateSourcePos Text Char where updateSourcePos pos c _ = updatePosChar pos c +instance UpdateSourcePos [Char] Char where + updateSourcePos pos c _ = 
updatePosChar pos c + +instance UpdateSourcePos BS.ByteString Char where + updateSourcePos pos c _ = updatePosChar pos c + +instance UpdateSourcePos BL.ByteString Char where + updateSourcePos pos c _ = updatePosChar pos c + instance UpdateSourcePos Sources Char where updateSourcePos pos c sources = case sources of diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/TeX.hs similarity index 69% rename from src/Text/Pandoc/Readers/LaTeX/Types.hs rename to src/Text/Pandoc/TeX.hs index ce85fc43ac4b..21b161041339 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Types.hs +++ b/src/Text/Pandoc/TeX.hs @@ -1,6 +1,6 @@ {-# LANGUAGE FlexibleInstances #-} {- | - Module : Text.Pandoc.Readers.LaTeX.Types + Module : Text.Pandoc.TeX Copyright : Copyright (C) 2017-2022 John MacFarlane License : GNU GPL, version 2 or above @@ -8,19 +8,19 @@ Stability : alpha Portability : portable -Types for LaTeX tokens and macros. +Types for TeX tokens and macros. -} -module Text.Pandoc.Readers.LaTeX.Types ( Tok(..) - , TokType(..) - , Macro(..) - , ArgSpec(..) - , ExpansionPoint(..) - , MacroScope(..) - , SourcePos - ) +module Text.Pandoc.TeX ( Tok(..) + , TokType(..) + , Macro(..) + , ArgSpec(..) + , ExpansionPoint(..) + , MacroScope(..) 
+ , SourcePos + ) where import Data.Text (Text) -import Text.Parsec.Pos (SourcePos, sourceName) +import Text.Parsec (SourcePos, sourceName) import Text.Pandoc.Sources import Data.List (groupBy) diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index e5684f29d95c..469401c97372 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -27,8 +27,9 @@ module Text.Pandoc.Templates ( Template import System.FilePath ((<.>), (), takeFileName) import Text.DocTemplates (Template, TemplateMonad(..), compileTemplate, renderTemplate) import Text.Pandoc.Class.CommonState (CommonState(..)) -import Text.Pandoc.Class.PandocMonad (PandocMonad, readDataFile, fetchItem, +import Text.Pandoc.Class.PandocMonad (PandocMonad, fetchItem, getCommonState, modifyCommonState) +import Text.Pandoc.Data (readDataFile) import qualified Text.Pandoc.UTF8 as UTF8 import Control.Monad.Except (catchError, throwError) import Data.Text (Text) @@ -80,8 +81,7 @@ getTemplate tp = UTF8.toText <$> getDefaultTemplate :: PandocMonad m => Text -- ^ Name of writer -> m Text -getDefaultTemplate writer = do - let format = T.takeWhile (`notElem` ("+-" :: String)) writer -- strip off extensions +getDefaultTemplate format = do case format of "native" -> return "" "csljson" -> return "" diff --git a/src/Text/Pandoc/Translations.hs b/src/Text/Pandoc/Translations.hs index 3755285aefd8..91bc25b9524a 100644 --- a/src/Text/Pandoc/Translations.hs +++ b/src/Text/Pandoc/Translations.hs @@ -1,6 +1,4 @@ {-# LANGUAGE CPP #-} -{-# LANGUAGE DeriveGeneric #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Translations @@ -11,88 +9,88 @@ Stability : alpha Portability : portable -Data types for localization. - -Translations are stored in @data/translations/langname.trans@, -where langname can be the full BCP47 language specifier, or -just the language part. 
File format is: - -> # A comment, ignored -> Figure: Figura -> Index: Indeksi - +Functions for getting localized translations of terms. -} module Text.Pandoc.Translations ( - Term(..) - , Translations - , lookupTerm + module Text.Pandoc.Translations.Types , readTranslations + , getTranslations + , setTranslations + , translateTerm ) where -import Data.Aeson.Types (Value(..), FromJSON(..)) -import qualified Data.Aeson.Types as Aeson -import qualified Data.Map as M +import Text.Pandoc.Translations.Types +import Text.Pandoc.Class (PandocMonad(..), CommonState(..), report) +import Text.Pandoc.Data (readDataFile) +import Text.Pandoc.Error (PandocError(..)) +import Text.Pandoc.Logging (LogMessage(..)) +import Control.Monad.Except (catchError) import qualified Data.Text as T import qualified Data.Yaml as Yaml -import GHC.Generics (Generic) -import Text.Pandoc.Shared (safeRead) import qualified Text.Pandoc.UTF8 as UTF8 import Data.Yaml (prettyPrintParseException) +import Text.Collate.Lang (Lang(..), renderLang) -data Term = - Abstract - | Appendix - | Bibliography - | Cc - | Chapter - | Contents - | Encl - | Figure - | Glossary - | Index - | Listing - | ListOfFigures - | ListOfTables - | Page - | Part - | Preface - | Proof - | References - | See - | SeeAlso - | Table - | To - deriving (Show, Eq, Ord, Generic, Enum, Read) - -newtype Translations = Translations (M.Map Term T.Text) - deriving (Show, Generic, Semigroup, Monoid) - -instance FromJSON Term where - parseJSON (String t) = case safeRead t of - Just t' -> pure t' - Nothing -> Prelude.fail $ "Invalid Term name " ++ - show t - parseJSON invalid = Aeson.typeMismatch "Term" invalid - -instance FromJSON Translations where - parseJSON o@(Object{}) = do - xs <- parseJSON o >>= mapM addItem . 
M.toList - return $ Translations (M.fromList xs) - where addItem (k,v) = - case safeRead k of - Nothing -> Prelude.fail $ "Invalid Term name " ++ show k - Just t -> - case v of - (String s) -> return (t, T.strip s) - inv -> Aeson.typeMismatch "String" inv - parseJSON invalid = Aeson.typeMismatch "Translations" invalid - -lookupTerm :: Term -> Translations -> Maybe T.Text -lookupTerm t (Translations tm) = M.lookup t tm - +-- | Parse YAML translations. readTranslations :: T.Text -> Either T.Text Translations readTranslations s = case Yaml.decodeAllEither' $ UTF8.fromText s of Left err' -> Left $ T.pack $ prettyPrintParseException err' Right (t:_) -> Right t Right [] -> Left "empty YAML document" + +-- | Select the language to use with 'translateTerm'. +-- Note that this does not read a translation file; +-- that is only done the first time 'translateTerm' is +-- used. +setTranslations :: PandocMonad m => Lang -> m () +setTranslations lang = + modifyCommonState $ \st -> st{ stTranslations = Just (lang, Nothing) } + +-- | Load term map. +getTranslations :: PandocMonad m => m Translations +getTranslations = do + mbtrans <- getsCommonState stTranslations + case mbtrans of + Nothing -> return mempty -- no language defined + Just (_, Just t) -> return t + Just (lang, Nothing) -> do -- read from file + let translationFile = "translations/" <> renderLang lang <> ".yaml" + let fallbackFile = "translations/" <> langLanguage lang <> ".yaml" + let getTrans fp = do + bs <- readDataFile fp + case readTranslations (UTF8.toText bs) of + Left e -> do + report $ CouldNotLoadTranslations (renderLang lang) + (T.pack fp <> ": " <> e) + -- make sure we don't try again... 
+ modifyCommonState $ \st -> + st{ stTranslations = Nothing } + return mempty + Right t -> do + modifyCommonState $ \st -> + st{ stTranslations = Just (lang, Just t) } + return t + catchError (getTrans $ T.unpack translationFile) + (\_ -> + catchError (getTrans $ T.unpack fallbackFile) + (\e -> do + report $ CouldNotLoadTranslations (renderLang lang) + $ case e of + PandocCouldNotFindDataFileError _ -> + "data file " <> fallbackFile <> " not found" + _ -> "" + -- make sure we don't try again... + modifyCommonState $ \st -> st{ stTranslations = Nothing } + return mempty)) + +-- | Get a translation from the current term map. +-- Issue a warning if the term is not defined. +translateTerm :: PandocMonad m => Term -> m T.Text +translateTerm term = do + translations <- getTranslations + case lookupTerm term translations of + Just s -> return s + Nothing -> do + report $ NoTranslation $ T.pack $ show term + return "" diff --git a/src/Text/Pandoc/Translations/Types.hs b/src/Text/Pandoc/Translations/Types.hs new file mode 100644 index 000000000000..370de92179d7 --- /dev/null +++ b/src/Text/Pandoc/Translations/Types.hs @@ -0,0 +1,88 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Translations.Types + Copyright : Copyright (C) 2017-2022 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Data types for localization. + +Translations are stored in @data/translations/langname.trans@, +where langname can be the full BCP47 language specifier, or +just the language part. File format is: + +> # A comment, ignored +> Figure: Figura +> Index: Indeksi + +-} +module Text.Pandoc.Translations.Types ( + Term(..) 
+ , Translations + , lookupTerm + ) +where +import Data.Aeson.Types (Value(..), FromJSON(..)) +import qualified Data.Aeson.Types as Aeson +import qualified Data.Map as M +import qualified Data.Text as T +import GHC.Generics (Generic) +import Text.Pandoc.Shared (safeRead) + +data Term = + Abstract + | Appendix + | Bibliography + | Cc + | Chapter + | Contents + | Encl + | Figure + | Glossary + | Index + | Listing + | ListOfFigures + | ListOfTables + | Page + | Part + | Preface + | Proof + | References + | See + | SeeAlso + | Table + | To + deriving (Show, Eq, Ord, Generic, Enum, Read) + +newtype Translations = Translations (M.Map Term T.Text) + deriving (Show, Generic, Semigroup, Monoid) + +instance FromJSON Term where + parseJSON (String t) = case safeRead t of + Just t' -> pure t' + Nothing -> Prelude.fail $ "Invalid Term name " ++ + show t + parseJSON invalid = Aeson.typeMismatch "Term" invalid + +instance FromJSON Translations where + parseJSON o@(Object{}) = do + xs <- parseJSON o >>= mapM addItem . M.toList + return $ Translations (M.fromList xs) + where addItem (k,v) = + case safeRead k of + Nothing -> Prelude.fail $ "Invalid Term name " ++ show k + Just t -> + case v of + (String s) -> return (t, T.strip s) + inv -> Aeson.typeMismatch "String" inv + parseJSON invalid = Aeson.typeMismatch "Translations" invalid + +-- | Lookup a term in a 'Translations'. 
+lookupTerm :: Term -> Translations -> Maybe T.Text +lookupTerm t (Translations tm) = M.lookup t tm diff --git a/src/Text/Pandoc/URI.hs b/src/Text/Pandoc/URI.hs new file mode 100644 index 000000000000..345ada7685fc --- /dev/null +++ b/src/Text/Pandoc/URI.hs @@ -0,0 +1,109 @@ +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE CPP #-} +{- | + Module : Text.Pandoc.URI + Copyright : Copyright (C) 2006-2022 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable +-} +module Text.Pandoc.URI ( urlEncode + , escapeURI + , isURI + , schemes + , uriPathToPath + ) where +import qualified Network.HTTP.Types as HTTP +import qualified Text.Pandoc.UTF8 as UTF8 +import qualified Data.Text as T +import qualified Data.Set as Set +import Data.Char (isSpace) +import Network.URI (URI (uriScheme), parseURI, escapeURIString) + +urlEncode :: T.Text -> T.Text +urlEncode = UTF8.toText . HTTP.urlEncode True . UTF8.fromText + +-- | Escape whitespace and some punctuation characters in URI. +escapeURI :: T.Text -> T.Text +escapeURI = T.pack . escapeURIString (not . needsEscaping) . T.unpack + where needsEscaping c = isSpace c || T.any (== c) "<>|\"{}[]^`" + +-- +-- IANA URIs +-- + +-- | Schemes from http://www.iana.org/assignments/uri-schemes.html plus +-- the unofficial schemes doi, javascript, isbn, pmid. 
+schemes :: Set.Set T.Text +schemes = Set.fromList + -- Official IANA schemes + [ "aaa", "aaas", "about", "acap", "acct", "acr", "adiumxtra", "afp", "afs" + , "aim", "appdata", "apt", "attachment", "aw", "barion", "beshare", "bitcoin" + , "blob", "bolo", "browserext", "callto", "cap", "chrome", "chrome-extension" + , "cid", "coap", "coaps", "com-eventbrite-attendee", "content", "crid", "cvs" + , "data", "dav", "dict", "dis", "dlna-playcontainer", "dlna-playsingle" + , "dns", "dntp", "dtn", "dvb", "ed2k", "example", "facetime", "fax", "feed" + , "feedready", "file", "filesystem", "finger", "fish", "ftp", "geo", "gg" + , "git", "gizmoproject", "go", "gopher", "graph", "gtalk", "h323", "ham" + , "hcp", "http", "https", "hxxp", "hxxps", "hydrazone", "iax", "icap", "icon" + , "im", "imap", "info", "iotdisco", "ipn", "ipp", "ipps", "irc", "irc6" + , "ircs", "iris", "iris.beep", "iris.lwz", "iris.xpc", "iris.xpcs" + , "isostore", "itms", "jabber", "jar", "jms", "keyparc", "lastfm", "ldap" + , "ldaps", "lvlt", "magnet", "mailserver", "mailto", "maps", "market" + , "message", "mid", "mms", "modem", "mongodb", "moz", "ms-access" + , "ms-browser-extension", "ms-drive-to", "ms-enrollment", "ms-excel" + , "ms-gamebarservices", "ms-getoffice", "ms-help", "ms-infopath" + , "ms-media-stream-id", "ms-officeapp", "ms-project", "ms-powerpoint" + , "ms-publisher", "ms-search-repair", "ms-secondary-screen-controller" + , "ms-secondary-screen-setup", "ms-settings", "ms-settings-airplanemode" + , "ms-settings-bluetooth", "ms-settings-camera", "ms-settings-cellular" + , "ms-settings-cloudstorage", "ms-settings-connectabledevices" + , "ms-settings-displays-topology", "ms-settings-emailandaccounts" + , "ms-settings-language", "ms-settings-location", "ms-settings-lock" + , "ms-settings-nfctransactions", "ms-settings-notifications" + , "ms-settings-power", "ms-settings-privacy", "ms-settings-proximity" + , "ms-settings-screenrotation", "ms-settings-wifi", "ms-settings-workplace" + , "ms-spd", 
"ms-sttoverlay", "ms-transit-to", "ms-virtualtouchpad" + , "ms-visio", "ms-walk-to", "ms-whiteboard", "ms-whiteboard-cmd", "ms-word" + , "msnim", "msrp", "msrps", "mtqp", "mumble", "mupdate", "mvn", "news", "nfs" + , "ni", "nih", "nntp", "notes", "ocf", "oid", "onenote", "onenote-cmd" + , "opaquelocktoken", "pack", "palm", "paparazzi", "pkcs11", "platform", "pop" + , "pres", "prospero", "proxy", "pwid", "psyc", "qb", "query", "redis" + , "rediss", "reload", "res", "resource", "rmi", "rsync", "rtmfp", "rtmp" + , "rtsp", "rtsps", "rtspu", "secondlife", "service", "session", "sftp", "sgn" + , "shttp", "sieve", "sip", "sips", "skype", "smb", "sms", "smtp", "snews" + , "snmp", "soap.beep", "soap.beeps", "soldat", "spotify", "ssh", "steam" + , "stun", "stuns", "submit", "svn", "tag", "teamspeak", "tel", "teliaeid" + , "telnet", "tftp", "things", "thismessage", "tip", "tn3270", "tool", "turn" + , "turns", "tv", "udp", "unreal", "urn", "ut2004", "v-event", "vemmi" + , "ventrilo", "videotex", "vnc", "view-source", "wais", "webcal", "wpid" + , "ws", "wss", "wtai", "wyciwyg", "xcon", "xcon-userid", "xfire" + , "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "xri", "ymsgr", "z39.50", "z39.50r" + , "z39.50s" + -- Unofficial schemes + , "doi", "isbn", "javascript", "pmid" + ] + +-- | Check if the string is a valid URL with a IANA or frequently used but +-- unofficial scheme (see @schemes@). +isURI :: T.Text -> Bool +isURI = maybe False hasKnownScheme . parseURI . T.unpack + where + hasKnownScheme = (`Set.member` schemes) . T.toLower . + T.filter (/= ':') . T.pack . uriScheme + +-- | Converts the path part of a file: URI to a regular path. +-- On windows, @/c:/foo@ should be @c:/foo@. +-- On linux, @/foo@ should be @/foo@. 
+uriPathToPath :: T.Text -> FilePath +uriPathToPath (T.unpack -> path) = +#ifdef _WINDOWS + case path of + '/':ps -> ps + ps -> ps +#else + path +#endif diff --git a/src/Text/Pandoc/Version.hs b/src/Text/Pandoc/Version.hs new file mode 100644 index 000000000000..e5d1c2a742ec --- /dev/null +++ b/src/Text/Pandoc/Version.hs @@ -0,0 +1,28 @@ +{-# LANGUAGE CPP #-} +{- | + Module : Text.Pandoc.Version + Copyright : Copyright (C) 2022 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Version information. +-} +module Text.Pandoc.Version ( + pandocVersion, + pandocVersionText + ) where + +import Data.Version (Version, showVersion) +import Paths_pandoc (version) +import qualified Data.Text as T + +-- | Version number of pandoc library. +pandocVersion :: Version +pandocVersion = version + +-- | Text representation of the library's version number. +pandocVersionText :: T.Text +pandocVersionText = T.pack $ showVersion version diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs index f4e1f9040b27..ac1c611e199b 100644 --- a/src/Text/Pandoc/Writers.hs +++ b/src/Text/Pandoc/Writers.hs @@ -1,7 +1,8 @@ {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GADTs #-} -{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TupleSections #-} {- | Module : Text.Pandoc Copyright : Copyright (C) 2006-2022 John MacFarlane @@ -27,8 +28,8 @@ module Text.Pandoc.Writers , writeConTeXt , writeCslJson , writeDZSlides - , writeDocbook4 - , writeDocbook5 + , writeDocBook4 + , writeDocBook5 , writeDocx , writeDokuWiki , writeEPUB2 @@ -76,14 +77,12 @@ module Text.Pandoc.Writers ) where import Control.Monad.Except (throwError) -import Control.Monad (unless) import Data.Aeson import qualified Data.ByteString.Lazy as BL import Data.Text (Text) -import qualified Data.Text as T -import Text.Pandoc.Shared (tshow) import Text.Pandoc.Class 
import Text.Pandoc.Definition +import qualified Text.Pandoc.Format as Format import Text.Pandoc.Options import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Error @@ -92,7 +91,7 @@ import Text.Pandoc.Writers.BibTeX import Text.Pandoc.Writers.CommonMark import Text.Pandoc.Writers.ConTeXt import Text.Pandoc.Writers.CslJson -import Text.Pandoc.Writers.Docbook +import Text.Pandoc.Writers.DocBook import Text.Pandoc.Writers.Docx import Text.Pandoc.Writers.DokuWiki import Text.Pandoc.Writers.EPUB @@ -148,9 +147,9 @@ writers = [ ,("slideous" , TextWriter writeSlideous) ,("dzslides" , TextWriter writeDZSlides) ,("revealjs" , TextWriter writeRevealJs) - ,("docbook" , TextWriter writeDocbook5) - ,("docbook4" , TextWriter writeDocbook4) - ,("docbook5" , TextWriter writeDocbook5) + ,("docbook" , TextWriter writeDocBook5) + ,("docbook4" , TextWriter writeDocBook4) + ,("docbook5" , TextWriter writeDocBook5) ,("jats" , TextWriter writeJatsArchiving) ,("jats_articleauthoring", TextWriter writeJatsArticleAuthoring) ,("jats_publishing" , TextWriter writeJatsPublishing) @@ -193,29 +192,13 @@ writers = [ ] -- | Retrieve writer, extensions based on formatSpec (format+extensions). 
-getWriter :: PandocMonad m => Text -> m (Writer m, Extensions) -getWriter s = - case parseFormatSpec s of - Left e -> throwError $ PandocAppError $ - "Error parsing writer format " <> tshow s <> ": " <> tshow e - Right (writerName, extsToEnable, extsToDisable) -> - case lookup writerName writers of - Nothing -> throwError $ - PandocUnknownWriterError writerName - Just w -> do - let allExts = getAllExtensions writerName - let exts = foldr disableExtension - (foldr enableExtension - (getDefaultExtensions writerName) - extsToEnable) extsToDisable - mapM_ (\ext -> - unless (extensionEnabled ext allExts) $ - throwError $ - PandocUnsupportedExtensionError - (T.drop 4 $ T.pack $ show ext) writerName) - (extsToEnable ++ extsToDisable) - return (w, exts) - +getWriter :: PandocMonad m => Format.FlavoredFormat -> m (Writer m, Extensions) +getWriter flvrd = do + let writerName = Format.formatName flvrd + case lookup writerName writers of + Nothing -> throwError $ PandocUnknownWriterError writerName + Just w -> (w,) <$> + Format.applyExtensionsDiff (Format.getExtensionsConfig writerName) flvrd writeJSON :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeJSON _ = return . UTF8.toText . BL.toStrict . encode diff --git a/src/Text/Pandoc/Writers/AnnotatedTable.hs b/src/Text/Pandoc/Writers/AnnotatedTable.hs index 3f69496a90dc..d67ac9230473 100644 --- a/src/Text/Pandoc/Writers/AnnotatedTable.hs +++ b/src/Text/Pandoc/Writers/AnnotatedTable.hs @@ -42,7 +42,6 @@ module Text.Pandoc.Writers.AnnotatedTable where import Control.Monad.RWS.Strict - hiding ( (<>) ) import Data.Generics ( Data , Typeable ) diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 2822b3ef8aaf..1c54584c4c4b 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -19,7 +19,9 @@ that it has omitted the construct. 
AsciiDoc: -} module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc, writeAsciiDoctor) where +import Control.Monad (foldM) import Control.Monad.State.Strict + ( StateT, MonadState(get), gets, modify, evalStateT ) import Data.Char (isPunctuation, isSpace) import Data.List (delete, intercalate, intersperse) import Data.List.NonEmpty (NonEmpty(..)) @@ -35,6 +37,7 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (blankline, space) import Text.DocLayout import Text.Pandoc.Shared +import Text.Pandoc.URI import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Writers.Shared @@ -112,7 +115,7 @@ escapeString t escChar c = T.singleton c -- | Ordered list start parser for use in Para below. -olMarker :: Parser Text ParserState Char +olMarker :: Parsec Text ParserState Char olMarker = do (start, style', delim) <- anyOrderedListMarker if delim == Period && (style' == UpperAlpha || (style' == UpperRoman && diff --git a/src/Text/Pandoc/Writers/BibTeX.hs b/src/Text/Pandoc/Writers/BibTeX.hs index 07e27892cf0c..309690164b22 100644 --- a/src/Text/Pandoc/Writers/BibTeX.hs +++ b/src/Text/Pandoc/Writers/BibTeX.hs @@ -57,5 +57,3 @@ writeBibTeX' variant opts (Pandoc meta _) = do case writerTemplate opts of Nothing -> main Just tpl -> renderTemplate tpl context - - diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs index 13ded1f6239d..95184c5fd758 100644 --- a/src/Text/Pandoc/Writers/CommonMark.hs +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -14,4 +14,3 @@ CommonMark: module Text.Pandoc.Writers.CommonMark (writeCommonMark) where import Text.Pandoc.Writers.Markdown (writeCommonMark) - diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 50e4b4a22412..fc8ff12c9ebf 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -14,7 +14,9 @@ Conversion of 'Pandoc' format into ConTeXt. 
-} module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where +import Control.Monad (liftM) import Control.Monad.State.Strict + ( StateT, MonadState(put, get), gets, modify, evalStateT ) import Data.Char (ord, isDigit) import Data.List (intersperse) import Data.List.NonEmpty (NonEmpty ((:|))) @@ -30,6 +32,7 @@ import Text.Pandoc.Logging import Text.Pandoc.Options import Text.DocLayout import Text.Pandoc.Shared +import Text.Pandoc.URI (isURI) import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Walk (query) import Text.Pandoc.Writers.Shared @@ -309,9 +312,9 @@ tableToConTeXt (Ann.Table attr caption colspecs thead tbodies tfoot) = do ] setupCols :: [ColSpec] -> Doc Text -setupCols = vcat . map toColSetup . zip [1::Int ..] +setupCols = vcat . zipWith toColSetup [1::Int ..] where - toColSetup (i, (align, width)) = + toColSetup i (align, width) = let opts = filter (not . isEmpty) [ case align of AlignLeft -> "align=right" @@ -537,7 +540,7 @@ inlineToConTeXt (Subscript lst) = do inlineToConTeXt (SmallCaps lst) = do contents <- inlineListToConTeXt lst return $ braces $ "\\sc " <> contents -inlineToConTeXt (Code _ str) | not ('{' `elemText` str || '}' `elemText` str) = +inlineToConTeXt (Code _ str) | not (T.any (\c -> c == '{' || c == '}') str) = return $ "\\type" <> braces (literal str) inlineToConTeXt (Code _ str) = do opts <- gets stOptions diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs deleted file mode 100644 index b7c99a1557dd..000000000000 --- a/src/Text/Pandoc/Writers/Custom.hs +++ /dev/null @@ -1,58 +0,0 @@ -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE OverloadedStrings #-} -{- | - Module : Text.Pandoc.Writers.Custom - Copyright : 2012-2022 John MacFarlane, - License : GNU GPL, version 2 or above - Maintainer : John MacFarlane - -Conversion of 'Pandoc' documents to custom markup using -a Lua writer. 
--} -module Text.Pandoc.Writers.Custom ( writeCustom ) where -import Control.Exception -import Control.Monad ((<=<)) -import Data.Maybe (fromMaybe) -import Data.Text (Text) -import HsLua -import Control.Monad.IO.Class (MonadIO) -import Text.Pandoc.Class (PandocMonad, findFileWithDataFallback) -import Text.Pandoc.Definition (Pandoc (..)) -import Text.Pandoc.Lua (Global (..), runLua, setGlobals) -import Text.Pandoc.Options (WriterOptions) - -import qualified Text.Pandoc.Lua.Writer.Classic as Classic - --- | Convert Pandoc to custom markup. -writeCustom :: (PandocMonad m, MonadIO m) - => FilePath -> WriterOptions -> Pandoc -> m Text -writeCustom luaFile opts doc = do - luaFile' <- fromMaybe luaFile <$> findFileWithDataFallback "writers" luaFile - either throw pure <=< runLua $ do - setGlobals [ PANDOC_DOCUMENT doc - , PANDOC_SCRIPT_FILE luaFile' - , PANDOC_WRITER_OPTIONS opts - ] - dofileTrace luaFile' >>= \case - OK -> pure () - _ -> throwErrorAsException - -- Most classic writers contain code that throws an error if a global - -- is not present. This would break our check for the existence of a - -- "Writer" function. We resort to raw access for that reason, but - -- could also catch the error instead. - let rawgetglobal x = do - pushglobaltable - pushName x - rawget (nth 2) <* remove (nth 2) -- remove global table - - rawgetglobal "Writer" >>= \case - TypeNil -> do - pop 1 -- remove nil - Classic.runCustom opts doc - _ -> do - -- Writer on top of the stack. Call it with document and writer - -- options as arguments. 
- push doc - push opts - callTrace 2 1 - forcePeek $ peekText top diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/DocBook.hs similarity index 71% rename from src/Text/Pandoc/Writers/Docbook.hs rename to src/Text/Pandoc/Writers/DocBook.hs index 2c47ba1f52bb..db94a518eedb 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/DocBook.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {- | - Module : Text.Pandoc.Writers.Docbook + Module : Text.Pandoc.Writers.DocBook Copyright : Copyright (C) 2006-2022 John MacFarlane License : GNU GPL, version 2 or above @@ -9,9 +9,9 @@ Stability : alpha Portability : portable -Conversion of 'Pandoc' documents to Docbook XML. +Conversion of 'Pandoc' documents to DocBook XML. -} -module Text.Pandoc.Writers.Docbook ( writeDocbook4, writeDocbook5 ) where +module Text.Pandoc.Writers.DocBook ( writeDocBook4, writeDocBook5 ) where import Control.Monad.Reader import Data.Generics (everywhere, mkT) import Data.Maybe (isNothing) @@ -27,6 +27,7 @@ import Text.Pandoc.Logging import Text.Pandoc.Options import Text.DocLayout import Text.Pandoc.Shared +import Text.Pandoc.URI import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Writers.Shared import Text.Pandoc.Walk @@ -60,9 +61,9 @@ idName DocBook5 = "xml:id" idName DocBook4 = "id" -- | Convert list of authors to a docbook section -authorToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m B.Inlines -authorToDocbook opts name' = do - name <- render Nothing <$> inlinesToDocbook opts name' +authorToDocBook :: PandocMonad m => WriterOptions -> [Inline] -> DB m B.Inlines +authorToDocBook opts name' = do + name <- render Nothing <$> inlinesToDocBook opts name' let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing @@ -84,29 +85,29 @@ authorToDocbook opts name' = do in inTagsSimple "firstname" (literal $ escapeStringForXML firstname) $$ inTagsSimple "surname" 
(literal $ escapeStringForXML lastname) -writeDocbook4 :: PandocMonad m => WriterOptions -> Pandoc -> m Text -writeDocbook4 opts d = - runReaderT (writeDocbook opts d) DocBook4 +writeDocBook4 :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeDocBook4 opts d = + runReaderT (writeDocBook opts d) DocBook4 -writeDocbook5 :: PandocMonad m => WriterOptions -> Pandoc -> m Text -writeDocbook5 opts d = - runReaderT (writeDocbook opts d) DocBook5 +writeDocBook5 :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeDocBook5 opts d = + runReaderT (writeDocBook opts d) DocBook5 --- | Convert Pandoc document to string in Docbook format. -writeDocbook :: PandocMonad m => WriterOptions -> Pandoc -> DB m Text -writeDocbook opts doc = do +-- | Convert Pandoc document to string in DocBook format. +writeDocBook :: PandocMonad m => WriterOptions -> Pandoc -> DB m Text +writeDocBook opts doc = do let Pandoc meta blocks = ensureValidXmlIdentifiers doc let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing let startLvl = getStartLvl opts - let fromBlocks = blocksToDocbook opts . + let fromBlocks = blocksToDocBook opts . makeSections False (Just startLvl) - auths' <- mapM (authorToDocbook opts) $ docAuthors meta + auths' <- mapM (authorToDocBook opts) $ docAuthors meta let meta' = B.setMeta "author" auths' meta metadata <- metaToContext opts fromBlocks - (inlinesToDocbook opts) + (inlinesToDocBook opts) meta' main <- fromBlocks blocks let context = defField "body" main @@ -119,9 +120,9 @@ writeDocbook opts doc = do Nothing -> main Just tpl -> renderTemplate tpl context --- | Convert a list of Pandoc blocks to Docbook. -blocksToDocbook :: PandocMonad m => WriterOptions -> [Block] -> DB m (Doc Text) -blocksToDocbook opts = fmap vcat . mapM (blockToDocbook opts) +-- | Convert a list of Pandoc blocks to DocBook. +blocksToDocBook :: PandocMonad m => WriterOptions -> [Block] -> DB m (Doc Text) +blocksToDocBook opts = fmap vcat . 
mapM (blockToDocBook opts) -- | Auxiliary function to convert Plain block to Para. plainToPara :: Block -> Block @@ -129,33 +130,33 @@ plainToPara (Plain x) = Para x plainToPara x = x -- | Convert a list of pairs of terms and definitions into a list of --- Docbook varlistentrys. -deflistItemsToDocbook :: PandocMonad m +-- DocBook varlistentrys. +deflistItemsToDocBook :: PandocMonad m => WriterOptions -> [([Inline],[[Block]])] -> DB m (Doc Text) -deflistItemsToDocbook opts items = - vcat <$> mapM (uncurry (deflistItemToDocbook opts)) items +deflistItemsToDocBook opts items = + vcat <$> mapM (uncurry (deflistItemToDocBook opts)) items --- | Convert a term and a list of blocks into a Docbook varlistentry. -deflistItemToDocbook :: PandocMonad m +-- | Convert a term and a list of blocks into a DocBook varlistentry. +deflistItemToDocBook :: PandocMonad m => WriterOptions -> [Inline] -> [[Block]] -> DB m (Doc Text) -deflistItemToDocbook opts term defs = do - term' <- inlinesToDocbook opts term - def' <- blocksToDocbook opts $ concatMap (map plainToPara) defs +deflistItemToDocBook opts term defs = do + term' <- inlinesToDocBook opts term + def' <- blocksToDocBook opts $ concatMap (map plainToPara) defs return $ inTagsIndented "varlistentry" $ inTagsIndented "term" term' $$ inTagsIndented "listitem" def' --- | Convert a list of lists of blocks to a list of Docbook list items. -listItemsToDocbook :: PandocMonad m => WriterOptions -> [[Block]] -> DB m (Doc Text) -listItemsToDocbook opts items = vcat <$> mapM (listItemToDocbook opts) items +-- | Convert a list of lists of blocks to a list of DocBook list items. +listItemsToDocBook :: PandocMonad m => WriterOptions -> [[Block]] -> DB m (Doc Text) +listItemsToDocBook opts items = vcat <$> mapM (listItemToDocBook opts) items --- | Convert a list of blocks into a Docbook list item. 
-listItemToDocbook :: PandocMonad m => WriterOptions -> [Block] -> DB m (Doc Text) -listItemToDocbook opts item = - inTagsIndented "listitem" <$> blocksToDocbook opts (map plainToPara item) +-- | Convert a list of blocks into a DocBook list item. +listItemToDocBook :: PandocMonad m => WriterOptions -> [Block] -> DB m (Doc Text) +listItemToDocBook opts item = + inTagsIndented "listitem" <$> blocksToDocBook opts (map plainToPara item) -imageToDocbook :: WriterOptions -> Attr -> Text -> Doc Text -imageToDocbook _ attr src = selfClosingTag "imagedata" $ +imageToDocBook :: WriterOptions -> Attr -> Text -> Doc Text +imageToDocBook _ attr src = selfClosingTag "imagedata" $ ("fileref", src) : idAndRole attr <> dims where dims = go Width "width" <> go Height "depth" @@ -163,14 +164,14 @@ imageToDocbook _ attr src = selfClosingTag "imagedata" $ Just a -> [(dstr, tshow a)] Nothing -> [] --- | Convert a Pandoc block element to Docbook. -blockToDocbook :: PandocMonad m => WriterOptions -> Block -> DB m (Doc Text) -blockToDocbook _ Null = return empty +-- | Convert a Pandoc block element to DocBook. 
+blockToDocBook :: PandocMonad m => WriterOptions -> Block -> DB m (Doc Text) +blockToDocBook _ Null = return empty -- Add ids to paragraphs in divs with ids - this is needed for -- pandoc-citeproc to get link anchors in bibliographies: -blockToDocbook opts (Div (id',"section":_,_) (Header lvl (_,_,attrs) ils : xs)) = do +blockToDocBook opts (Div (id',"section":_,_) (Header lvl (_,_,attrs) ils : xs)) = do version <- ask - -- Docbook doesn't allow sections with no content, so insert some if needed + -- DocBook doesn't allow sections with no content, so insert some if needed let bs = if null xs then [Para []] else xs @@ -192,10 +193,10 @@ blockToDocbook opts (Div (id',"section":_,_) (Header lvl (_,_,attrs) ils : xs)) -- Populate miscAttr with Header.Attr.attributes, filtering out non-valid DocBook section attributes, id, and xml:id miscAttr = filter (isSectionAttr version) attrs attribs = nsAttr <> idAttr <> miscAttr - title' <- inlinesToDocbook opts ils - contents <- blocksToDocbook opts bs + title' <- inlinesToDocBook opts ils + contents <- blocksToDocBook opts bs return $ inTags True tag attribs $ inTagsSimple "title" title' $$ contents -blockToDocbook opts (Div (ident,classes,_) bs) = do +blockToDocBook opts (Div (ident,classes,_) bs) = do version <- ask let identAttribs = [(idName version, ident) | not (T.null ident)] admonitions = ["caution","danger","important","note","tip","warning"] @@ -204,9 +205,9 @@ blockToDocbook opts (Div (ident,classes,_) bs) = do let (mTitleBs, bodyBs) = case bs of -- Matches AST produced by the DocBook reader → Markdown writer → Markdown reader chain. - (Div (_,["title"],_) [Para ts] : rest) -> (Just (inlinesToDocbook opts ts), rest) - -- Matches AST produced by the Docbook reader. - (Div (_,["title"],_) ts : rest) -> (Just (blocksToDocbook opts ts), rest) + (Div (_,["title"],_) [Para ts] : rest) -> (Just (inlinesToDocBook opts ts), rest) + -- Matches AST produced by the DocBook reader. 
+ (Div (_,["title"],_) ts : rest) -> (Just (blocksToDocBook opts ts), rest) _ -> (Nothing, bs) admonitionTitle <- case mTitleBs of Nothing -> return mempty @@ -219,22 +220,22 @@ blockToDocbook opts (Div (ident,classes,_) bs) = do handleDivBody identAttribs [Para lst] = if hasLineBreaks lst then flush . nowrap . inTags False "literallayout" identAttribs - <$> inlinesToDocbook opts lst - else inTags True "para" identAttribs <$> inlinesToDocbook opts lst + <$> inlinesToDocBook opts lst + else inTags True "para" identAttribs <$> inlinesToDocBook opts lst handleDivBody identAttribs bodyBs = do - contents <- blocksToDocbook opts (map plainToPara bodyBs) + contents <- blocksToDocBook opts (map plainToPara bodyBs) return $ (if null identAttribs then mempty else selfClosingTag "anchor" identAttribs) $$ contents -blockToDocbook _ h@Header{} = do +blockToDocBook _ h@Header{} = do -- should be handled by Div section above, except inside lists/blockquotes report $ BlockNotRendered h return empty -blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst +blockToDocBook opts (Plain lst) = inlinesToDocBook opts lst -- title beginning with fig: indicates that the image is a figure -blockToDocbook opts (SimpleFigure attr txt (src, _)) = do - alt <- inlinesToDocbook opts txt +blockToDocBook opts (SimpleFigure attr txt (src, _)) = do + alt <- inlinesToDocBook opts txt let capt = if null txt then empty else inTagsSimple "title" alt @@ -242,17 +243,17 @@ blockToDocbook opts (SimpleFigure attr txt (src, _)) = do capt $$ inTagsIndented "mediaobject" ( inTagsIndented "imageobject" - (imageToDocbook opts attr src) $$ + (imageToDocBook opts attr src) $$ inTagsSimple "textobject" (inTagsSimple "phrase" alt)) -blockToDocbook opts (Para lst) +blockToDocBook opts (Para lst) | hasLineBreaks lst = flush . nowrap . 
inTagsSimple "literallayout" - <$> inlinesToDocbook opts lst - | otherwise = inTagsIndented "para" <$> inlinesToDocbook opts lst -blockToDocbook opts (LineBlock lns) = - blockToDocbook opts $ linesToPara lns -blockToDocbook opts (BlockQuote blocks) = - inTagsIndented "blockquote" <$> blocksToDocbook opts blocks -blockToDocbook opts (CodeBlock (_,classes,_) str) = return $ + <$> inlinesToDocBook opts lst + | otherwise = inTagsIndented "para" <$> inlinesToDocBook opts lst +blockToDocBook opts (LineBlock lns) = + blockToDocBook opts $ linesToPara lns +blockToDocBook opts (BlockQuote blocks) = + inTagsIndented "blockquote" <$> blocksToDocBook opts blocks +blockToDocBook opts (CodeBlock (_,classes,_) str) = return $ literal (" lang <> ">") <> cr <> flush (literal (escapeStringForXML str) <> cr <> literal "") where lang = if null langs @@ -265,11 +266,11 @@ blockToDocbook opts (CodeBlock (_,classes,_) str) = return $ then [s] else (languagesByExtension syntaxMap) . T.toLower $ s langs = concatMap langsFrom classes -blockToDocbook opts (BulletList lst) = do +blockToDocBook opts (BulletList lst) = do let attribs = [("spacing", "compact") | isTightList lst] - inTags True "itemizedlist" attribs <$> listItemsToDocbook opts lst -blockToDocbook _ (OrderedList _ []) = return empty -blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = do + inTags True "itemizedlist" attribs <$> listItemsToDocBook opts lst +blockToDocBook _ (OrderedList _ []) = return empty +blockToDocBook opts (OrderedList (start, numstyle, _) (first:rest)) = do let numeration = case numstyle of DefaultStyle -> [] Decimal -> [("numeration", "arabic")] @@ -281,34 +282,34 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = do spacing = [("spacing", "compact") | isTightList (first:rest)] attribs = numeration <> spacing items <- if start == 1 - then listItemsToDocbook opts (first:rest) + then listItemsToDocBook opts (first:rest) else do - first' <- blocksToDocbook opts (map 
plainToPara first) - rest' <- listItemsToDocbook opts rest + first' <- blocksToDocBook opts (map plainToPara first) + rest' <- listItemsToDocBook opts rest return $ inTags True "listitem" [("override",tshow start)] first' $$ rest' return $ inTags True "orderedlist" attribs items -blockToDocbook opts (DefinitionList lst) = do +blockToDocBook opts (DefinitionList lst) = do let attribs = [("spacing", "compact") | isTightList $ concatMap snd lst] - inTags True "variablelist" attribs <$> deflistItemsToDocbook opts lst -blockToDocbook _ b@(RawBlock f str) + inTags True "variablelist" attribs <$> deflistItemsToDocBook opts lst +blockToDocBook _ b@(RawBlock f str) | f == "docbook" = return $ literal str -- raw XML block | f == "html" = do version <- ask if version == DocBook5 - then return empty -- No html in Docbook5 + then return empty -- No html in DocBook5 else return $ literal str -- allow html for backwards compatibility | otherwise = do report $ BlockNotRendered b return empty -blockToDocbook _ HorizontalRule = return empty -- not semantic -blockToDocbook opts (Table _ blkCapt specs thead tbody tfoot) = do +blockToDocBook _ HorizontalRule = return empty -- not semantic +blockToDocBook opts (Table _ blkCapt specs thead tbody tfoot) = do let (caption, aligns, widths, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot captionDoc <- if null caption then return empty else inTagsIndented "title" <$> - inlinesToDocbook opts caption + inlinesToDocBook opts caption let tableType = if isEmpty captionDoc then "informaltable" else "table" percent w = tshow (truncate (100*w) :: Integer) <> "*" coltags = vcat $ zipWith (\w al -> selfClosingTag "colspec" @@ -316,9 +317,9 @@ blockToDocbook opts (Table _ blkCapt specs thead tbody tfoot) = do [("align", alignmentToString al)])) widths aligns head' <- if all null headers then return empty - else inTagsIndented "thead" <$> tableRowToDocbook opts headers + else inTagsIndented "thead" <$> tableRowToDocBook opts headers body' 
<- inTagsIndented "tbody" . vcat <$> - mapM (tableRowToDocbook opts) rows + mapM (tableRowToDocBook opts) rows return $ inTagsIndented tableType $ captionDoc $$ inTags True "tgroup" [("cols", tshow (length aligns))] ( coltags $$ head' $$ body') @@ -340,56 +341,56 @@ alignmentToString alignment = case alignment of AlignCenter -> "center" AlignDefault -> "left" -tableRowToDocbook :: PandocMonad m +tableRowToDocBook :: PandocMonad m => WriterOptions -> [[Block]] -> DB m (Doc Text) -tableRowToDocbook opts cols = - inTagsIndented "row" . vcat <$> mapM (tableItemToDocbook opts) cols +tableRowToDocBook opts cols = + inTagsIndented "row" . vcat <$> mapM (tableItemToDocBook opts) cols -tableItemToDocbook :: PandocMonad m +tableItemToDocBook :: PandocMonad m => WriterOptions -> [Block] -> DB m (Doc Text) -tableItemToDocbook opts item = - inTags True "entry" [] . vcat <$> mapM (blockToDocbook opts) item +tableItemToDocBook opts item = + inTags True "entry" [] . vcat <$> mapM (blockToDocBook opts) item --- | Convert a list of inline elements to Docbook. -inlinesToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m (Doc Text) -inlinesToDocbook opts lst = hcat <$> mapM (inlineToDocbook opts) lst +-- | Convert a list of inline elements to DocBook. +inlinesToDocBook :: PandocMonad m => WriterOptions -> [Inline] -> DB m (Doc Text) +inlinesToDocBook opts lst = hcat <$> mapM (inlineToDocBook opts) lst --- | Convert an inline element to Docbook. 
-inlineToDocbook :: PandocMonad m => WriterOptions -> Inline -> DB m (Doc Text) -inlineToDocbook _ (Str str) = return $ literal $ escapeStringForXML str -inlineToDocbook opts (Emph lst) = - inTagsSimple "emphasis" <$> inlinesToDocbook opts lst -inlineToDocbook opts (Underline lst) = - inTags False "emphasis" [("role", "underline")] <$> inlinesToDocbook opts lst -inlineToDocbook opts (Strong lst) = - inTags False "emphasis" [("role", "strong")] <$> inlinesToDocbook opts lst -inlineToDocbook opts (Strikeout lst) = +-- | Convert an inline element to DocBook. +inlineToDocBook :: PandocMonad m => WriterOptions -> Inline -> DB m (Doc Text) +inlineToDocBook _ (Str str) = return $ literal $ escapeStringForXML str +inlineToDocBook opts (Emph lst) = + inTagsSimple "emphasis" <$> inlinesToDocBook opts lst +inlineToDocBook opts (Underline lst) = + inTags False "emphasis" [("role", "underline")] <$> inlinesToDocBook opts lst +inlineToDocBook opts (Strong lst) = + inTags False "emphasis" [("role", "strong")] <$> inlinesToDocBook opts lst +inlineToDocBook opts (Strikeout lst) = inTags False "emphasis" [("role", "strikethrough")] <$> - inlinesToDocbook opts lst -inlineToDocbook opts (Superscript lst) = - inTagsSimple "superscript" <$> inlinesToDocbook opts lst -inlineToDocbook opts (Subscript lst) = - inTagsSimple "subscript" <$> inlinesToDocbook opts lst -inlineToDocbook opts (SmallCaps lst) = + inlinesToDocBook opts lst +inlineToDocBook opts (Superscript lst) = + inTagsSimple "superscript" <$> inlinesToDocBook opts lst +inlineToDocBook opts (Subscript lst) = + inTagsSimple "subscript" <$> inlinesToDocBook opts lst +inlineToDocBook opts (SmallCaps lst) = inTags False "emphasis" [("role", "smallcaps")] <$> - inlinesToDocbook opts lst -inlineToDocbook opts (Quoted _ lst) = - inTagsSimple "quote" <$> inlinesToDocbook opts lst -inlineToDocbook opts (Cite _ lst) = - inlinesToDocbook opts lst -inlineToDocbook opts (Span (ident,_,_) ils) = do + inlinesToDocBook opts lst +inlineToDocBook 
opts (Quoted _ lst) = + inTagsSimple "quote" <$> inlinesToDocBook opts lst +inlineToDocBook opts (Cite _ lst) = + inlinesToDocBook opts lst +inlineToDocBook opts (Span (ident,_,_) ils) = do version <- ask ((if T.null ident then mempty else selfClosingTag "anchor" [(idName version, ident)]) <>) <$> - inlinesToDocbook opts ils -inlineToDocbook _ (Code _ str) = + inlinesToDocBook opts ils +inlineToDocBook _ (Code _ str) = return $ inTagsSimple "literal" $ literal (escapeStringForXML str) -inlineToDocbook opts (Math t str) +inlineToDocBook opts (Math t str) | isMathML (writerHTMLMathMethod opts) = do res <- convertMath writeMathML t str case res of @@ -397,9 +398,9 @@ inlineToDocbook opts (Math t str) $ literal $ T.pack $ Xml.ppcElement conf $ fixNS $ removeAttr r - Left il -> inlineToDocbook opts il + Left il -> inlineToDocBook opts il | otherwise = - texMathToInlines t str >>= inlinesToDocbook opts + texMathToInlines t str >>= inlinesToDocBook opts where tagtype = case t of InlineMath -> "inlineequation" DisplayMath -> "informalequation" @@ -407,24 +408,24 @@ inlineToDocbook opts (Math t str) removeAttr e = e{ Xml.elAttribs = [] } fixNS' qname = qname{ Xml.qPrefix = Just "mml" } fixNS = everywhere (mkT fixNS') -inlineToDocbook _ il@(RawInline f x) +inlineToDocBook _ il@(RawInline f x) | f == "html" || f == "docbook" = return $ literal x | otherwise = do report $ InlineNotRendered il return empty -inlineToDocbook _ LineBreak = return $ literal "\n" +inlineToDocBook _ LineBreak = return $ literal "\n" -- currently ignore, would require the option to add custom -- styles to the document -inlineToDocbook _ Space = return space +inlineToDocBook _ Space = return space -- because we use \n for LineBreak, we can't do soft breaks: -inlineToDocbook _ SoftBreak = return space -inlineToDocbook opts (Link attr txt (src, _)) +inlineToDocBook _ SoftBreak = return space +inlineToDocBook opts (Link attr txt (src, _)) | Just email <- T.stripPrefix "mailto:" src = let emailLink = 
inTagsSimple "email" $ literal $ escapeStringForXML email in case txt of [Str s] | escapeURI s == email -> return emailLink - _ -> do contents <- inlinesToDocbook opts txt + _ -> do contents <- inlinesToDocBook opts txt return $ contents <+> char '(' <> emailLink <> char ')' | otherwise = do @@ -436,16 +437,16 @@ inlineToDocbook opts (Link attr txt (src, _)) else if version == DocBook5 then inTags False "link" $ ("xlink:href", src) : idAndRole attr else inTags False "ulink" $ ("url", src) : idAndRole attr ) - <$> inlinesToDocbook opts txt -inlineToDocbook opts (Image attr _ (src, tit)) = return $ + <$> inlinesToDocBook opts txt +inlineToDocBook opts (Image attr _ (src, tit)) = return $ let titleDoc = if T.null tit then empty else inTagsIndented "objectinfo" $ inTagsIndented "title" (literal $ escapeStringForXML tit) in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $ - titleDoc $$ imageToDocbook opts attr src -inlineToDocbook opts (Note contents) = - inTagsIndented "footnote" <$> blocksToDocbook opts contents + titleDoc $$ imageToDocBook opts attr src +inlineToDocBook opts (Note contents) = + inTagsIndented "footnote" <$> blocksToDocBook opts contents isMathML :: HTMLMathMethod -> Bool isMathML MathML = True diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 7b4a056b6d39..9abe04b7af04 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -18,11 +18,22 @@ Conversion of 'Pandoc' documents to docx. 
-} module Text.Pandoc.Writers.Docx ( writeDocx ) where import Codec.Archive.Zip + ( Archive(zEntries), + addEntryToArchive, + emptyArchive, + findEntryByPath, + fromArchive, + toArchive, + toEntry, + Entry(eRelativePath) ) import Control.Applicative ((<|>)) +import Control.Monad (MonadPlus(mplus), unless, when) import Control.Monad.Except (catchError, throwError) import Control.Monad.Reader -import Control.Monad.State.Strict + ( asks, MonadReader(local), MonadTrans(lift), ReaderT(runReaderT) ) +import Control.Monad.State.Strict ( StateT(runStateT), gets, modify ) import qualified Data.ByteString.Lazy as BL +import Data.Containers.ListUtils (nubOrd) import Data.Char (isSpace, isLetter) import Data.List (intercalate, isPrefixOf, isSuffixOf) import Data.String (fromString) @@ -35,12 +46,12 @@ import qualified Data.Text.Lazy as TL import Data.Time.Clock.POSIX import Data.Digest.Pure.SHA (sha1, showDigest) import Skylighting -import Text.Collate.Lang (renderLang) -import Text.Pandoc.Class (PandocMonad, report, toLang, translateTerm, - getMediaBag) +import Text.Pandoc.Class (PandocMonad, report, toLang, getMediaBag) +import Text.Pandoc.Translations (translateTerm) import Text.Pandoc.MediaBag (lookupMedia, MediaItem(..)) import qualified Text.Pandoc.Translations as Term import qualified Text.Pandoc.Class.PandocMonad as P +import Text.Pandoc.Data (readDataFile, readDefaultDataFile) import Data.Time import Text.Pandoc.UTF8 (fromTextLazy) import Text.Pandoc.Definition @@ -63,6 +74,7 @@ import Text.TeXMath import Text.Pandoc.Writers.OOXML import Text.Pandoc.XML.Light as XML import Data.Generics (mkT, everywhere) +import Text.Collate.Lang (renderLang, Lang(..)) squashProps :: EnvProps -> [Element] squashProps (EnvProps Nothing es) = es @@ -118,13 +130,13 @@ writeDocx opts doc = do utctime <- P.getTimestamp oldUserDataDir <- P.getUserDataDir P.setUserDataDir Nothing - res <- P.readDefaultDataFile "reference.docx" + res <- readDefaultDataFile "reference.docx" P.setUserDataDir 
oldUserDataDir let distArchive = toArchive $ BL.fromStrict res refArchive <- case writerReferenceDoc opts of Just f -> toArchive <$> P.readFileLazy f Nothing -> toArchive . BL.fromStrict <$> - P.readDataFile "reference.docx" + readDataFile "reference.docx" parsedDoc <- parseXml refArchive distArchive "word/document.xml" let wname f qn = qPrefix qn == Just "w" && f (qName qn) @@ -153,16 +165,33 @@ writeDocx opts doc = do let addLang :: Element -> Element addLang = case mblang of Nothing -> id - Just l -> everywhere (mkT (go (renderLang l))) + Just l -> everywhere (mkT (go l)) where - go :: Text -> Element -> Element - go l e' - | qName (elName e') == "lang" - = e'{ elAttribs = map (setvalattr l) $ elAttribs e' } - | otherwise = e' - - setvalattr l (XML.Attr qn@(QName "val" _ _) _) = XML.Attr qn l - setvalattr _ x = x + go :: Lang -> Element -> Element + go lang e' + | qName (elName e') == "lang" + = if isEastAsianLang lang + then e'{ elAttribs = + map (setattr "eastAsia" (renderLang lang)) $ + elAttribs e' } + else + if isBidiLang lang + then e'{ elAttribs = + map (setattr "bidi" (renderLang lang)) $ + elAttribs e' } + else e'{ elAttribs = + map (setattr "val" (renderLang lang)) $ + elAttribs e' } + | otherwise = e' + + setattr attrname l (XML.Attr qn@(QName s _ _) _) + | s == attrname = XML.Attr qn l + setattr _ _ x = x + + isEastAsianLang Lang{ langLanguage = lang } = + lang == "zh" || lang == "jp" || lang == "ko" + isBidiLang Lang{ langLanguage = lang } = + lang == "he" || lang == "ar" let stylepath = "word/styles.xml" styledoc <- addLang <$> parseXml refArchive distArchive stylepath @@ -615,7 +644,7 @@ baseListId = 1000 mkNumbering :: [ListMarker] -> [Element] mkNumbering lists = elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)] - where elts = map mkAbstractNum (ordNub lists) + where elts = map mkAbstractNum (nubOrd lists) maxListLevel :: Int maxListLevel = 8 @@ -934,8 +963,11 @@ blockToOpenXML' _ HorizontalRule = do 
("o:hralign","center"), ("o:hrstd","t"),("o:hr","t")] () ] blockToOpenXML' opts (Table attr caption colspecs thead tbodies tfoot) = do + -- Remove extra paragraph indentation due to list items (#5947). + -- This means that tables in lists will not be indented, but it + -- avoids unwanted indentation in each cell. content <- tableToOpenXML opts - (blocksToOpenXML opts) + (local (\env -> env{ envListLevel = -1 }) . blocksToOpenXML opts) (Grid.toTable attr caption colspecs thead tbodies tfoot) let (tableId, _, _) = attr wrapBookmark tableId content diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs index 24237e4102bb..a7f1e5fce13c 100644 --- a/src/Text/Pandoc/Writers/Docx/Table.hs +++ b/src/Text/Pandoc/Writers/Docx/Table.hs @@ -14,7 +14,8 @@ module Text.Pandoc.Writers.Docx.Table ( tableToOpenXML ) where -import Control.Monad.State.Strict ( modify, gets, unless ) +import Control.Monad.State.Strict ( modify, gets ) +import Control.Monad ( unless ) import Data.Array ( elems, (!), assocs, indices ) import Data.Text (Text) import Text.Pandoc.Definition @@ -28,14 +29,17 @@ import Text.Pandoc.Definition RowSpan(..), ColSpan(..), ColWidth(ColWidth) ) -import Text.Pandoc.Class.PandocMonad (PandocMonad, translateTerm) +import Text.Pandoc.Class.PandocMonad (PandocMonad) +import Text.Pandoc.Translations (translateTerm) import Text.Pandoc.Writers.Docx.Types ( WS, WriterState(stNextTableNum, stInTable), + WriterEnv(..), setFirstPara, pStyleM, withParaProp, withParaPropM ) +import Control.Monad.Reader (asks) import Text.Pandoc.Shared ( tshow, stringify ) import Text.Pandoc.Options (WriterOptions, isEnabled) import Text.Pandoc.Extensions (Extension(Ext_native_numbering)) @@ -98,6 +102,8 @@ tableToOpenXML opts blocksToOpenXML gridTable = do -- 0×0400 Do not apply column banding conditional formattin let tblLookVal = if hasHeader then (0x20 :: Int) else 0 let (gridCols, tblWattr) = tableLayout (elems colspecs) + listLevel <- asks envListLevel + 
let indent = (listLevel + 1) * 720 let tbl = mknode "w:tbl" [] ( mknode "w:tblPr" [] ( mknode "w:tblStyle" [("w:val","Table")] () : @@ -110,6 +116,9 @@ tableToOpenXML opts blocksToOpenXML gridTable = do ,("w:noVBand","0") ,("w:val", T.pack $ printf "%04x" tblLookVal) ] () : + mknode "w:jc" [("w:val","start")] () + : [ mknode "w:tblInd" [("w:w", tshow indent),("w:type","dxa")] () + | indent > 0 ] ++ [ mknode "w:tblCaption" [("w:val", captionStr)] () | not (T.null captionStr) ] ) diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index 5972206fdb34..caeec8371fbc 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -34,9 +34,11 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.ImageSize import Text.Pandoc.Logging -import Text.Pandoc.Options (WrapOption (..), WriterOptions (writerTableOfContents, writerTemplate, writerWrapText)) -import Text.Pandoc.Shared (camelCaseToHyphenated, escapeURI, isURI, linesToPara, +import Text.Pandoc.Options (WrapOption (..), WriterOptions (writerTableOfContents, + writerTemplate, writerWrapText)) +import Text.Pandoc.Shared (camelCaseToHyphenated, linesToPara, removeFormatting, trimr, tshow) +import Text.Pandoc.URI (escapeURI, isURI) import Text.Pandoc.Templates (renderTemplate) import Text.DocLayout (render, literal) import Text.Pandoc.Writers.Shared (defField, metaToContext, toLegacyTable) diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index a461b31b70ed..8f03ea211cf1 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -38,6 +38,7 @@ import Text.Pandoc.Builder (fromList, setMeta) import Text.Pandoc.Writers.Shared (ensureValidXmlIdentifiers) import Text.Pandoc.Class (PandocMonad, report) import qualified Text.Pandoc.Class.PandocPure as P +import Text.Pandoc.Data (readDataFile) import qualified Text.Pandoc.Class.PandocMonad as P import 
Data.Time import Text.Pandoc.Definition @@ -45,7 +46,7 @@ import Text.Pandoc.Error import Text.Pandoc.ImageSize import Text.Pandoc.Logging import Text.Pandoc.MIME (MimeType, extensionFromMimeType, getMimeType) -import Text.Pandoc.Network.HTTP (urlEncode) +import Text.Pandoc.URI (urlEncode) import Text.Pandoc.Options (EPUBVersion (..), HTMLMathMethod (..), ObfuscationMethod (NoObfuscation), WrapOption (..), WriterOptions (..)) @@ -460,7 +461,7 @@ pandocToEPUB version opts doc = do -- stylesheet stylesheets <- case epubStylesheets metadata of [] -> (\x -> [B.fromChunks [x]]) <$> - P.readDataFile "epub.css" + readDataFile "epub.css" fs -> mapM P.readFileLazy fs stylesheetEntries <- zipWithM (\bs n -> mkEntry ("styles/stylesheet" ++ show n ++ ".css") bs) @@ -689,13 +690,13 @@ pandocToEPUB version opts doc = do navPointNode formatter (Div _ bs) = concat <$> mapM (navPointNode formatter) bs navPointNode _ _ = return [] - + -- Create the tocEntry from the metadata together with the sections and title. 
tocEntry <- createTocEntry meta metadata plainTitle secs navPointNode -- Create the navEntry using the metadata, all of the various writer options, -- the CSS and HTML helpers, the document and toc title as well as the epub version and all of the sections - navEntry <- createNavEntry meta metadata opts opts' vars cssvars writeHtml plainTitle tocTitle version secs navPointNode + navEntry <- createNavEntry meta metadata opts' True vars cssvars writeHtml tocTitle version secs navPointNode -- mimetype mimetypeEntry <- mkEntry "mimetype" $ @@ -740,7 +741,7 @@ createCoverPage :: PandocMonad m => -> (WriterOptions -> Pandoc -> m B8.ByteString) -> Text -> StateT EPUBState m ([Entry], [Entry]) -createCoverPage meta metadata opts' vars cssvars writeHtml plainTitle = +createCoverPage meta metadata opts' vars cssvars writeHtml plainTitle = case epubCoverImage metadata of Nothing -> return ([],[]) Just img -> do @@ -807,7 +808,7 @@ createChapterEntries opts' vars cssvars writeHtml chapters = do -- remove notes or we get doubled footnotes (Pandoc (setMeta "title" (walk removeNote $ fromList xs) nullMeta) bs, - -- Check if the chapters belongs to the frontmatter, + -- Check if the chapters belongs to the frontmatter, -- backmatter of bodymatter defaulting to the body case lookup "epub:type" kvs of Nothing -> "bodymatter" @@ -832,7 +833,7 @@ createChapterEntries opts' vars cssvars writeHtml chapters = do -- | Splits the blocks into chapters and creates a corresponding reftable createChaptersAndReftable :: WriterOptions -> [Block] -> ([Chapter], [(Text, Text)]) createChaptersAndReftable opts secs = (chapters, reftable) - where + where chapterHeaderLevel = writerEpubChapterLevel opts isChapterHeader :: Block -> Bool @@ -845,18 +846,18 @@ createChaptersAndReftable opts secs = (chapters, reftable) -- If the header is of the same level as chapters, create a chapter | chapterHeaderLevel == lvl = Chapter [d] : secsToChapters rest - -- If the header is a level higher than chapters, + -- 
If the header is a level higher than chapters, -- create a chapter of everything until the next chapter header. | chapterHeaderLevel > lvl = Chapter [Div attr (h:xs)] : secsToChapters ys ++ secsToChapters rest where (xs, ys) = break isChapterHeader bs secsToChapters bs = - -- If this is the last block, keep it as is, + -- If this is the last block, keep it as is, -- otherwise create a chapter for everything until the next chapter header. (if null xs then id else (Chapter xs :)) $ secsToChapters ys where (xs, ys) = break isChapterHeader bs - + -- Convert the sections to initial chapters chapters' = secsToChapters secs @@ -879,7 +880,7 @@ createChaptersAndReftable opts secs = (chapters, reftable) _ -> id) [] (parseTags raw) extractLinkURL' _ _ = [] - + -- Extract references for the reftable from Block elements extractLinkURL :: Int -> Block -> [(T.Text, T.Text)] extractLinkURL num (Div (ident, _, _) _) @@ -972,17 +973,17 @@ createNavEntry :: PandocMonad m => Meta -> EPUBMetadata -> WriterOptions - -> WriterOptions + -> Bool -> Context Text -> (Bool -> Context Text) -> (WriterOptions -> Pandoc -> m B8.ByteString) -> Text - -> Text -> EPUBVersion -> [Block] -> ((Int -> [Inline] -> T.Text -> [Element] -> Element) -> Block -> StateT Int m [Element]) -> StateT EPUBState m Entry -createNavEntry meta metadata opts opts' vars cssvars writeHtml plainTitle tocTitle version secs navPointNode = do +createNavEntry meta metadata opts includeTitlePage + vars cssvars writeHtml tocTitle version secs navPointNode = do let navXhtmlFormatter :: Int -> [Inline] -> T.Text -> [Element] -> Element navXhtmlFormatter n tit src subs = unode "li" ! 
[("id", "toc-li-" <> tshow n)] $ @@ -996,12 +997,7 @@ createNavEntry meta metadata opts opts' vars cssvars writeHtml plainTitle tocTit parseXMLContents (TL.fromStrict titRendered) titRendered = case P.runPure (writeHtmlStringForEPUB version - opts{ writerTemplate = Nothing - , writerVariables = - Context (M.fromList - [("pagetitle", toVal $ - escapeStringForXML plainTitle)]) - <> writerVariables opts} + opts{ writerTemplate = Nothing } (Pandoc nullMeta [Plain $ walk clean tit])) of Left _ -> stringify tit @@ -1021,11 +1017,12 @@ createNavEntry meta metadata opts opts' vars cssvars writeHtml plainTitle tocTit [ unode "h1" ! [("id","toc-title")] $ tocTitle , unode "ol" ! [("class","toc")] $ tocBlocks ]] let landmarkItems = if version == EPUB3 - then unode "li" + then [ unode "li" [ unode "a" ! [("href", "text/title_page.xhtml") ,("epub:type", "titlepage")] $ - ("Title Page" :: Text) ] : + ("Title Page" :: Text) ] | + includeTitlePage ] ++ [ unode "li" [ unode "a" ! [("href", "text/cover.xhtml") ,("epub:type", "cover")] $ @@ -1045,7 +1042,7 @@ createNavEntry meta metadata opts opts' vars cssvars writeHtml plainTitle tocTit ,("hidden","hidden")] $ [ unode "ol" landmarkItems ] | not (null landmarkItems)] - navData <- lift $ writeHtml opts'{ writerVariables = + navData <- lift $ writeHtml opts{ writerVariables = Context (M.fromList [("navpage", toVal' "true") ,("body-type", toVal' "frontmatter") ]) @@ -1220,8 +1217,10 @@ modifyMediaRef oldsrc = do Just (n,_) -> return $ T.pack n Nothing -> catchError (do (img, mbMime) <- P.fetchItem $ T.pack oldsrc - let ext = maybe (takeExtension (takeWhile (/='?') oldsrc)) T.unpack - (("." <>) <$> (mbMime >>= extensionFromMimeType)) + let ext = maybe + (takeExtension (takeWhile (/='?') oldsrc)) + (T.unpack . ("." <>)) + (mbMime >>= extensionFromMimeType) newName <- getMediaNextNewName ext let newPath = "media/" ++ newName entry <- mkEntry newPath (B.fromChunks . 
(:[]) $ img) diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 18da083c3208..8d969d171d1f 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -18,9 +18,9 @@ FictionBook is an XML-based e-book format. For more information see: -} module Text.Pandoc.Writers.FB2 (writeFB2) where -import Control.Monad (zipWithM) +import Control.Monad (zipWithM, liftM) import Control.Monad.Except (catchError, throwError) -import Control.Monad.State.Strict (StateT, evalStateT, get, gets, lift, liftM, modify) +import Control.Monad.State.Strict (StateT, evalStateT, get, gets, lift, modify) import Data.ByteString.Base64 (encodeBase64) import Data.Char (isAscii, isControl, isSpace) import Data.Either (lefts, rights) @@ -28,7 +28,7 @@ import Data.List (intercalate) import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Lazy as TL -import Text.Pandoc.Network.HTTP (urlEncode) +import Text.Pandoc.URI (urlEncode, isURI) import Text.Pandoc.XML.Light as X import Text.Pandoc.Class.PandocMonad (PandocMonad, report) @@ -37,7 +37,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Logging import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def) -import Text.Pandoc.Shared (capitalize, isURI, orderedListMarkers, +import Text.Pandoc.Shared (capitalize, orderedListMarkers, makeSections, tshow, stringify) import Text.Pandoc.Writers.Shared (lookupMetaString, toLegacyTable, ensureValidXmlIdentifiers) diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index cdbf11e1eea7..3356e39fa8ff 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -1,4 +1,5 @@ {-# LANGUAGE LambdaCase #-} +{-# LANGUAGE BangPatterns #-} {-# LANGUAGE MultiWayIf #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -29,9 +30,13 @@ module Text.Pandoc.Writers.HTML ( tagWithAttributes ) where import 
Control.Monad.State.Strict + ( StateT, MonadState(get), gets, modify, evalStateT ) +import Control.Monad ( liftM, when, foldM, unless ) +import Control.Monad.Trans ( MonadTrans(lift) ) import Data.Char (ord) import Data.List (intercalate, intersperse, partition, delete, (\\), foldl') import Data.List.NonEmpty (NonEmpty((:|))) +import Data.Containers.ListUtils (nubOrd) import Data.Maybe (fromMaybe, isJust, isNothing) import qualified Data.Set as Set import Data.Text (Text) @@ -43,11 +48,10 @@ import Text.DocLayout (render, literal, Doc) import Text.Blaze.Internal (MarkupM (Empty), customLeaf, customParent) import Text.DocTemplates (FromContext (lookupContext), Context (..)) import Text.Blaze.Html hiding (contents) -import Text.Pandoc.Translations (Term(Abstract)) import Text.Pandoc.CSS (cssAttributes) import Text.Pandoc.Definition -import Text.Pandoc.Highlighting (formatHtmlBlock, formatHtmlInline, highlight, - styleToCss) +import Text.Pandoc.Highlighting (formatHtmlBlock, formatHtml4Block, + formatHtmlInline, highlight, styleToCss) import Text.Pandoc.ImageSize import Text.Pandoc.Options import Text.Pandoc.Shared @@ -57,7 +61,7 @@ import Text.Pandoc.Walk import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared import qualified Text.Pandoc.Writers.AnnotatedTable as Ann -import Text.Pandoc.Network.HTTP (urlEncode) +import Text.Pandoc.URI (urlEncode) import Text.Pandoc.XML (escapeStringForXML, fromEntities, toEntities, html5Attributes, html4Attributes, rdfaAttributes) import qualified Text.Blaze.XHtml5 as H5 @@ -67,8 +71,8 @@ import System.FilePath (takeBaseName) import Text.Blaze.Html.Renderer.Text (renderHtml) import qualified Text.Blaze.XHtml1.Transitional as H import qualified Text.Blaze.XHtml1.Transitional.Attributes as A -import Text.Pandoc.Class.PandocMonad (PandocMonad, report, - translateTerm) +import Text.Pandoc.Class.PandocMonad (PandocMonad, report) +import Text.Pandoc.Translations (Term(Abstract), translateTerm) import Text.Pandoc.Class.PandocPure 
(runPure) import Text.Pandoc.Error import Text.Pandoc.Logging @@ -114,21 +118,20 @@ defaultWriterState = WriterState {stNotes= [], stEmittedNotes = 0, stMath = Fals strToHtml :: Text -> Html strToHtml t - | T.any isSpecial t = strToHtml' $ T.unpack t + | T.any isSpecial t = + let !x = foldl' go mempty $ T.groupBy samegroup t + in x | otherwise = toHtml t where - strToHtml' ('\'':xs) = preEscapedString "\'" `mappend` strToHtml' xs - strToHtml' ('"' :xs) = preEscapedString "\"" `mappend` strToHtml' xs - strToHtml' (x:xs) | needsVariationSelector x - = preEscapedString [x, '\xFE0E'] `mappend` - case xs of - ('\xFE0E':ys) -> strToHtml' ys - _ -> strToHtml' xs - strToHtml' xs@(_:_) = case break isSpecial xs of - (_ ,[]) -> toHtml xs - (ys,zs) -> toHtml ys `mappend` strToHtml' zs - strToHtml' [] = "" - isSpecial c = c == '\'' || c == '"' || needsVariationSelector c + samegroup c d = d == '\xFE0E' || not (isSpecial c || isSpecial d) + isSpecial '\'' = True + isSpecial '"' = True + isSpecial c = needsVariationSelector c + go h "\'" = h <> preEscapedString "\'" + go h "\"" = h <> preEscapedString "\"" + go h txt | T.length txt == 1 && T.all needsVariationSelector txt + = h <> preEscapedString (T.unpack txt <> "\xFE0E") + go h txt = h <> toHtml txt -- See #5469: this prevents iOS from substituting emojis. needsVariationSelector :: Char -> Bool @@ -545,9 +548,9 @@ footnoteSection refLocation startCounter notes = do = H5.section ! A.id "footnotes" ! A.class_ className ! customAttribute "epub:type" "footnotes" $ x - | html5 = H5.section ! A.id "footnotes" - ! A.class_ className - ! customAttribute "role" "doc-endnotes" + | html5 = H5.section ! A5.id "footnotes" + ! A5.class_ className + ! A5.role "doc-endnotes" $ x | slideVariant /= NoSlides = H.div ! A.class_ "footnotes slide" $ x | otherwise = H.div ! 
A.class_ className $ x @@ -679,9 +682,10 @@ attrsToHtml :: PandocMonad m => WriterOptions -> Attr -> StateT WriterState m [Attribute] attrsToHtml opts (id',classes',keyvals) = do attrs <- toAttrs keyvals + let classes'' = filter (not . T.null) classes' return $ [prefixedId opts id' | not (T.null id')] ++ - [A.class_ (toValue $ T.unwords classes') | not (null classes')] ++ attrs + [A.class_ (toValue $ T.unwords classes'') | not (null classes'')] ++ attrs imgAttrsToHtml :: PandocMonad m => WriterOptions -> Attr -> StateT WriterState m [Attribute] @@ -808,9 +812,11 @@ blockToHtmlInner opts (Div (ident, "section":dclasses, dkvs) let inDiv' zs = RawBlock (Format "html") ("
fragmentClass <> "\">") : (zs ++ [RawBlock (Format "html") "
"]) - let breakOnPauses zs = case splitBy isPause zs of + let breakOnPauses zs + | slide = case splitBy isPause zs of [] -> [] y:ys -> y ++ concatMap inDiv' ys + | otherwise = zs let (titleBlocks, innerSecs) = if titleSlide -- title slides have no content of their own @@ -827,7 +833,7 @@ blockToHtmlInner opts (Div (ident, "section":dclasses, dkvs) res <- blockListToHtml opts innerSecs modify $ \st -> st{ stInSection = inSection } return res - let classes' = ordNub $ + let classes' = nubOrd $ ["title-slide" | titleSlide] ++ ["slide" | slide] ++ ["section" | (slide || writerSectionDivs opts) && not html5 ] ++ @@ -878,8 +884,8 @@ blockToHtmlInner opts (Div attr@(ident, classes, kvs') bs) = do , k /= "width" || "column" `notElem` classes] ++ [("style", "width:" <> w <> ";") | "column" `elem` classes , ("width", w) <- kvs'] ++ - [("role", "doc-bibliography") | isCslBibBody && html5] ++ - [("role", "doc-biblioentry") | isCslBibEntry && html5] + [("role", "list") | isCslBibBody && html5] ++ + [("role", "listitem") | isCslBibEntry && html5] let speakerNotes = "notes" `elem` classes -- we don't want incremental output inside speaker notes, see #1394 let opts' = if | speakerNotes -> opts{ writerIncremental = False } @@ -913,7 +919,7 @@ blockToHtmlInner opts (Div attr@(ident, classes, kvs') bs) = do DZSlides -> do t <- addAttrs opts' attr $ H5.div contents' - return $ t ! H5.customAttribute "role" "note" + return $ t ! A5.role "note" NoSlides -> addAttrs opts' attr $ H.div contents' _ -> return mempty @@ -934,6 +940,7 @@ blockToHtmlInner _ HorizontalRule = do html5 <- gets stHtml5 return $ if html5 then H5.hr else H.hr blockToHtmlInner opts (CodeBlock (id',classes,keyvals) rawCode) = do + html5 <- gets stHtml5 id'' <- if T.null id' then do modify $ \st -> st{ stCodeBlockNum = stCodeBlockNum st + 1 } @@ -952,7 +959,8 @@ blockToHtmlInner opts (CodeBlock (id',classes,keyvals) rawCode) = do then T.unlines . map ("> " <>) . 
T.lines $ rawCode else rawCode hlCode = if isJust (writerHighlightStyle opts) - then highlight (writerSyntaxMap opts) formatHtmlBlock + then highlight (writerSyntaxMap opts) + (if html5 then formatHtmlBlock else formatHtml4Block) (id'',classes',keyvals) adjCode else Left "" case hlCode of @@ -1001,7 +1009,8 @@ blockToHtmlInner opts (Header level (ident,classes,kvs) lst) = do else [ (k, v) | (k, v) <- kvs , k `elem` (["lang", "dir", "title", "style" , "align"] ++ intrinsicEventsHTML4)] - addAttrs opts (ident,classes,kvs') + let classes' = if level > 6 then "heading":classes else classes + addAttrs opts (ident,classes',kvs') $ case level of 1 -> H.h1 contents' 2 -> H.h2 contents' @@ -1009,7 +1018,7 @@ blockToHtmlInner opts (Header level (ident,classes,kvs) lst) = do 4 -> H.h4 contents' 5 -> H.h5 contents' 6 -> H.h6 contents' - _ -> H.p ! A.class_ "heading" $ contents' + _ -> H.p contents' blockToHtmlInner opts (BulletList lst) = do contents <- mapM (listItemToHtml opts) lst let isTaskList = not (null lst) && all isTaskListItem lst @@ -1542,10 +1551,10 @@ inlineToHtml opts inline = do _ -> do report $ InlineNotRendered inline return mempty (Link attr txt (s,_)) | "mailto:" `T.isPrefixOf` s -> do - linkText <- inlineListToHtml opts txt + linkText <- inlineListToHtml opts (removeLinks txt) obfuscateLink opts attr linkText s (Link (ident,classes,kvs) txt (s,tit)) -> do - linkText <- inlineListToHtml opts txt + linkText <- inlineListToHtml opts (removeLinks txt) slideVariant <- gets stSlideVariant let s' = case T.uncons s of Just ('#',xs) -> let prefix = if slideVariant == RevealJsSlides @@ -1610,10 +1619,12 @@ inlineToHtml opts inline = do $ toHtml ref return $ case epubVersion of Just EPUB3 -> link ! customAttribute "epub:type" "noteref" - _ | html5 -> link ! H5.customAttribute - "role" "doc-noteref" + _ | html5 -> link ! 
A5.role "doc-noteref" _ -> link - (Cite cits il)-> do contents <- inlineListToHtml opts (walk addRoleToLink il) + (Cite cits il)-> do contents <- inlineListToHtml opts + (if html5 + then walk addRoleToLink il + else il) let citationIds = T.unwords $ map citationId cits let result = H.span ! A.class_ "citation" $ contents return $ if html5 @@ -1724,3 +1735,11 @@ isRawHtml f = do html5 <- gets stHtml5 return $ f == Format "html" || ((html5 && f == Format "html5") || f == Format "html4") + +-- We need to remove links from link text, because an
element is +-- not allowed inside another element. +removeLinks :: [Inline] -> [Inline] +removeLinks = walk go + where + go (Link attr ils _) = Span attr ils + go x = x diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index dfd89bc5439b..6f8594ff55c5 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -14,7 +14,9 @@ Conversion of 'Pandoc' documents to haddock markup. Haddock: -} module Text.Pandoc.Writers.Haddock (writeHaddock) where +import Control.Monad (zipWithM) import Control.Monad.State.Strict + ( StateT, MonadState(get), modify, evalStateT ) import Data.Char (isAlphaNum) import Data.Default import Data.Text (Text) @@ -25,6 +27,7 @@ import Text.Pandoc.Logging import Text.Pandoc.Options import Text.DocLayout import Text.Pandoc.Shared +import Text.Pandoc.URI import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Writers.Shared diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs index 7f0c4d5a78ba..7c39a99c4006 100644 --- a/src/Text/Pandoc/Writers/ICML.hs +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -17,7 +17,9 @@ into InDesign with File -> Place. 
-} module Text.Pandoc.Writers.ICML (writeICML) where import Control.Monad.Except (catchError) +import Control.Monad (liftM2) import Control.Monad.State.Strict + ( MonadTrans(lift), StateT(runStateT), MonadState(state, get, put) ) import Data.List (intersperse) import Data.Maybe (fromMaybe, maybeToList) import qualified Data.Set as Set @@ -30,6 +32,7 @@ import Text.Pandoc.Logging import Text.Pandoc.Options import Text.DocLayout import Text.Pandoc.Shared +import Text.Pandoc.URI (isURI) import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Writers.Math (texMathToInlines) import Text.Pandoc.Writers.Shared @@ -486,7 +489,7 @@ inlineToICML opts style _ (Span (ident, _, kvs) lst) = in inlinesToICML opts (dynamicStyle <> style) ident lst -- ident will be the id of the span, that we need to use down in the hyperlink setter -- if T.null ident --- then +-- then -- else do -- | Convert a list of block elements to an ICML footnote. @@ -559,7 +562,7 @@ makeLinkDest ident cont = vcat [ -- | Create the markup for the content (incl. 
named destinations) -- | NOTE: since we have no easy way to get actual named dests, we just create them for any short content blocks makeContent :: Text -> Doc Text -> Doc Text -makeContent ident cont +makeContent ident cont | isEmpty cont = empty | not (Text.null ident) = makeLinkDest ident cont | otherwise = inTagsSimple "Content" $ flush cont @@ -620,6 +623,13 @@ imageICML opts style attr (src, _) = do , selfClosingTag "PathPointType" [("Anchor", hw<>" -"<>hh), ("LeftDirection", hw<>" -"<>hh), ("RightDirection", hw<>" -"<>hh)] ] + img = if "data:" `Text.isPrefixOf` src' && "base64," `Text.isInfixOf` src' + then -- see #8398 + inTags True "Contents" [] $ + literal (" + Text.drop 1 (Text.dropWhile (/=',') src') <> "]]>") + else selfClosingTag "Link" [("Self", "ueb"), ("LinkResourceURI", src')] + image = inTags True "Image" [("Self","ue6"), ("ItemTransform", scale<>" -"<>hw<>" -"<>hh)] $ vcat [ @@ -629,7 +639,7 @@ imageICML opts style attr (src, _) = do , ("Right", showFl $ ow*ow / imgWidth) , ("Bottom", showFl $ oh*oh / imgHeight)] ] - , selfClosingTag "Link" [("Self", "ueb"), ("LinkResourceURI", src')] + , img ] doc = inTags True "CharacterStyleRange" attrs $ inTags True "Rectangle" [("Self","uec"), ("StrokeWeight", "0"), diff --git a/src/Text/Pandoc/Writers/Ipynb.hs b/src/Text/Pandoc/Writers/Ipynb.hs index fa6369e629f8..5add1f1751d9 100644 --- a/src/Text/Pandoc/Writers/Ipynb.hs +++ b/src/Text/Pandoc/Writers/Ipynb.hs @@ -14,7 +14,8 @@ Ipynb (Jupyter notebook JSON format) writer for pandoc. 
-} module Text.Pandoc.Writers.Ipynb ( writeIpynb ) where -import Control.Monad.State +import Control.Monad (foldM) +import Control.Monad.State ( StateT(runStateT), modify ) import qualified Data.Map as M import Data.Maybe (catMaybes, fromMaybe) import Text.Pandoc.Options @@ -29,7 +30,8 @@ import qualified Data.Text as T import qualified Data.Text.Lazy as TL import Data.Aeson as Aeson import qualified Text.Pandoc.UTF8 as UTF8 -import Text.Pandoc.Shared (safeRead, isURI) +import Text.Pandoc.Shared (safeRead) +import Text.Pandoc.URI (isURI) import Text.Pandoc.Writers.Shared (metaToContext') import Text.Pandoc.Writers.Markdown (writePlain, writeMarkdown) import qualified Data.Text.Encoding as TE diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index 7847f2bdad40..ddeadee2173b 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -22,6 +22,7 @@ module Text.Pandoc.Writers.JATS , writeJatsArticleAuthoring ) where import Control.Applicative ((<|>)) +import Control.Monad import Control.Monad.Reader import Control.Monad.State import Data.Generics (everywhere, mkT) @@ -41,6 +42,7 @@ import Text.Pandoc.Walk (walk) import Text.Pandoc.Options import Text.DocLayout import Text.Pandoc.Shared +import Text.Pandoc.URI import Text.Pandoc.Templates (renderTemplate) import Text.DocTemplates (Context(..), Val(..)) import Text.Pandoc.Writers.JATS.References (referencesToJATS) @@ -70,6 +72,7 @@ writeJatsArticleAuthoring = writeJats TagSetArticleAuthoring -- | Alias for @'writeJatsArchiving'@. This function exists for backwards -- compatibility, but will be deprecated in the future. Use -- @'writeJatsArchiving'@ instead. 
+{-# DEPRECATED writeJATS "Use writeJatsArchiving instead" #-} writeJATS :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeJATS = writeJatsArchiving @@ -135,8 +138,11 @@ docToJATS opts (Pandoc meta blocks) = do formatTime defaultTimeLocale "%F" day) ] Just x -> x + title' <- inlinesToJATS opts $ map fixLineBreak + (lookupMetaInlines "title" meta) let context = defField "body" main $ defField "back" back + $ resetField "title" title' $ resetField "date" date $ defField "mathml" (case writerHTMLMathMethod opts of MathML -> True @@ -249,6 +255,12 @@ codeAttr opts (ident,classes,kvs) = (lang, attr) "platforms", "position", "specific-use"]] lang = languageFor opts classes +-- is only allowed as a direct child of or or +-- <article-title> +fixLineBreak :: Inline -> Inline +fixLineBreak LineBreak = RawInline (Format "jats") "<break/>" +fixLineBreak x = x + -- | Convert a Pandoc block element to JATS. blockToJATS :: PandocMonad m => WriterOptions -> Block -> JATS m (Doc Text) blockToJATS _ Null = return empty @@ -257,7 +269,7 @@ blockToJATS opts (Div (id',"section":_,kvs) (Header _lvl _ ils : xs)) = do | not (T.null id')] let otherAttrs = ["sec-type", "specific-use"] let attribs = idAttr ++ [(k,v) | (k,v) <- kvs, k `elem` otherAttrs] - title' <- inlinesToJATS opts ils + title' <- inlinesToJATS opts (map fixLineBreak ils) contents <- blocksToJATS opts xs return $ inTags True "sec" attribs $ inTagsSimple "title" title' $$ contents @@ -287,7 +299,7 @@ blockToJATS opts (Div (ident,_,kvs) bs) = do "content-type", "orientation", "position"]] return $ inTags True "boxed-text" attr contents blockToJATS opts (Header _ _ title) = do - title' <- inlinesToJATS opts title + title' <- inlinesToJATS opts (map fixLineBreak title) return $ inTagsSimple "title" title' -- No Plain, everything needs to be in a block-level tag blockToJATS opts (Plain lst) = blockToJATS opts (Para lst) diff --git a/src/Text/Pandoc/Writers/JATS/References.hs b/src/Text/Pandoc/Writers/JATS/References.hs 
index 720299f05ad6..4bbca71cbbfe 100644 --- a/src/Text/Pandoc/Writers/JATS/References.hs +++ b/src/Text/Pandoc/Writers/JATS/References.hs @@ -147,13 +147,11 @@ fourDigits :: Int -> Text fourDigits n = T.takeEnd 4 $ "000" <> tshow n toNameElements :: Name -> Doc Text -toNameElements name = - if not (isEmpty nameTags) - then inTags' "name" [] nameTags - else if nameLiteral name == Just "others" -- indicates an "et al." - then "<etal/>" - else nameLiteral name `inNameTag` "string-name" - where +toNameElements name + | not (isEmpty nameTags) = inTags' "name" [] nameTags + | nameLiteral name == Just "others" = "<etal/>" + | otherwise = nameLiteral name `inNameTag` "string-name" + where inNameTag mVal tag = case mVal of Nothing -> empty Just val -> inTags' tag [] . literal $ escapeStringForXML val diff --git a/src/Text/Pandoc/Writers/JATS/Table.hs b/src/Text/Pandoc/Writers/JATS/Table.hs index 56aef3b87cae..7f0c614d044f 100644 --- a/src/Text/Pandoc/Writers/JATS/Table.hs +++ b/src/Text/Pandoc/Writers/JATS/Table.hs @@ -234,8 +234,13 @@ tableCellToJats :: PandocMonad m tableCellToJats opts ctype colAlign (Cell attr align rowspan colspan item) = do blockToJats <- asks jatsBlockWriter inlinesToJats <- asks jatsInlinesWriter + let fixBreak LineBreak = RawInline (Format "jats") "<break/>" + fixBreak x = x let cellContents = \case - [Plain inlines] -> inlinesToJats opts inlines + [Plain inlines] -> inlinesToJats opts + (map fixBreak inlines) + -- Note: <break/> is allowed only as a direct + -- child of <td>, so we don't use walk. 
blocks -> blockToJats needsWrapInCell opts blocks let tag' = case ctype of BodyCell -> "td" diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 2090e7bcd10f..ed5b1973df51 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -20,6 +20,16 @@ module Text.Pandoc.Writers.LaTeX ( , writeBeamer ) where import Control.Monad.State.Strict + ( MonadState(get, put), + gets, + modify, + evalStateT ) +import Control.Monad + ( MonadPlus(mplus), + liftM, + when, + unless ) +import Data.Containers.ListUtils (nubOrd) import Data.Char (isDigit) import Data.List (intersperse, (\\)) import Data.Maybe (catMaybes, fromMaybe, isJust, mapMaybe, isNothing) @@ -37,6 +47,7 @@ import Text.Pandoc.Logging import Text.Pandoc.Options import Text.DocLayout import Text.Pandoc.Shared +import Text.Pandoc.URI import Text.Pandoc.Slides import Text.Pandoc.Walk (query, walk, walkM) import Text.Pandoc.Writers.LaTeX.Caption (getCaption) @@ -67,12 +78,12 @@ writeBeamer options document = pandocToLaTeX :: PandocMonad m => WriterOptions -> Pandoc -> LW m Text pandocToLaTeX options (Pandoc meta blocks) = do - -- Strip off final 'references' header if --natbib or --biblatex + -- Strip off 'references' header if --natbib or --biblatex let method = writerCiteMethod options + let isRefsDiv (Div ("refs",_,_) _) = True + isRefsDiv _ = False let blocks' = if method == Biblatex || method == Natbib - then case reverse blocks of - Div ("refs",_,_) _:xs -> reverse xs - _ -> blocks + then filter (not . isRefsDiv) blocks else blocks -- see if there are internal links let isInternalLink (Link _ _ (s,_)) @@ -123,7 +134,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta docLangs <- catMaybes <$> - mapM (toLang . Just) (ordNub (query (extract "lang") blocks)) + mapM (toLang . 
Just) (nubOrd (query (extract "lang") blocks)) let hasStringValue x = isJust (getField x metadata :: Maybe (Doc Text)) let geometryFromMargins = mconcat $ intersperse ("," :: Doc Text) $ mapMaybe (\(x,y) -> @@ -165,6 +176,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do defField "numbersections" (writerNumberSections options) $ defField "lhs" (stLHS st) $ defField "graphics" (stGraphics st) $ + defField "svg" (stSVG st) $ defField "has-chapters" (stHasChapters st) $ defField "has-frontmatter" (documentClass `elem` frontmatterClasses) $ defField "listings" (writerListings options || stLHS st) $ @@ -208,9 +220,9 @@ pandocToLaTeX options (Pandoc meta blocks) = do maybe id (\l -> defField "lang" (literal $ renderLang l)) mblang $ maybe id (\l -> defField "babel-lang" - (literal $ toBabel l)) mblang + (literal l)) (mblang >>= toBabel) $ defField "babel-otherlangs" - (map (literal . toBabel) docLangs) + (map literal $ mapMaybe toBabel docLangs) $ defField "latex-dir-rtl" ((render Nothing <$> getField "dir" context) == Just ("rtl" :: Text)) context @@ -282,8 +294,8 @@ blockToLaTeX (Div (identifier,"slide":dclasses,dkvs) hasCodeBlock _ = [] let hasCode (Code _ _) = [True] hasCode _ = [] - let classes = ordNub $ dclasses ++ hclasses - let kvs = ordNub $ dkvs ++ hkvs + let classes = nubOrd $ dclasses ++ hclasses + let kvs = nubOrd $ dkvs ++ hkvs let fragile = "fragile" `elem` classes || not (null $ query hasCodeBlock bs ++ query hasCode bs) let frameoptions = ["allowdisplaybreaks", "allowframebreaks", "fragile", @@ -738,10 +750,9 @@ inlineToLaTeX (Span (id',classes,kvs) ils) = do kvToCmd ("dir","ltr") = Just "LR" kvToCmd _ = Nothing langCmds = - case lang of - Just lng -> let l = toBabel lng - in ["foreignlanguage{" <> l <> "}"] - Nothing -> [] + case lang >>= toBabel of + Just l -> ["foreignlanguage{" <> l <> "}"] + Nothing -> [] let cmds = mapMaybe classToCmd classes ++ mapMaybe kvToCmd kvs ++ langCmds contents <- inlineListToLaTeX ils return $ @@ -935,7 +946,9 @@ 
inlineToLaTeX il@(Image _ _ (src, _)) return empty inlineToLaTeX (Image attr@(_,_,kvs) _ (source, _)) = do setEmptyLine False - modify $ \s -> s{ stGraphics = True } + let isSVG = ".svg" `T.isSuffixOf` source || ".SVG" `T.isSuffixOf` source + modify $ \s -> s{ stGraphics = True + , stSVG = stSVG s || isSVG } opts <- gets stOptions let showDim dir = let d = text (show dir) <> "=" in case dimension dir attr of @@ -959,7 +972,7 @@ inlineToLaTeX (Image attr@(_,_,kvs) _ (source, _)) = do optList = showDim Width <> showDim Height <> maybe [] (\x -> ["page=" <> literal x]) (lookup "page" kvs) <> maybe [] (\x -> ["trim=" <> literal x]) (lookup "trim" kvs) <> - maybe [] (\_ -> ["clip"]) (lookup "clip" kvs) + maybe [] (const ["clip"]) (lookup "clip" kvs) options = if null optList then empty else brackets $ mconcat (intersperse "," optList) @@ -969,7 +982,8 @@ inlineToLaTeX (Image attr@(_,_,kvs) _ (source, _)) = do source'' <- stringToLaTeX URLString source' inHeading <- gets stInHeading return $ - (if inHeading then "\\protect\\includegraphics" else "\\includegraphics") <> + (if inHeading then "\\protect" else "") <> + (if isSVG then "\\includesvg" else "\\includegraphics") <> options <> braces (literal source'') inlineToLaTeX (Note contents) = do setEmptyLine False diff --git a/src/Text/Pandoc/Writers/LaTeX/Lang.hs b/src/Text/Pandoc/Writers/LaTeX/Lang.hs index 3c2bfd262e23..60a2ea8d83d1 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Lang.hs @@ -21,125 +21,125 @@ import Text.Collate.Lang (Lang(..)) -- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf -- List of supported languages (slightly outdated): -- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf -toBabel :: Lang -> Text +toBabel :: Lang -> Maybe Text toBabel (Lang "de" _ (Just "AT") vars _ _) - | "1901" `elem` vars = "austrian" - | otherwise = "naustrian" + | "1901" `elem` vars = Just "austrian" + | otherwise = Just "naustrian" 
toBabel (Lang "de" _ (Just "CH") vars _ _) - | "1901" `elem` vars = "swissgerman" - | otherwise = "nswissgerman" + | "1901" `elem` vars = Just "swissgerman" + | otherwise = Just "nswissgerman" toBabel (Lang "de" _ _ vars _ _) - | "1901" `elem` vars = "german" - | otherwise = "ngerman" -toBabel (Lang "dsb" _ _ _ _ _) = "lowersorbian" + | "1901" `elem` vars = Just "german" + | otherwise = Just "ngerman" +toBabel (Lang "dsb" _ _ _ _ _) = Just "lowersorbian" toBabel (Lang "el" _ _ vars _ _) - | "polyton" `elem` vars = "polutonikogreek" -toBabel (Lang "en" _ (Just "AU") _ _ _) = "australian" -toBabel (Lang "en" _ (Just "CA") _ _ _) = "canadian" -toBabel (Lang "en" _ (Just "GB") _ _ _) = "british" -toBabel (Lang "en" _ (Just "NZ") _ _ _) = "newzealand" -toBabel (Lang "en" _ (Just "UK") _ _ _) = "british" -toBabel (Lang "en" _ (Just "US") _ _ _) = "american" -toBabel (Lang "fr" _ (Just "CA") _ _ _) = "canadien" + | "polyton" `elem` vars = Just "polutonikogreek" +toBabel (Lang "en" _ (Just "AU") _ _ _) = Just "australian" +toBabel (Lang "en" _ (Just "CA") _ _ _) = Just "canadian" +toBabel (Lang "en" _ (Just "GB") _ _ _) = Just "british" +toBabel (Lang "en" _ (Just "NZ") _ _ _) = Just "newzealand" +toBabel (Lang "en" _ (Just "UK") _ _ _) = Just "british" +toBabel (Lang "en" _ (Just "US") _ _ _) = Just "american" +toBabel (Lang "fr" _ (Just "CA") _ _ _) = Just "canadien" toBabel (Lang "fra" _ _ vars _ _) - | "aca" `elem` vars = "acadian" -toBabel (Lang "grc" _ _ _ _ _) = "ancientgreek" -toBabel (Lang "hsb" _ _ _ _ _) = "uppersorbian" + | "aca" `elem` vars = Just "acadian" +toBabel (Lang "grc" _ _ _ _ _) = Just "ancientgreek" +toBabel (Lang "hsb" _ _ _ _ _) = Just "uppersorbian" toBabel (Lang "la" _ _ vars _ _) - | "x-classic" `elem` vars = "classiclatin" -toBabel (Lang "pt" _ (Just "BR") _ _ _) = "brazilian" -toBabel (Lang "sl" _ _ _ _ _) = "slovene" + | "x-classic" `elem` vars = Just "classiclatin" +toBabel (Lang "pt" _ (Just "BR") _ _ _) = Just "brazilian" +toBabel (Lang 
"sl" _ _ _ _ _) = Just "slovene" toBabel x = commonFromBcp47 x -- Takes a list of the constituents of a BCP47 language code -- and converts it to a string shared by Babel and Polyglossia. -- https://tools.ietf.org/html/bcp47#section-2.1 -commonFromBcp47 :: Lang -> Text -commonFromBcp47 (Lang "sr" (Just "Cyrl") _ _ _ _) = "serbianc" +commonFromBcp47 :: Lang -> Maybe Text +commonFromBcp47 (Lang "sr" (Just "Cyrl") _ _ _ _) = Just "serbianc" commonFromBcp47 (Lang "zh" (Just "Latn") _ vars _ _) - | "pinyin" `elem` vars = "pinyin" + | "pinyin" `elem` vars = Just "pinyin" commonFromBcp47 (Lang l _ _ _ _ _) = fromIso l where - fromIso "af" = "afrikaans" - fromIso "am" = "amharic" - fromIso "ar" = "arabic" - fromIso "as" = "assamese" - fromIso "ast" = "asturian" - fromIso "bg" = "bulgarian" - fromIso "bn" = "bengali" - fromIso "bo" = "tibetan" - fromIso "br" = "breton" - fromIso "ca" = "catalan" - fromIso "cy" = "welsh" - fromIso "cs" = "czech" - fromIso "cop" = "coptic" - fromIso "da" = "danish" - fromIso "dv" = "divehi" - fromIso "el" = "greek" - fromIso "en" = "english" - fromIso "eo" = "esperanto" - fromIso "es" = "spanish" - fromIso "et" = "estonian" - fromIso "eu" = "basque" - fromIso "fa" = "farsi" - fromIso "fi" = "finnish" - fromIso "fr" = "french" - fromIso "fur" = "friulan" - fromIso "ga" = "irish" - fromIso "gd" = "scottish" - fromIso "gez" = "ethiopic" - fromIso "gl" = "galician" - fromIso "gu" = "gujarati" - fromIso "he" = "hebrew" - fromIso "hi" = "hindi" - fromIso "hr" = "croatian" - fromIso "hu" = "magyar" - fromIso "hy" = "armenian" - fromIso "ia" = "interlingua" - fromIso "id" = "indonesian" - fromIso "ie" = "interlingua" - fromIso "is" = "icelandic" - fromIso "it" = "italian" - fromIso "ja" = "japanese" - fromIso "km" = "khmer" - fromIso "kmr" = "kurmanji" - fromIso "kn" = "kannada" - fromIso "ko" = "korean" - fromIso "la" = "latin" - fromIso "lo" = "lao" - fromIso "lt" = "lithuanian" - fromIso "lv" = "latvian" - fromIso "ml" = "malayalam" - fromIso "mn" 
= "mongolian" - fromIso "mr" = "marathi" - fromIso "nb" = "norsk" - fromIso "nl" = "dutch" - fromIso "nn" = "nynorsk" - fromIso "no" = "norsk" - fromIso "nqo" = "nko" - fromIso "oc" = "occitan" - fromIso "or" = "oriya" - fromIso "pa" = "punjabi" - fromIso "pl" = "polish" - fromIso "pms" = "piedmontese" - fromIso "pt" = "portuguese" - fromIso "rm" = "romansh" - fromIso "ro" = "romanian" - fromIso "ru" = "russian" - fromIso "sa" = "sanskrit" - fromIso "se" = "samin" - fromIso "sk" = "slovak" - fromIso "sq" = "albanian" - fromIso "sr" = "serbian" - fromIso "sv" = "swedish" - fromIso "syr" = "syriac" - fromIso "ta" = "tamil" - fromIso "te" = "telugu" - fromIso "th" = "thai" - fromIso "ti" = "ethiopic" - fromIso "tk" = "turkmen" - fromIso "tr" = "turkish" - fromIso "uk" = "ukrainian" - fromIso "ur" = "urdu" - fromIso "vi" = "vietnamese" - fromIso _ = "" + fromIso "af" = Just "afrikaans" + fromIso "am" = Just "amharic" + fromIso "ar" = Just "arabic" + fromIso "as" = Just "assamese" + fromIso "ast" = Just "asturian" + fromIso "bg" = Just "bulgarian" + fromIso "bn" = Just "bengali" + fromIso "bo" = Just "tibetan" + fromIso "br" = Just "breton" + fromIso "ca" = Just "catalan" + fromIso "cy" = Just "welsh" + fromIso "cs" = Just "czech" + fromIso "cop" = Just "coptic" + fromIso "da" = Just "danish" + fromIso "dv" = Just "divehi" + fromIso "el" = Just "greek" + fromIso "en" = Just "english" + fromIso "eo" = Just "esperanto" + fromIso "es" = Just "spanish" + fromIso "et" = Just "estonian" + fromIso "eu" = Just "basque" + fromIso "fa" = Just "farsi" + fromIso "fi" = Just "finnish" + fromIso "fr" = Just "french" + fromIso "fur" = Just "friulan" + fromIso "ga" = Just "irish" + fromIso "gd" = Just "scottish" + fromIso "gez" = Just "ethiopic" + fromIso "gl" = Just "galician" + fromIso "gu" = Just "gujarati" + fromIso "he" = Just "hebrew" + fromIso "hi" = Just "hindi" + fromIso "hr" = Just "croatian" + fromIso "hu" = Just "magyar" + fromIso "hy" = Just "armenian" + fromIso "ia" = 
Just "interlingua" + fromIso "id" = Just "indonesian" + fromIso "ie" = Just "interlingua" + fromIso "is" = Just "icelandic" + fromIso "it" = Just "italian" + fromIso "ja" = Just "japanese" + fromIso "km" = Just "khmer" + fromIso "kmr" = Just "kurmanji" + fromIso "kn" = Just "kannada" + fromIso "ko" = Just "korean" + fromIso "la" = Just "latin" + fromIso "lo" = Just "lao" + fromIso "lt" = Just "lithuanian" + fromIso "lv" = Just "latvian" + fromIso "ml" = Just "malayalam" + fromIso "mn" = Just "mongolian" + fromIso "mr" = Just "marathi" + fromIso "nb" = Just "norsk" + fromIso "nl" = Just "dutch" + fromIso "nn" = Just "nynorsk" + fromIso "no" = Just "norsk" + fromIso "nqo" = Just "nko" + fromIso "oc" = Just "occitan" + fromIso "or" = Just "oriya" + fromIso "pa" = Just "punjabi" + fromIso "pl" = Just "polish" + fromIso "pms" = Just "piedmontese" + fromIso "pt" = Just "portuguese" + fromIso "rm" = Just "romansh" + fromIso "ro" = Just "romanian" + fromIso "ru" = Just "russian" + fromIso "sa" = Just "sanskrit" + fromIso "se" = Just "samin" + fromIso "sk" = Just "slovak" + fromIso "sq" = Just "albanian" + fromIso "sr" = Just "serbian" + fromIso "sv" = Just "swedish" + fromIso "syr" = Just "syriac" + fromIso "ta" = Just "tamil" + fromIso "te" = Just "telugu" + fromIso "th" = Just "thai" + fromIso "ti" = Just "ethiopic" + fromIso "tk" = Just "turkmen" + fromIso "tr" = Just "turkish" + fromIso "uk" = Just "ukrainian" + fromIso "ur" = Just "urdu" + fromIso "vi" = Just "vietnamese" + fromIso _ = Nothing diff --git a/src/Text/Pandoc/Writers/LaTeX/Table.hs b/src/Text/Pandoc/Writers/LaTeX/Table.hs index 4fbebd1be1f0..72c20b66e00a 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Table.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Table.hs @@ -14,7 +14,8 @@ Output LaTeX formatted tables. 
module Text.Pandoc.Writers.LaTeX.Table ( tableToLaTeX ) where -import Control.Monad.State.Strict +import Control.Monad.State.Strict ( gets, modify ) +import Control.Monad (when) import Data.List (intersperse) import qualified Data.List.NonEmpty as NonEmpty import Data.List.NonEmpty (NonEmpty ((:|))) @@ -49,31 +50,42 @@ tableToLaTeX inlnsToLaTeX blksToLaTeX tbl = do let removeNote (Note _) = Span ("", [], []) [] removeNote x = x let colCount = ColumnCount $ length specs - firsthead <- if isEmpty capt || isEmptyHead thead - then return empty - else ($$ text "\\endfirsthead") <$> - headToLaTeX blksToLaTeX colCount thead - head' <- if isEmptyHead thead - then return "\\toprule()" - -- avoid duplicate notes in head and firsthead: - else headToLaTeX blksToLaTeX colCount - (if isEmpty firsthead - then thead - else walk removeNote thead) + -- The first head is not repeated on the following pages. If we were to just + -- use a single head, without a separate first head, then the caption would be + -- repeated on all pages that contain a part of the table. We avoid this by + -- making the caption part of the first head. The downside is that we must + -- duplicate the header rows for this. 
+ head' <- do + let mkHead = headToLaTeX blksToLaTeX colCount + case (not $ isEmpty capt, not $ isEmptyHead thead) of + (False, False) -> return "\\toprule()" + (False, True) -> mkHead thead + (True, False) -> return (capt $$ "\\toprule()" $$ "\\endfirsthead") + (True, True) -> do + -- avoid duplicate notes in head and firsthead: + firsthead <- mkHead thead + repeated <- mkHead (walk removeNote thead) + return $ capt $$ firsthead $$ "\\endfirsthead" $$ repeated rows' <- mapM (rowToLaTeX blksToLaTeX colCount BodyCell) $ - mconcat (map bodyRows tbodies) <> footRows tfoot + mconcat (map bodyRows tbodies) + foot' <- if isEmptyFoot tfoot + then pure empty + else do + lastfoot <- mapM (rowToLaTeX blksToLaTeX colCount BodyCell) $ + footRows tfoot + pure $ "\\midrule()" $$ vcat lastfoot modify $ \s -> s{ stTable = True } notes <- notesToLaTeX <$> gets stNotes return $ "\\begin{longtable}[]" <> braces ("@{}" <> colDescriptors tbl <> "@{}") -- the @{} removes extra space at beginning and end - $$ capt - $$ firsthead $$ head' $$ "\\endhead" - $$ vcat rows' + $$ foot' $$ "\\bottomrule()" + $$ "\\endlastfoot" + $$ vcat rows' $$ "\\end{longtable}" $$ captNotes $$ notes @@ -205,6 +217,10 @@ isEmptyHead :: Ann.TableHead -> Bool isEmptyHead (Ann.TableHead _attr []) = True isEmptyHead (Ann.TableHead _attr rows) = all (null . headerRowCells) rows +isEmptyFoot :: Ann.TableFoot -> Bool +isEmptyFoot (Ann.TableFoot _attr []) = True +isEmptyFoot (Ann.TableFoot _attr rows) = all (null . headerRowCells) rows + -- | Gets all cells in a header row. 
headerRowCells :: Ann.HeaderRow -> [Ann.Cell] headerRowCells (Ann.HeaderRow _attr _rownum cells) = cells diff --git a/src/Text/Pandoc/Writers/LaTeX/Types.hs b/src/Text/Pandoc/Writers/LaTeX/Types.hs index c06b7e923082..ff5b22cad898 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Types.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Types.hs @@ -35,6 +35,7 @@ data WriterState = , stStrikeout :: Bool -- ^ true if document has strikeout , stUrl :: Bool -- ^ true if document has visible URL link , stGraphics :: Bool -- ^ true if document contains images + , stSVG :: Bool -- ^ true if document contains SVGs , stLHS :: Bool -- ^ true if document has literate haskell code , stHasChapters :: Bool -- ^ true if document has chapters , stCsquotes :: Bool -- ^ true if document uses csquotes @@ -66,6 +67,7 @@ startingState options = , stStrikeout = False , stUrl = False , stGraphics = False + , stSVG = False , stLHS = False , stHasChapters = case writerTopLevelDivision options of TopLevelPart -> True diff --git a/src/Text/Pandoc/Writers/LaTeX/Util.hs b/src/Text/Pandoc/Writers/LaTeX/Util.hs index a0d207a70023..1db4983a13dc 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Util.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Util.hs @@ -37,7 +37,7 @@ import qualified Data.Text as T import Text.Pandoc.Extensions (Extension(Ext_smart)) import Data.Char (isLetter, isSpace, isDigit, isAscii, ord, isAlphaNum) import Text.Printf (printf) -import Text.Pandoc.Shared (safeRead, elemText) +import Text.Pandoc.Shared (safeRead) import qualified Data.Text.Normalize as Normalize import Data.List (uncons) @@ -50,7 +50,7 @@ data StringContext = TextString stringToLaTeX :: PandocMonad m => StringContext -> Text -> LW m Text stringToLaTeX context zs = do opts <- gets stOptions - when ('\x200c' `elemText` zs) $ + when (T.any (== '\x200c') zs) $ modify (\s -> s { stZwnj = True }) return $ T.pack $ foldr (go opts context) mempty $ T.unpack $ @@ -182,7 +182,7 @@ toLabel z = go `fmap` stringToLaTeX URLString z where go = T.concatMap 
$ \x -> case x of _ | (isLetter x || isDigit x) && isAscii x -> T.singleton x - | x `elemText` "_-+=:;." -> T.singleton x + | T.any (== x) "_-+=:;." -> T.singleton x | otherwise -> T.pack $ "ux" <> printf "%x" (ord x) -- | Puts contents into LaTeX command. @@ -237,9 +237,8 @@ wrapDiv (_,classes,kvs) t = do Just "rtl" -> align "RTL" Just "ltr" -> align "LTR" _ -> id - wrapLang txt = case lang of - Just lng -> let l = toBabel lng - in inCmd "begin" "otherlanguage" + wrapLang txt = case lang >>= toBabel of + Just l -> inCmd "begin" "otherlanguage" <> (braces (literal l)) $$ blankline <> txt <> blankline $$ inCmd "end" "otherlanguage" diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 9371c25414d5..4e1651e53816 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -14,7 +14,9 @@ Conversion of 'Pandoc' documents to roff man page format. -} module Text.Pandoc.Writers.Man ( writeMan ) where -import Control.Monad.State.Strict +import Control.Monad ( liftM, zipWithM, forM ) +import Control.Monad.State.Strict ( StateT, gets, modify, evalStateT ) +import Control.Monad.Trans (MonadTrans(lift)) import Data.List (intersperse) import Data.List.NonEmpty (nonEmpty) import Data.Maybe (fromMaybe) @@ -27,6 +29,7 @@ import Text.Pandoc.Logging import Text.Pandoc.Options import Text.DocLayout import Text.Pandoc.Shared +import Text.Pandoc.URI import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared @@ -73,7 +76,7 @@ pandocToMan opts (Pandoc meta blocks) = do $ setFieldsFromTitle $ defField "has-tables" hasTables $ defField "hyphenate" True - $ defField "pandoc-version" pandocVersion metadata + metadata return $ render colwidth $ case writerTemplate opts of Nothing -> main diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index f6a207991642..ce508f08d5bd 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ 
b/src/Text/Pandoc/Writers/Markdown.hs @@ -2,6 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE BangPatterns #-} {- | Module : Text.Pandoc.Writers.Markdown Copyright : Copyright (C) 2006-2022 John MacFarlane @@ -20,8 +21,9 @@ module Text.Pandoc.Writers.Markdown ( writeCommonMark, writeMarkua, writePlain) where -import Control.Monad.Reader -import Control.Monad.State.Strict +import Control.Monad (foldM, zipWithM, MonadPlus(..), when) +import Control.Monad.Reader ( asks, MonadReader(local) ) +import Control.Monad.State.Strict ( gets, modify ) import Data.Default import Data.List (intersperse, sortOn) import Data.List.NonEmpty (nonEmpty, NonEmpty(..)) @@ -29,6 +31,7 @@ import qualified Data.Map as M import Data.Maybe (fromMaybe, mapMaybe, isNothing) import qualified Data.Set as Set import Data.Text (Text) +import Data.Char (isSpace) import qualified Data.Text as T import Text.HTML.TagSoup (Tag (..), isTagText, parseTags) import Text.Pandoc.Class.PandocMonad (PandocMonad, report) @@ -309,7 +312,7 @@ classOrAttrsToMarkdown ("",[cls],[]) = literal cls classOrAttrsToMarkdown attrs = attrsToMarkdown attrs -- | Ordered list start parser for use in Para below. 
-olMarker :: Parser Text ParserState () +olMarker :: Parsec Text ParserState () olMarker = do (start, style', delim) <- anyOrderedListMarker if delim == Period && (style' == UpperAlpha || (style' == UpperRoman && @@ -396,7 +399,7 @@ blockToMarkdown' opts (Div attrs ils) = do blockToMarkdown' opts (Plain inlines) = do -- escape if para starts with ordered list marker variant <- asks envVariant - let escapeMarker = T.concatMap $ \x -> if x `elemText` ".()" + let escapeMarker = T.concatMap $ \x -> if T.any (== x) ".()" then T.pack ['\\', x] else T.singleton x let startsWithSpace (Space:_) = True @@ -448,6 +451,8 @@ blockToMarkdown' opts b@(RawBlock f str) = do Commonmark | f `elem` ["gfm", "commonmark", "commonmark_x", "markdown"] -> return $ literal str <> literal "\n" + | f `elem` ["html", "html5", "html4"] + -> return $ literal (removeBlankLinesInHTML str) <> literal "\n" Markdown | f `elem` ["markdown", "markdown_github", "markdown_phpextra", "markdown_mmd", "markdown_strict"] @@ -631,7 +636,7 @@ blockToMarkdown' opts t@(Table _ blkCapt specs thead tbody tfoot) = do (id,) <$> pipeTable opts (all null headers) aligns' widths' rawHeaders rawRows | isEnabled Ext_raw_html opts -> fmap (id,) $ - literal <$> + literal . removeBlankLinesInHTML <$> writeHtml5String opts{ writerTemplate = Nothing } (Pandoc nullMeta [t]) | otherwise -> return (id, literal "[TABLE]") return $ nst (tbl $$ caption'') $$ blankline @@ -822,3 +827,12 @@ lineBreakToSpace :: Inline -> Inline lineBreakToSpace LineBreak = Space lineBreakToSpace SoftBreak = Space lineBreakToSpace x = x + +removeBlankLinesInHTML :: Text -> Text +removeBlankLinesInHTML = T.pack . go False . 
T.unpack + where go _ [] = [] + go True ('\n':cs) = " " <> go False cs + go False ('\n':cs) = '\n' : go True cs + go !afternewline (!c:cs) + | isSpace c = c : go afternewline cs + | otherwise = c : go False cs diff --git a/src/Text/Pandoc/Writers/Markdown/Inline.hs b/src/Text/Pandoc/Writers/Markdown/Inline.hs index 4e98530b1b3e..c424a69c1167 100644 --- a/src/Text/Pandoc/Writers/Markdown/Inline.hs +++ b/src/Text/Pandoc/Writers/Markdown/Inline.hs @@ -16,8 +16,11 @@ module Text.Pandoc.Writers.Markdown.Inline ( attrsToMarkdown, attrsToMarkua ) where +import Control.Monad (when, liftM2) import Control.Monad.Reader + ( asks, MonadReader(local) ) import Control.Monad.State.Strict + ( MonadState(get), gets, modify ) import Data.Char (isAlphaNum, isDigit) import Data.List (find, intersperse) import Data.List.NonEmpty (nonEmpty) @@ -32,7 +35,7 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space) import Text.DocLayout import Text.Pandoc.Shared -import Text.Pandoc.Network.HTTP (urlEncode) +import Text.Pandoc.URI (urlEncode, escapeURI, isURI) import Text.Pandoc.Writers.Shared import Text.Pandoc.Walk import Text.Pandoc.Writers.HTML (writeHtml5String) @@ -117,8 +120,8 @@ attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys] attribClasses = case attribs of (_,[],_) -> empty (_,cs,_) -> hsep $ - map (escAttr . ("."<>)) - cs + map (escAttr . ("."<>)) $ + filter (not . 
T.null) cs attribKeys = case attribs of (_,_,[]) -> empty (_,_,ks) -> hsep $ diff --git a/src/Text/Pandoc/Writers/Markdown/Types.hs b/src/Text/Pandoc/Writers/Markdown/Types.hs index 2e94597be3ee..48eace5176ff 100644 --- a/src/Text/Pandoc/Writers/Markdown/Types.hs +++ b/src/Text/Pandoc/Writers/Markdown/Types.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.Markdown.Types Copyright : Copyright (C) 2006-2022 John MacFarlane @@ -78,5 +77,3 @@ instance Default WriterState , stIds = Set.empty , stNoteNum = 1 } - - diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index f4bdf239cd72..89f65715cdf0 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -26,6 +26,7 @@ import Text.Pandoc.Logging import Text.Pandoc.Options import Text.DocLayout (render, literal) import Text.Pandoc.Shared +import Text.Pandoc.URI import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Writers.Shared import Text.Pandoc.XML (escapeStringForXML) diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index 28b2c438c375..77f22fc9e9e5 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -21,6 +21,9 @@ TODO: module Text.Pandoc.Writers.Ms ( writeMs ) where import Control.Monad.State.Strict + ( gets, modify, evalStateT ) +import Control.Monad ( MonadPlus(mplus), liftM, unless, forM ) +import Data.Containers.ListUtils (nubOrd) import Data.Char (isAscii, isLower, isUpper, ord) import Data.List (intercalate, intersperse) import Data.List.NonEmpty (nonEmpty) @@ -76,7 +79,6 @@ pandocToMs opts (Pandoc meta blocks) = do let context = defField "body" main $ defField "has-inline-math" hasInlineMath $ defField "hyphenate" True - $ defField "pandoc-version" pandocVersion $ defField "toc" (writerTableOfContents opts) $ defField "title-meta" titleMeta $ defField "author-meta" (T.intercalate "; " authorsMeta) @@ -454,7 +456,7 @@ inlineToMs 
opts (Math InlineMath str) = do Left il -> inlineToMs opts il Right r -> return $ literal "@" <> literal r <> literal "@" inlineToMs opts (Math DisplayMath str) = do - res <- convertMath writeEqn InlineMath str + res <- convertMath writeEqn DisplayMath str case res of Left il -> do contents <- inlineToMs opts il @@ -567,7 +569,7 @@ styleToMs sty = vcat $ colordefs <> map (toMacro sty) alltoktypes colordefs = map toColorDef allcolors toColorDef c = literal (".defcolor " <> hexColor c <> " rgb #" <> hexColor c) - allcolors = catMaybes $ ordNub $ + allcolors = catMaybes $ nubOrd $ [defaultColor sty, backgroundColor sty, lineNumberColor sty, lineNumberBackgroundColor sty] <> concatMap (colorsForToken. snd) (Map.toList (tokenStyles sty)) diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs index f3eecceb84c7..803394212713 100644 --- a/src/Text/Pandoc/Writers/Muse.hs +++ b/src/Text/Pandoc/Writers/Muse.hs @@ -25,9 +25,12 @@ However, @\<literal style="html">@ tag is used for HTML raw blocks even though it is supported only in Emacs Muse. 
-} module Text.Pandoc.Writers.Muse (writeMuse) where +import Control.Monad (zipWithM) import Control.Monad.Except (throwError) import Control.Monad.Reader + ( asks, MonadReader(local), ReaderT(runReaderT) ) import Control.Monad.State.Strict + ( StateT, gets, modify, evalStateT ) import Data.Char (isAlphaNum, isAsciiLower, isAsciiUpper, isDigit, isSpace) import Data.Default import Data.List (intersperse, transpose) @@ -43,6 +46,7 @@ import Text.Pandoc.ImageSize import Text.Pandoc.Options import Text.DocLayout import Text.Pandoc.Shared +import Text.Pandoc.URI import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs index b83ce16889db..dded397ca90b 100644 --- a/src/Text/Pandoc/Writers/Native.hs +++ b/src/Text/Pandoc/Writers/Native.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.Native Copyright : Copyright (C) 2006-2022 John MacFarlane diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index cafa48c0bef2..4025b4ab2c54 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -13,6 +13,7 @@ Conversion of 'Pandoc' documents to ODT. 
-} module Text.Pandoc.Writers.ODT ( writeODT ) where import Codec.Archive.Zip +import Control.Monad import Control.Monad.Except (catchError, throwError) import Control.Monad.State.Strict import qualified Data.ByteString.Lazy as B @@ -27,6 +28,7 @@ import System.FilePath (takeDirectory, takeExtension, (<.>)) import Text.Collate.Lang (Lang (..), renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import qualified Text.Pandoc.Class.PandocMonad as P +import Text.Pandoc.Data (readDataFile) import Text.Pandoc.Definition import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.ImageSize @@ -34,7 +36,8 @@ import Text.Pandoc.Logging import Text.Pandoc.MIME (extensionFromMimeType, getMimeType) import Text.Pandoc.Options (WrapOption (..), WriterOptions (..)) import Text.DocLayout -import Text.Pandoc.Shared (stringify, pandocVersion, tshow) +import Text.Pandoc.Shared (stringify, tshow) +import Text.Pandoc.Version (pandocVersionText) import Text.Pandoc.Writers.Shared (lookupMetaString, lookupMetaBlocks, fixDisplayMath, getLang, ensureValidXmlIdentifiers) @@ -45,6 +48,7 @@ import Text.Pandoc.XML import Text.Pandoc.XML.Light import Text.TeXMath import qualified Text.XML.Light as XL +import Network.URI (parseRelativeReference, URI(uriPath)) newtype ODTState = ODTState { stEntries :: [Entry] } @@ -59,10 +63,25 @@ writeODT :: PandocMonad m writeODT opts doc = let initState = ODTState{ stEntries = [] } - doc' = ensureValidXmlIdentifiers doc + doc' = fixInternalLinks . ensureValidXmlIdentifiers $ doc in evalStateT (pandocToODT opts doc') initState +-- | ODT internal links are evaluated relative to an imaginary folder +-- structure that mirrors the zip structure. The result is that relative +-- links in the document need to start with `..`. See #3524. 
+fixInternalLinks :: Pandoc -> Pandoc +fixInternalLinks = walk go + where + go (Link attr ils (src,tit)) = + Link attr ils (fixRel src,tit) + go x = x + fixRel uri = + case parseRelativeReference (T.unpack uri) of + Just u + | not (null (uriPath u)) -> tshow $ u{ uriPath = "../" <> uriPath u } + _ -> uri + -- | Produce an ODT file from a Pandoc document. pandocToODT :: PandocMonad m => WriterOptions -- ^ Writer options @@ -77,7 +96,7 @@ pandocToODT opts doc@(Pandoc meta _) = do case writerReferenceDoc opts of Just f -> liftM toArchive $ lift $ P.readFileLazy f Nothing -> lift $ toArchive . B.fromStrict <$> - P.readDataFile "reference.odt" + readDataFile "reference.odt" -- handle formulas and pictures -- picEntriesRef <- P.newIORef ([] :: [Entry]) doc' <- walkM (transformPicMath opts) $ walk fixDisplayMath doc @@ -139,7 +158,7 @@ pandocToODT opts doc@(Pandoc meta _) = do ,("xmlns:ooo","http://openoffice.org/2004/office") ,("xmlns:grddl","http://www.w3.org/2003/g/data-view#") ,("office:version","1.2")] ( inTags True "office:meta" [] - ( metaTag "meta:generator" ("Pandoc/" <> pandocVersion) + ( metaTag "meta:generator" ("Pandoc/" <> pandocVersionText) $$ metaTag "dc:title" (stringify title) $$ diff --git a/src/Text/Pandoc/Writers/OOXML.hs b/src/Text/Pandoc/Writers/OOXML.hs index 0c6cdab839e2..98bde20dfbe4 100644 --- a/src/Text/Pandoc/Writers/OOXML.hs +++ b/src/Text/Pandoc/Writers/OOXML.hs @@ -24,7 +24,7 @@ module Text.Pandoc.Writers.OOXML ( mknode ) where import Codec.Archive.Zip -import Control.Monad.Reader +import Control.Monad (mplus) import Control.Monad.Except (throwError) import Text.Pandoc.Error import qualified Data.ByteString as B diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 8af64969b27a..f7142b7858ff 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -15,7 +15,8 @@ Conversion of 'Pandoc' documents to OpenDocument XML. 
-} module Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) where import Control.Arrow ((***), (>>>)) -import Control.Monad.State.Strict hiding (when) +import Control.Monad (unless, liftM) +import Control.Monad.State.Strict ( StateT(..), modify, gets, lift ) import Data.Char (chr) import Data.Foldable (find) import Data.List (sortOn, sortBy, foldl') @@ -26,8 +27,8 @@ import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T import Text.Collate.Lang (Lang (..), parseLang) -import Text.Pandoc.Class.PandocMonad (PandocMonad, report, translateTerm, - setTranslations, toLang) +import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) +import Text.Pandoc.Translations (translateTerm, setTranslations) import Text.Pandoc.Definition import qualified Text.Pandoc.Builder as B import Text.Pandoc.Logging diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 936e2aa87c4d..080759ebdb90 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -16,9 +16,11 @@ Conversion of 'Pandoc' documents to Emacs Org-Mode. 
Org-Mode: <http://orgmode.org> -} module Text.Pandoc.Writers.Org (writeOrg) where +import Control.Monad (zipWithM) import Control.Monad.State.Strict + ( StateT, gets, modify, evalStateT ) import Data.Char (isAlphaNum, isDigit) -import Data.List (intersect, intersperse, partition, transpose) +import Data.List (intersperse, partition, transpose) import Data.List.NonEmpty (nonEmpty) import Data.Text (Text) import qualified Data.Text as T @@ -29,6 +31,7 @@ import Text.Pandoc.Logging import Text.Pandoc.Options import Text.DocLayout import Text.Pandoc.Shared +import Text.Pandoc.URI import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Citeproc.Locator (parseLocator, LocatorMap(..), LocatorInfo(..)) import Text.Pandoc.Writers.Shared @@ -166,12 +169,11 @@ blockToOrg (CodeBlock (ident,classes,kvs) str) = do then " +n" <> startnum else " -n" <> startnum else "" - let at = map pandocLangToOrg classes `intersect` orgLangIdentifiers - let lang = case at of + let lang = case filter (`notElem` ["example","code"]) classes of [] -> Nothing l:_ -> if "code" `elem` classes -- check for ipynb code cell - then Just ("jupyter-" <> l) - else Just l + then Just ("jupyter-" <> pandocLangToOrg l) + else Just (pandocLangToOrg l) let args = mconcat $ [ " :" <> k <> " " <> v | (k, v) <- kvs, k `notElem` ["startFrom", "org-language"]] @@ -514,8 +516,8 @@ orgPath src = case T.uncons src of isUrl :: Text -> Bool isUrl cs = let (scheme, path) = T.break (== ':') cs - in T.all (\c -> isAlphaNum c || c `elemText` ".-") scheme - && not (T.null path) + in T.all (\c -> isAlphaNum c || T.any (== c) ".-") scheme + && not (T.null path) -- | Translate from pandoc's programming language identifiers to those used by -- org-mode. @@ -528,53 +530,6 @@ pandocLangToOrg cs = "bash" -> "sh" _ -> cs --- | List of language identifiers recognized by org-mode. --- See <https://orgmode.org/manual/Languages.html>. 
-orgLangIdentifiers :: [Text] -orgLangIdentifiers = - [ "asymptote" - , "lisp" - , "awk" - , "lua" - , "C" - , "matlab" - , "C++" - , "mscgen" - , "clojure" - , "ocaml" - , "css" - , "octave" - , "D" - , "org" - , "ditaa" - , "oz" - , "calc" - , "perl" - , "emacs-lisp" - , "plantuml" - , "eshell" - , "processing" - , "fortran" - , "python" - , "gnuplot" - , "R" - , "screen" - , "ruby" - , "dot" - , "sass" - , "haskell" - , "scheme" - , "java" - , "sed" - , "js" - , "sh" - , "latex" - , "sql" - , "ledger" - , "sqlite" - , "lilypond" - , "vala" ] - -- taken from oc-csl.el in the org source tree: locmap :: LocatorMap locmap = LocatorMap $ M.fromList diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs index 9b14b231100d..d35930c7b63e 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs @@ -21,9 +21,12 @@ Text.Pandoc.Writers.Powerpoint.Presentation) to a zip archive. module Text.Pandoc.Writers.Powerpoint.Output ( presentationToArchive ) where +import Control.Monad ( MonadPlus(mplus), foldM, unless ) import Control.Monad.Except (throwError, catchError) import Control.Monad.Reader + ( asks, MonadReader(local), ReaderT(runReaderT) ) import Control.Monad.State + ( StateT, gets, modify, evalStateT ) import Codec.Archive.Zip import Data.List (intercalate, stripPrefix, nub, union, isPrefixOf, intersperse) import Data.Bifunctor (bimap) @@ -47,6 +50,7 @@ import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Error (PandocError(..)) import qualified Text.Pandoc.Class.PandocMonad as P +import Text.Pandoc.Data (readDataFile, readDefaultDataFile) import Text.Pandoc.Options import Text.Pandoc.MIME import qualified Data.ByteString.Lazy as BL @@ -572,11 +576,11 @@ presentationToArchive :: PandocMonad m => WriterOptions -> Meta -> Presentation -> m Archive presentationToArchive opts meta pres = do distArchive <- toArchive . 
BL.fromStrict <$> - P.readDefaultDataFile "reference.pptx" + readDefaultDataFile "reference.pptx" refArchive <- case writerReferenceDoc opts of Just f -> toArchive <$> P.readFileLazy f Nothing -> toArchive . BL.fromStrict <$> - P.readDataFile "reference.pptx" + readDataFile "reference.pptx" let (referenceLayouts, defaultReferenceLayouts) = (getLayoutsFromArchive refArchive, getLayoutsFromArchive distArchive) diff --git a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs index fd6b83120752..49f4f656f623 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs @@ -43,7 +43,7 @@ module Text.Pandoc.Writers.Powerpoint.Presentation ( documentToPresentation , LinkTarget(..) ) where - +import Control.Monad import Control.Monad.Reader import Control.Monad.State import Data.List (intercalate) diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 021674b34f23..6c166f279672 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -14,8 +14,12 @@ Conversion of 'Pandoc' documents to reStructuredText. 
reStructuredText: <http://docutils.sourceforge.net/rst.html> -} module Text.Pandoc.Writers.RST ( writeRST, flatten ) where -import Control.Monad.State.Strict -import Data.Char (isSpace) +import Control.Monad.State.Strict ( StateT, gets, modify, evalStateT ) +import Control.Monad (zipWithM, liftM) +import Data.Char (isSpace, generalCategory, isAscii, isAlphaNum, + GeneralCategory( + ClosePunctuation, OpenPunctuation, InitialQuote, + FinalQuote, DashPunctuation, OtherPunctuation)) import Data.List (transpose, intersperse, foldl') import qualified Data.List.NonEmpty as NE import Data.Maybe (fromMaybe) @@ -29,6 +33,7 @@ import Text.Pandoc.Logging import Text.Pandoc.Options import Text.DocLayout import Text.Pandoc.Shared +import Text.Pandoc.URI import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Writers.Shared import Text.Pandoc.Walk @@ -160,25 +165,62 @@ pictToRST (label, (attr, src, _, mbtarget)) = do -- | Escape special characters for RST. escapeText :: WriterOptions -> Text -> Text -escapeText o = T.pack . escapeString' True o . T.unpack -- This ought to be parser - where - escapeString' _ _ [] = [] - escapeString' firstChar opts (c:cs) = - case c of - '\\' -> '\\':c:escapeString' False opts cs - _ | c `elemText` "`*_|" && - (firstChar || null cs) -> '\\':c:escapeString' False opts cs - '\'' | isEnabled Ext_smart opts -> '\\':'\'':escapeString' False opts cs - '"' | isEnabled Ext_smart opts -> '\\':'"':escapeString' False opts cs - '-' | isEnabled Ext_smart opts -> - case cs of - '-':_ -> '\\':'-':escapeString' False opts cs - _ -> '-':escapeString' False opts cs - '.' | isEnabled Ext_smart opts -> - case cs of - '.':'.':rest -> '\\':'.':'.':'.':escapeString' False opts rest - _ -> '.':escapeString' False opts cs - _ -> c : escapeString' False opts cs +escapeText opts t = + if T.any isSpecial t + then T.pack . escapeString' True . 
T.unpack $ t + else t -- optimization + where + isSmart = isEnabled Ext_smart opts + isSpecial c = c == '\\' || c == '_' || c == '`' || c == '*' || c == '|' + || (isSmart && (c == '-' || c == '.' || c == '"' || c == '\'')) + canFollowInlineMarkup c = c == '-' || c == '.' || c == ',' || c == ':' + || c == ';' || c == '!' || c == '?' || c == '\'' + || c == '"' || c == ')' || c == ']' || c == '}' + || c == '>' || isSpace c + || (not (isAscii c) && + generalCategory c `elem` + [OpenPunctuation, InitialQuote, FinalQuote, + DashPunctuation, OtherPunctuation]) + canPrecedeInlineMarkup c = c == '-' || c == ':' || c == '/' || c == '\'' + || c == '"' || c == '<' || c == '(' || c == '[' + || c == '{' || isSpace c + || (not (isAscii c) && + generalCategory c `elem` + [ClosePunctuation, InitialQuote, FinalQuote, + DashPunctuation, OtherPunctuation]) + escapeString' canStart cs = + case cs of + [] -> [] + d:ds + | d == '\\' + -> '\\' : d : escapeString' False ds + '\'':ds + | isSmart + -> '\\' : '\'' : escapeString' True ds + '"':ds + | isSmart + -> '\\' : '"' : escapeString' True ds + '-':'-':ds + | isSmart + -> '\\' : '-' : escapeString' False ('-':ds) + '.':'.':'.':ds + | isSmart + -> '\\' : '.' 
: escapeString' False ('.':'.':ds) + [e] + | e == '*' || e == '_' || e == '|' || e == '`' + -> ['\\',e] + d:ds + | canPrecedeInlineMarkup d + -> d : escapeString' True ds + e:d:ds + | e == '*' || e == '_' || e == '|' || e == '`' + , (not canStart && canFollowInlineMarkup d) + || (canStart && not (isSpace d)) + -> '\\' : e : escapeString' False (d:ds) + '_':d:ds + | not (isAlphaNum d) + -> '\\' : '_' : escapeString' False (d:ds) + d:ds -> d : escapeString' False ds titleToRST :: PandocMonad m => [Inline] -> [Inline] -> RST m (Doc Text) titleToRST [] _ = return empty @@ -311,7 +353,7 @@ blockToRST (CodeBlock (_,classes,kvs) str) = do blockToRST (BlockQuote blocks) = do contents <- blockListToRST blocks return $ nest 3 contents <> blankline -blockToRST (Table _ blkCapt specs thead tbody tfoot) = do +blockToRST (Table _attrs blkCapt specs thead tbody tfoot) = do let (caption, aligns, widths, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot caption' <- inlineListToRST caption let blocksToDoc opts bs = do @@ -321,18 +363,23 @@ blockToRST (Table _ blkCapt specs thead tbody tfoot) = do modify $ \st -> st{ stOptions = oldOpts } return result opts <- gets stOptions - let isSimple = all (== 0) widths && length widths > 1 - tbl <- if isSimple - then do - tbl' <- simpleTable opts blocksToDoc headers rows - if offset tbl' > writerColumns opts - then gridTable opts blocksToDoc (all null headers) - (map (const AlignDefault) aligns) widths - headers rows - else return tbl' - else gridTable opts blocksToDoc (all null headers) - (map (const AlignDefault) aligns) widths - headers rows + let renderGrid = gridTable opts blocksToDoc (all null headers) + (map (const AlignDefault) aligns) widths + headers rows + isSimple = all (== 0) widths && length widths > 1 + renderSimple = do + tbl' <- simpleTable opts blocksToDoc headers rows + if offset tbl' > writerColumns opts + then renderGrid + else return tbl' + isList = writerListTables opts + renderList = tableToRSTList caption 
(map (const AlignDefault) aligns) + widths headers rows + rendered + | isList = renderList + | isSimple = renderSimple + | otherwise = renderGrid + tbl <- rendered return $ blankline $$ (if null caption then tbl @@ -438,6 +485,79 @@ blockListToRST :: PandocMonad m -> RST m (Doc Text) blockListToRST = blockListToRST' False +{- + +http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#directives + +According to the terminology used in the spec, a marker includes a +final whitespace and a block includes the directive arguments. Here +the variable names have slightly different meanings because we don't +want to finish the line with a space if there are no arguments, it +would produce rST that differs from what users expect in a way that's +not easy to detect + +-} +toRSTDirective :: Doc Text -> Doc Text -> [(Doc Text, Doc Text)] -> Doc Text -> Doc Text +toRSTDirective typ args options content = marker <> spaceArgs <> cr <> block + where marker = ".. " <> typ <> "::" + block = nest 3 (fieldList $$ + blankline $$ + content $$ + blankline) + spaceArgs = if isEmpty args then "" else " " <> args + -- a field list could end up being an empty doc thus being + -- omitted by $$ + fieldList = foldl ($$) "" $ map joinField options + -- a field body can contain multiple lines + joinField (name, body) = ":" <> name <> ": " <> body + +tableToRSTList :: PandocMonad m + => [Inline] + -> [Alignment] + -> [Double] + -> [[Block]] + -> [[[Block]]] + -> RST m (Doc Text) +tableToRSTList caption _ propWidths headers rows = do + captionRST <- inlineListToRST caption + opts <- gets stOptions + content <- listTableContent toWrite + pure $ toRSTDirective "list-table" captionRST (directiveOptions opts) content + where directiveOptions opts = widths (writerColumns opts) propWidths <> + headerRows + toWrite = if noHeaders then rows else headers:rows + headerRows = [("header-rows", text $ show (1 :: Int)) | not noHeaders] + widths tot pro = [("widths", showWidths tot pro) | + not (null 
propWidths || all (==0.0) propWidths)] + noHeaders = all null headers + -- >>> showWidths 70 [0.5, 0.5] + -- "35 35" + showWidths :: Int -> [Double] -> Doc Text + showWidths tot = text . unwords . map (show . toColumns tot) + -- toColumns converts a width expressed as a proportion of the + -- total into a width expressed as a number of columns + toColumns :: Int -> Double -> Int + toColumns t p = round (p * fromIntegral t) + listTableContent :: PandocMonad m => [[[Block]]] -> RST m (Doc Text) + listTableContent = joinTable joinDocsM joinDocsM . + mapTable blockListToRST + -- joinDocsM adapts joinDocs in order to work in the `RST m` monad + joinDocsM :: PandocMonad m => [RST m (Doc Text)] -> RST m (Doc Text) + joinDocsM = fmap joinDocs . sequence + -- joinDocs will be used to join cells and to join rows + joinDocs :: [Doc Text] -> Doc Text + joinDocs items = blankline $$ + (chomp . vcat . map formatItem) items $$ + blankline + formatItem :: Doc Text -> Doc Text + formatItem i = hang 3 "- " (i <> cr) + -- apply a function to all table cells changing their type + mapTable :: (a -> b) -> [[a]] -> [[b]] + mapTable = map . map + -- function hor to join cells and function ver to join rows + joinTable :: ([a] -> a) -> ([a] -> a) -> [[a]] -> a + joinTable hor ver = ver . map hor + transformInlines :: [Inline] -> [Inline] transformInlines = insertBS . filter hasContents . @@ -507,14 +627,14 @@ transformInlines = insertBS . 
okAfterComplex SoftBreak = True okAfterComplex LineBreak = True okAfterComplex (Str (T.uncons -> Just (c,_))) - = isSpace c || c `elemText` "-.,:;!?\\/'\")]}>–—" + = isSpace c || T.any (== c) "-.,:;!?\\/'\")]}>–—" okAfterComplex _ = False okBeforeComplex :: Inline -> Bool okBeforeComplex Space = True okBeforeComplex SoftBreak = True okBeforeComplex LineBreak = True okBeforeComplex (Str (T.unsnoc -> Just (_,c))) - = isSpace c || c `elemText` "-:/'\"<([{–—" + = isSpace c || T.any (== c) "-:/'\"<([{–—" okBeforeComplex _ = False isComplex :: Inline -> Bool isComplex (Emph _) = True @@ -672,7 +792,7 @@ inlineToRST (Code _ str) = do -- we use :literal: when the code contains backticks, since -- :literal: allows backslash-escapes; see #3974 return $ - if '`' `elemText` str + if T.any (== '`') str then ":literal:`" <> literal (escapeText opts (trim str)) <> "`" else "``" <> literal (trim str) <> "``" inlineToRST (Str str) = do @@ -685,7 +805,7 @@ inlineToRST (Math t str) = do modify $ \st -> st{ stHasMath = True } return $ if t == InlineMath then ":math:`" <> literal str <> "`" - else if '\n' `elemText` str + else if T.any (== '\n') str then blankline $$ ".. math::" $$ blankline $$ nest 3 (literal str) $$ blankline else blankline $$ (".. math:: " <> literal str) $$ blankline diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index ff0f4cef2345..ad91b46ece42 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -134,7 +134,7 @@ handleUnicode = T.concatMap $ \c -> where surrogate x = not ( (0x0000 <= ord x && ord x <= 0xd7ff) || (0xe000 <= ord x && ord x <= 0xffff) ) - enc x = "\\u" <> tshow (ord x) <> "?" + enc x = "\\u" <> tshow (ord x) <> " ?" -- | Escape special characters. 
escapeSpecial :: Text -> Text diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 64cd85997a00..2b109c65d0fa 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -245,8 +245,9 @@ gridTable opts blocksToDoc headless aligns widths headers rows = do -- the number of columns will be used in case of even widths let numcols = maximum (length aligns :| length widths : map length (headers:rows)) - let officialWidthsInChars widths' = map ( - (\x -> if x < 1 then 1 else x) . + let officialWidthsInChars :: [Double] -> [Int] + officialWidthsInChars widths' = map ( + (max 1) . (\x -> x - 3) . floor . (fromIntegral (writerColumns opts) *) ) widths' @@ -453,6 +454,7 @@ sectionToListItem opts (Div (ident,_,_) then headerText' else [Link ("toc-" <> ident, [], []) headerText' ("#" <> ident, "")] listContents = filter (not . null) $ map (sectionToListItem opts) subsecs +sectionToListItem opts (Div _ [d@Div{}]) = sectionToListItem opts d -- #8402 sectionToListItem _ _ = [] -- | Returns 'True' iff the list of blocks has a @'Plain'@ as its last diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs index c67005ec2d10..835dbf1ea8cc 100644 --- a/src/Text/Pandoc/Writers/TEI.hs +++ b/src/Text/Pandoc/Writers/TEI.hs @@ -22,6 +22,7 @@ import Text.Pandoc.Logging import Text.Pandoc.Options import Text.DocLayout import Text.Pandoc.Shared +import Text.Pandoc.URI import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Writers.Shared import Text.Pandoc.XML diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index cef1d0c47835..63f0065a9ce4 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -12,8 +12,10 @@ Conversion of 'Pandoc' format into Texinfo. 
-} module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where +import Control.Monad (zipWithM) import Control.Monad.Except (throwError) import Control.Monad.State.Strict + ( StateT, MonadState(get), gets, modify, evalStateT ) import Data.Char (chr, ord, isAlphaNum) import Data.List (maximumBy, transpose, foldl') import Data.List.NonEmpty (nonEmpty) @@ -31,6 +33,7 @@ import Text.Pandoc.Logging import Text.Pandoc.Options import Text.DocLayout import Text.Pandoc.Shared +import Text.Pandoc.URI import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Writers.Shared import Text.Printf (printf) diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index f27a38b08e48..44c5df289a6c 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -13,7 +13,8 @@ Conversion of 'Pandoc' documents to Textile markup. Textile: <http://thresholdstate.com/articles/4312/the-textile-reference-manual> -} module Text.Pandoc.Writers.Textile ( writeTextile ) where -import Control.Monad.State.Strict +import Control.Monad (zipWithM, liftM) +import Control.Monad.State.Strict ( StateT, gets, modify, evalStateT ) import Data.Char (isSpace) import Data.Text (Text) import qualified Data.Text as T @@ -380,37 +381,37 @@ inlineToTextile opts (Span _ lst) = inlineToTextile opts (Emph lst) = do contents <- inlineListToTextile opts lst - return $ if '_' `elemText` contents + return $ if T.any (== '_') contents then "<em>" <> contents <> "</em>" else "_" <> contents <> "_" inlineToTextile opts (Underline lst) = do contents <- inlineListToTextile opts lst - return $ if '+' `elemText` contents + return $ if T.any (== '+') contents then "<u>" <> contents <> "</u>" else "+" <> contents <> "+" inlineToTextile opts (Strong lst) = do contents <- inlineListToTextile opts lst - return $ if '*' `elemText` contents + return $ if T.any (== '*') contents then "<strong>" <> contents <> "</strong>" else "*" <> contents <> "*" inlineToTextile opts 
(Strikeout lst) = do contents <- inlineListToTextile opts lst - return $ if '-' `elemText` contents + return $ if T.any (== '-') contents then "<del>" <> contents <> "</del>" else "-" <> contents <> "-" inlineToTextile opts (Superscript lst) = do contents <- inlineListToTextile opts lst - return $ if '^' `elemText` contents + return $ if T.any (== '^') contents then "<sup>" <> contents <> "</sup>" else "[^" <> contents <> "^]" inlineToTextile opts (Subscript lst) = do contents <- inlineListToTextile opts lst - return $ if '~' `elemText` contents + return $ if T.any (== '~') contents then "<sub>" <> contents <> "</sub>" else "[~" <> contents <> "~]" @@ -427,7 +428,7 @@ inlineToTextile opts (Quoted DoubleQuote lst) = do inlineToTextile opts (Cite _ lst) = inlineListToTextile opts lst inlineToTextile _ (Code _ str) = - return $ if '@' `elemText` str + return $ if T.any (== '@') str then "<tt>" <> escapeStringForXML str <> "</tt>" else "@" <> str <> "@" diff --git a/src/Text/Pandoc/Writers/XWiki.hs b/src/Text/Pandoc/Writers/XWiki.hs index c352356507cb..f3389d0fdd17 100644 --- a/src/Text/Pandoc/Writers/XWiki.hs +++ b/src/Text/Pandoc/Writers/XWiki.hs @@ -42,8 +42,11 @@ import Text.Pandoc.Definition import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Shared +import Text.Pandoc.URI import Text.Pandoc.Writers.MediaWiki (highlightingLangs) -import Text.Pandoc.Writers.Shared (toLegacyTable) +import Text.Pandoc.Templates (renderTemplate) +import Text.Pandoc.Writers.Shared (defField, metaToContext, toLegacyTable) +import Text.DocLayout (render, literal) newtype WriterState = WriterState { listLevel :: Text -- String at the beginning of items @@ -53,9 +56,19 @@ type XWikiReader m = ReaderT WriterState m -- | Convert Pandoc to XWiki. 
writeXWiki :: PandocMonad m => WriterOptions -> Pandoc -> m Text -writeXWiki _ (Pandoc _ blocks) = +writeXWiki opts (Pandoc meta blocks) = do let env = WriterState { listLevel = "" } - in runReaderT (blockListToXWiki blocks) env + metadata <- metaToContext opts + (fmap (literal . trimr) . (\bs -> runReaderT (blockListToXWiki bs) env)) + (fmap (literal . trimr) . (\is -> runReaderT (inlineListToXWiki is) env)) + meta + body <- runReaderT (blockListToXWiki blocks) env + let context = defField "body" body + $ defField "toc" (writerTableOfContents opts) metadata + return $ + case writerTemplate opts of + Just tpl -> render Nothing $ renderTemplate tpl context + Nothing -> body -- | Concatenates strings with line breaks between them. vcat :: [Text] -> Text diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs index 1f4fbee15b17..434375397f35 100644 --- a/src/Text/Pandoc/Writers/ZimWiki.hs +++ b/src/Text/Pandoc/Writers/ZimWiki.hs @@ -32,7 +32,8 @@ import Text.Pandoc.Logging import Text.Pandoc.Options (WrapOption (..), WriterOptions (writerTableOfContents, writerTemplate, writerWrapText)) -import Text.Pandoc.Shared (escapeURI, isURI, linesToPara, removeFormatting, trimr) +import Text.Pandoc.Shared (linesToPara, removeFormatting, trimr) +import Text.Pandoc.URI (escapeURI, isURI) import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Writers.Shared (defField, metaToContext, toLegacyTable) diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs index 30ecac890baa..e2a8b0ccdb6b 100644 --- a/src/Text/Pandoc/XML.hs +++ b/src/Text/Pandoc/XML.hs @@ -21,6 +21,7 @@ module Text.Pandoc.XML ( escapeCharForXML, toEntities, toHtml5Entities, fromEntities, + lookupEntity, html4Attributes, html5Attributes, rdfaAttributes ) where @@ -28,11 +29,13 @@ module Text.Pandoc.XML ( escapeCharForXML, import Data.Char (isAscii, isSpace, ord, isLetter, isDigit) import Data.Text (Text) import qualified Data.Text as T -import Text.HTML.TagSoup.Entity 
(lookupEntity, htmlEntities) +import Commonmark.Entity (lookupEntity) +import Text.HTML.TagSoup.Entity (htmlEntities) import Text.DocLayout + ( ($$), char, hcat, nest, text, Doc, HasChars ) import Text.Printf (printf) import qualified Data.Map as M -import Data.String +import Data.String ( IsString ) import qualified Data.Set as Set -- | Escape one character as needed for XML. @@ -117,8 +120,8 @@ html5EntityMap = foldr go mempty htmlEntities (\new old -> if T.length new > T.length old then old else new) c ent' entmap - where ent' = T.takeWhile (/=';') (T.pack ent) - _ -> entmap + where ent' = T.takeWhile (/=';') (T.pack ent) + _ -> entmap -- | Converts a string into an NCName, i.e., an XML name without colons. -- Disallowed characters are escaped using @ux%x@, where @%x@ is the @@ -158,7 +161,7 @@ fromEntities t Just (';',ys) -> ys _ -> rest ent' = T.drop 1 ent <> ";" - in case T.pack <$> lookupEntity (T.unpack ent') of + in case lookupEntity ent' of Just c -> c <> fromEntities rest' Nothing -> ent <> fromEntities rest diff --git a/stack.yaml b/stack.yaml index 1ac48d4cf8de..da994b9d8b40 100644 --- a/stack.yaml +++ b/stack.yaml @@ -5,47 +5,38 @@ flags: old-random: false packages: - '.' 
+- 'pandoc-cli' +- 'pandoc-lua-engine' +- 'pandoc-server' extra-deps: -- skylighting-core-0.13 -- skylighting-0.13 +- skylighting-core-0.13.1.1 +- skylighting-0.13.1.1 - skylighting-format-ansi-0.1 - skylighting-format-latex-0.1 -- skylighting-format-blaze-html-0.1 -- emojis-0.1.2 -- gridtables-0.0.3.0 -- lpeg-1.0.3 +- skylighting-format-blaze-html-0.1.1 +- gridtables-0.1.0.0 - hslua-2.2.1 - hslua-aeson-2.2.1 +- hslua-cli-1.2.0 - hslua-classes-2.2.0 - hslua-core-2.2.1 +- hslua-list-1.1.0 - hslua-marshalling-2.2.1 -- hslua-module-doclayout-1.0.4 -- hslua-module-path-1.0.3 -- hslua-module-system-1.0.2 -- hslua-module-text-1.0.2 -- hslua-module-version-1.0.2 +- hslua-module-version-1.0.3 +- hslua-module-zip-1.0.0 - hslua-objectorientation-2.2.1 - hslua-packaging-2.2.1 - lua-2.2.1 -- lua-arbitrary-1.0.1 -- tasty-hslua-1.0.2 -- tasty-lua-1.0.2 -- pandoc-lua-marshal-0.1.7 -- aeson-pretty-0.8.9 -- unicode-transforms-0.4.0.1 -- unicode-data-0.3.0 -- commonmark-pandoc-0.2.1.2 -- ipynb-0.2 -- pandoc-types-1.22.2.1 -- commonmark-0.2.2 -- commonmark-extensions-0.2.3.2 +- mime-types-0.1.1.0 - doclayout-0.4 - doctemplates-0.10.0.2 -- citeproc-0.8.0.1 -- texmath-0.12.5.2 +- commonmark-extensions-0.2.3.3 +- texmath-0.12.5.4 +- git: https://github.com/jgm/citeproc + commit: cb54223919ecd327250f1b167e4e0c61473f402e ghc-options: "$locals": -fhide-source-paths -Wno-missing-home-modules -resolver: lts-18.10 +resolver: lts-19.21 nix: packages: [zlib] diff --git a/test/Tests/Command.hs b/test/Tests/Command.hs index fcd5efc9acf9..b1a03494a626 100644 --- a/test/Tests/Command.hs +++ b/test/Tests/Command.hs @@ -31,7 +31,7 @@ A command test is a code block with the following format: contain "=> " followed by the exit status. 
-} -module Tests.Command (runTest, tests) +module Tests.Command (tests) where import Data.Maybe (fromMaybe) @@ -77,21 +77,6 @@ pandocToEmulate False ('|':' ':'p':'a':'n':'d':'o':'c':cs) = pandocToEmulate _ (c:cs) = c : pandocToEmulate False cs pandocToEmulate _ [] = [] --- | Run a test, return True if test passed. -runTest :: String -- ^ Path to test executable - -> String -- ^ Title of test - -> String -- ^ Shell command - -> String -- ^ Input text - -> String -- ^ Expected output - -> TestTree -runTest testExePath testname cmd inp norm = testCase testname $ do - (_ec, out) <- execTest testExePath cmd inp - result <- if out == norm - then return TestPassed - else return $ TestFailed cmd "expected" - $ getDiff (lines out) (lines norm) - assertBool (show result) (result == TestPassed) - tests :: TestTree {-# NOINLINE tests #-} tests = unsafePerformIO $ do diff --git a/test/Tests/Helpers.hs b/test/Tests/Helpers.hs index 697d84dbf794..789775918f4a 100644 --- a/test/Tests/Helpers.hs +++ b/test/Tests/Helpers.hs @@ -155,7 +155,7 @@ instance ToString [Block] where toString = toString . B.fromList instance ToString Block where - toString = toString . B.singleton + toString = toString . B.singleton instance ToString Inlines where toString = unpack . trimr . purely (writeNative def) . 
toPandoc diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs index a64469e6dfb4..0d20c151a032 100644 --- a/test/Tests/Old.hs +++ b/test/Tests/Old.hs @@ -194,12 +194,6 @@ tests pandocPath = [ test' "reader" ["-r", "creole", "-w", "native", "-s"] "creole-reader.txt" "creole-reader.native" ] - , testGroup "custom writer" - [ test' "basic" ["-f", "native", "-t", "../data/sample.lua"] - "testsuite.native" "writer.custom" - , test' "tables" ["-f", "native", "-t", "../data/sample.lua"] - "tables.native" "tables.custom" - ] , testGroup "man" [ test' "reader" ["-r", "man", "-w", "native", "-s"] "man-reader.man" "man-reader.native" diff --git a/test/Tests/Readers/HTML.hs b/test/Tests/Readers/HTML.hs index eec5da64392d..b1d84735410e 100644 --- a/test/Tests/Readers/HTML.hs +++ b/test/Tests/Readers/HTML.hs @@ -98,6 +98,18 @@ tests = [ testGroup "base tag" , test htmlNativeDivs "<main> followed by text" $ "<main>main content</main>non-main content" =?> doc (divWith ("", [], [("role", "main")]) (plain (text "main content")) <> plain (text "non-main content")) ] + , testGroup "code" + [ + test html "inline code block" $ + "<code>Answer is 42</code>" =?> + plain (codeWith ("",[],[]) "Answer is 42") + ] + , testGroup "tt" + [ + test html "inline tt block" $ + "<tt>Answer is 42</tt>" =?> + plain (codeWith ("",[],[]) "Answer is 42") + ] , testGroup "samp" [ test html "inline samp block" $ diff --git a/test/Tests/Readers/Odt.hs b/test/Tests/Readers/ODT.hs similarity index 89% rename from test/Tests/Readers/Odt.hs rename to test/Tests/Readers/ODT.hs index 8dcd7b29b1d2..ce107af69f08 100644 --- a/test/Tests/Readers/Odt.hs +++ b/test/Tests/Readers/ODT.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- | - Module : Tests.Readers.Odt + Module : Tests.Readers.ODT Copyright : © 2015-2022 John MacFarlane 2015 Martin Linnemann License : GNU GPL, version 2 or above @@ -11,7 +11,7 @@ Tests for the ODT reader. 
-} -module Tests.Readers.Odt (tests) where +module Tests.Readers.ODT (tests) where import Control.Monad (liftM) import qualified Data.ByteString as BS @@ -33,21 +33,21 @@ tests = testsComparingToMarkdown ++ testsComparingToNative testsComparingToMarkdown :: [TestTree] testsComparingToMarkdown = map nameToTest namesOfTestsComparingToMarkdown where nameToTest name = createTest - compareOdtToMarkdown + compareODTToMarkdown name - (toOdtPath name) + (toODTPath name) (toMarkdownPath name) - toOdtPath name = "odt/odt/" ++ name ++ ".odt" + toODTPath name = "odt/odt/" ++ name ++ ".odt" toMarkdownPath name = "odt/markdown/" ++ name ++ ".md" testsComparingToNative :: [TestTree] testsComparingToNative = map nameToTest namesOfTestsComparingToNative where nameToTest name = createTest - compareOdtToNative + compareODTToNative name - (toOdtPath name) + (toODTPath name) (toNativePath name) - toOdtPath name = "odt/odt/" ++ name ++ ".odt" + toODTPath name = "odt/odt/" ++ name ++ ".odt" toNativePath name = "odt/native/" ++ name ++ ".native" @@ -73,22 +73,22 @@ type TestCreator = ReaderOptions -> FilePath -> FilePath -> IO (NoNormPandoc, NoNormPandoc) -compareOdtToNative :: TestCreator -compareOdtToNative opts odtPath nativePath = do +compareODTToNative :: TestCreator +compareODTToNative opts odtPath nativePath = do nativeFile <- UTF8.toText <$> BS.readFile nativePath odtFile <- B.readFile odtPath native <- getNoNormVia id "native" <$> runIO (readNative def nativeFile) - odt <- getNoNormVia id "odt" <$> runIO (readOdt opts odtFile) + odt <- getNoNormVia id "odt" <$> runIO (readODT opts odtFile) return (odt,native) -compareOdtToMarkdown :: TestCreator -compareOdtToMarkdown opts odtPath markdownPath = do +compareODTToMarkdown :: TestCreator +compareODTToMarkdown opts odtPath markdownPath = do markdownFile <- UTF8.toText <$> BS.readFile markdownPath odtFile <- B.readFile odtPath markdown <- getNoNormVia id "markdown" <$> runIO (readMarkdown def{ readerExtensions = pandocExtensions } 
markdownFile) - odt <- getNoNormVia id "odt" <$> runIO (readOdt opts odtFile) + odt <- getNoNormVia id "odt" <$> runIO (readODT opts odtFile) return (odt,markdown) @@ -125,7 +125,7 @@ compareMediaPathIO mediaPath mediaBag odtPath = do compareMediaBagIO :: FilePath -> IO Bool compareMediaBagIO odtFile = do df <- B.readFile odtFile - let (_, mb) = readOdt def df + let (_, mb) = readODT def df bools <- mapM (\(fp, _, _) -> compareMediaPathIO fp mb odtFile) (mediaDirectory mb) diff --git a/test/Tests/Readers/Org/Inline.hs b/test/Tests/Readers/Org/Inline.hs index bec796972e3a..e54b9c28ce23 100644 --- a/test/Tests/Readers/Org/Inline.hs +++ b/test/Tests/Readers/Org/Inline.hs @@ -104,8 +104,10 @@ tests = "[fn::Schreib mir eine E-Mail]" =?> para (note $ para "Schreib mir eine E-Mail") - , "Markup-chars not occurring on word break are symbols" =: - T.unlines [ "this+that+ +so+on" + , "By default, markup-chars not occurring on word break are symbols" =: + T.unlines [ "#+pandoc-emphasis-pre:" + , "#+pandoc-emphasis-post:" + , "this+that+ +so+on" , "seven*eight* nine*" , "+not+funny+" ] =?> diff --git a/test/Tests/Readers/Org/Meta.hs b/test/Tests/Readers/Org/Meta.hs index cce3f7a06892..7ef8fe75d301 100644 --- a/test/Tests/Readers/Org/Meta.hs +++ b/test/Tests/Readers/Org/Meta.hs @@ -208,12 +208,19 @@ tests = ] , testGroup "emphasis config" - [ "Changing pre and post chars for emphasis" =: - T.unlines [ "#+pandoc-emphasis-pre: \"[)\"" - , "#+pandoc-emphasis-post: \"]\\n\"" - , "([/emph/])*foo*" + [ "Changing pre chars for emphasis" =: + T.unlines [ "#+pandoc-emphasis-pre: \"[)$a1%\"" + , "[/emph/.)*strong*.a~code~" ] =?> - para ("([" <> emph "emph" <> "])" <> strong "foo") + para ("[" <> emph "emph" <> ".)" <> strong "strong" <> + ".a" <> code "code") + + , "Changing post chars for emphasis" =: + T.unlines [ "#+pandoc-emphasis-post: \"(]$a1%\"" + , "/emph/('*strong*]'~code~a" + ] =?> + para (emph "emph" <> "('" <> strong "strong" <> "]'" <> + code "code" <> "a") , "setting an 
invalid value restores the default" =: T.unlines [ "#+pandoc-emphasis-pre: \"[\"" diff --git a/test/Tests/Readers/RTF.hs b/test/Tests/Readers/RTF.hs index f8e197a083b1..fdd1414caf3f 100644 --- a/test/Tests/Readers/RTF.hs +++ b/test/Tests/Readers/RTF.hs @@ -39,4 +39,3 @@ tests = map rtfTest [ "footnote" , "table_simple" , "table_error_codes" ] - diff --git a/test/Tests/Shared.hs b/test/Tests/Shared.hs index 439196145ca0..3a5454b89070 100644 --- a/test/Tests/Shared.hs +++ b/test/Tests/Shared.hs @@ -73,11 +73,11 @@ testTOC = [ givesTOC "empty case" $ mempty =?> bulletList [] bulletList [plain "H1a" <> bulletList [plain "H2"]] , givesTOC "only referenced headers" $ header 1 "H1a" <> headerId "h2" 2 "H2" =?> - bulletList [plain "H1a" <> + bulletList [plain "H1a" <> bulletList [plain $ linkId "toc-h2" "#h2" "" "H2"]] , givesTOC "section id used as backup" $ divWith ("sec",["section"],[]) (header 1 "H1") =?> - bulletList [plain $ linkId "toc-sec" "#sec" "" "H1"] + bulletList [plain $ linkId "toc-sec" "#sec" "" "H1"] ] testCollapse :: [TestTree] diff --git a/test/Tests/Writers/AsciiDoc.hs b/test/Tests/Writers/AsciiDoc.hs index cb1863fc4c47..d2b5d5183225 100644 --- a/test/Tests/Writers/AsciiDoc.hs +++ b/test/Tests/Writers/AsciiDoc.hs @@ -93,4 +93,3 @@ tests = [ testGroup "emphasis" ] ] ] - diff --git a/test/Tests/Writers/Docbook.hs b/test/Tests/Writers/DocBook.hs similarity index 99% rename from test/Tests/Writers/Docbook.hs rename to test/Tests/Writers/DocBook.hs index a95140ac5834..95111e36445d 100644 --- a/test/Tests/Writers/Docbook.hs +++ b/test/Tests/Writers/DocBook.hs @@ -1,5 +1,5 @@ {-# LANGUAGE OverloadedStrings #-} -module Tests.Writers.Docbook (tests) where +module Tests.Writers.DocBook (tests) where import Data.Text (unpack) import Test.Tasty @@ -15,10 +15,10 @@ docbook5 :: (ToPandoc a) => a -> String docbook5 = docbook5WithOpts def{ writerWrapText = WrapNone } docbookWithOpts :: ToPandoc a => WriterOptions -> a -> String -docbookWithOpts opts = unpack . 
purely (writeDocbook4 opts) . toPandoc +docbookWithOpts opts = unpack . purely (writeDocBook4 opts) . toPandoc docbook5WithOpts :: ToPandoc a => WriterOptions -> a -> String -docbook5WithOpts opts = unpack . purely (writeDocbook5 opts) . toPandoc +docbook5WithOpts opts = unpack . purely (writeDocBook5 opts) . toPandoc {- "my test" =: X =?> Y diff --git a/test/Tests/Writers/HTML.hs b/test/Tests/Writers/HTML.hs index 5176af1f0c2f..3ec7a866d919 100644 --- a/test/Tests/Writers/HTML.hs +++ b/test/Tests/Writers/HTML.hs @@ -84,6 +84,11 @@ tests = doubleQuoted (spanWith ("", [], [("cite", "http://example.org")]) (str "examples")) =?> "<q cite=\"http://example.org\">examples</q>" ] + , testGroup "code" + [ "code should be rendered correctly" =: + plain (codeWith ("",[],[]) "Answer is 42") =?> + "<code>Answer is 42</code>" + ] , testGroup "sample" [ "sample should be rendered correctly" =: plain (codeWith ("",["sample"],[]) "Answer is 42") =?> diff --git a/test/Tests/Writers/JATS.hs b/test/Tests/Writers/JATS.hs index 8abf33fe2122..e34f6481b15f 100644 --- a/test/Tests/Writers/JATS.hs +++ b/test/Tests/Writers/JATS.hs @@ -11,7 +11,7 @@ import Text.Pandoc.Builder import qualified Data.Text as T jats :: (ToPandoc a) => a -> Text -jats = purely (writeJATS def{ writerWrapText = WrapNone }) +jats = purely (writeJatsArchiving def{ writerWrapText = WrapNone }) . 
toPandoc jatsArticleAuthoring :: (ToPandoc a) => a -> Text diff --git a/test/command/2378.md b/test/command/2378.md index 7b6586366b0b..6c4e81bd2c63 100644 --- a/test/command/2378.md +++ b/test/command/2378.md @@ -21,7 +21,8 @@ x & y\footnote{a footnote} \\ x & y{} \\ \midrule() \endhead -1 & 2 \\ \bottomrule() +\endlastfoot +1 & 2 \\ \end{longtable} ``` diff --git a/test/command/2649.md b/test/command/2649.md index 4ab059ea05b8..7225ae0f4561 100644 --- a/test/command/2649.md +++ b/test/command/2649.md @@ -17,7 +17,7 @@ % pandoc -f mediawiki -t html5 {| border="4" cellspacing="2" cellpadding="0" WIDTH="100%" |----- -| peildatum Simbase || november 2005 || '''uitslagen Flohrgambiet''' || +| peildatum Simbase || november 2005 || colspan=2 | '''uitslagen Flohrgambiet''' || |----- | totaal aantal partijen Simbase || 7.316.773 | wit wint || 53% @@ -34,8 +34,7 @@ <tr class="odd"> <td><p>peildatum Simbase</p></td> <td><p>november 2005</p></td> -<td><p><strong>uitslagen Flohrgambiet</strong></p></td> -<td></td> +<td colspan="2"><p><strong>uitslagen Flohrgambiet</strong></p></td> </tr> <tr class="even"> <td><p>totaal aantal partijen Simbase</p></td> @@ -90,20 +89,20 @@ <tbody> <tr class="odd"> <td><p>1</p></td> -<td><p><a href="Sébastien_Loeb" title="wikilink">Sébastien -Loeb</a></p></td> +<td style="text-align: left;"><p><a href="Sébastien_Loeb" +title="wikilink">Sébastien Loeb</a></p></td> <td><p>78</p></td> </tr> <tr class="even"> <td><p>2</p></td> -<td><p><strong><a href="Sébastien_Ogier" title="wikilink">Sébastien -Ogier</a></strong></p></td> +<td style="text-align: left;"><p><strong><a href="Sébastien_Ogier" +title="wikilink">Sébastien Ogier</a></strong></p></td> <td><p>38</p></td> </tr> <tr class="odd"> <td><p>10</p></td> -<td><p><a href="Hannu_Mikkola" title="wikilink">Hannu -Mikkola</a></p></td> +<td style="text-align: left;"><p><a href="Hannu_Mikkola" +title="wikilink">Hannu Mikkola</a></p></td> <td><p>18</p></td> </tr> </tbody> diff --git a/test/command/4564.md 
b/test/command/4564.md new file mode 100644 index 000000000000..81c50afb7105 --- /dev/null +++ b/test/command/4564.md @@ -0,0 +1,65 @@ +``` +% pandoc -f native -t rst --list-tables +[BlockQuote + [Table ("",[],[]) (Caption Nothing + []) + [(AlignDefault,ColWidth 0.1527777777777778) + ,(AlignDefault,ColWidth 0.1388888888888889) + ,(AlignDefault,ColWidth 0.16666666666666666) + ,(AlignDefault,ColWidth 0.375)] + (TableHead ("",[],[]) + [Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Centered",SoftBreak,Str "Header"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Left",SoftBreak,Str "Aligned"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Right",SoftBreak,Str "Aligned"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Default",Space,Str "aligned"]]]]) + [(TableBody ("",[],[]) (RowHeadColumns 0) + [] + [Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "First"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "row"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "12.0"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Example",Space,Str "of",Space,Str "a",Space,Str "row",Space,Str "that",SoftBreak,Str "spans",Space,Str "multiple",Space,Str "lines."]]] + ,Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Second"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "row"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "5.0"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Here\8217s",Space,Str "another",Space,Str "one.",Space,Str "Note",SoftBreak,Str "the",Space,Str "blank",Space,Str "line",Space,Str "between",SoftBreak,Str "rows."]]]])] + (TableFoot ("",[],[]) + [])]] +^D + .. 
list-table:: + :widths: 11 10 12 27 + :header-rows: 1 + + - + + - Centered Header + - Left Aligned + - Right Aligned + - Default aligned + - + + - First + - row + - 12.0 + - Example of a row that spans multiple lines. + - + + - Second + - row + - 5.0 + - Here’s another one. Note the blank line between rows. +``` diff --git a/test/command/5367.md b/test/command/5367.md index 591ee4b2eeba..1ee7ce749cfb 100644 --- a/test/command/5367.md +++ b/test/command/5367.md @@ -35,8 +35,9 @@ Fruit{} \end{minipage} \\ \midrule() \endhead -Bans\footnote{table cell footnote} \\ \bottomrule() +\endlastfoot +Bans\footnote{table cell footnote} \\ \end{longtable} dolly\footnote{doc footnote} diff --git a/test/command/6384.md b/test/command/6384.md new file mode 100644 index 000000000000..1be1c3e45ba6 --- /dev/null +++ b/test/command/6384.md @@ -0,0 +1,16 @@ +``` +% pandoc --wrap=preserve --file-scope command/file1.txt command/file2.txt +^D +<div id="command__file1.txt"> +<h1 id="command__file1.txt__zed">Zed</h1> +<p><a href="bar">foo</a> +and <a href="#command__file1.txt__zed">Zed</a> +and <a href="#command__file2.txt__zed">other Zed</a> +and <a href="#command__file2.txt">other file</a> +and <a href="c.md#zed">foreign Zed</a></p> +</div> +<div id="command__file2.txt"> +<h2 id="command__file2.txt__zed">Zed</h2> +<p><a href="baz">foo</a></p> +</div> +``` diff --git a/test/command/7272.md b/test/command/7272.md index bb184ad4b8b9..56cd1c3bc338 100644 --- a/test/command/7272.md +++ b/test/command/7272.md @@ -18,10 +18,11 @@ >{\raggedright\arraybackslash}p{(\columnwidth - 0\tabcolsep) * \real{1.0000}}@{}} \toprule() \endhead +\bottomrule() +\endlastfoot \begin{minipage}[t]{\linewidth}\raggedright { text\\ text2 }\strut \end{minipage} \\ -\bottomrule() \end{longtable} ``` diff --git a/test/command/7678.md b/test/command/7678.md new file mode 100644 index 000000000000..b6a09ef20c78 --- /dev/null +++ b/test/command/7678.md @@ -0,0 +1,31 @@ +``` +% pandoc -f bibtex -t csljson +@misc{doe, + 
author = "Jane Doe", + title = "Work", + year = "2021", + url = "%20and%20" +} +^D +[ + { + "URL": "%20and%20", + "author": [ + { + "family": "Doe", + "given": "Jane" + } + ], + "id": "doe", + "issued": { + "date-parts": [ + [ + 2021 + ] + ] + }, + "title": "Work", + "type": "" + } +] +``` diff --git a/test/command/8174.md b/test/command/8174.md new file mode 100644 index 000000000000..61fe37b93acf --- /dev/null +++ b/test/command/8174.md @@ -0,0 +1,44 @@ +```` +% pandoc -t native +```yaml {#id} +some: code +``` +^D +[ CodeBlock ( "id" , [ "yaml" ] , [] ) "some: code" ] +```` + +```` +% pandoc -t native +```yaml {.class #id} +some: code +``` +^D +[ CodeBlock + ( "id" , [ "yaml" , "class" ] , [] ) "some: code" +] +```` + +<!-- Inline code sections at the start of the line should not be mistaken for code blocks --> +```` +% pandoc -t native +```ab``` +^D +[ Para [ Code ( "" , [] , [] ) "ab" ] ] +```` + +```` +% pandoc -t native +```ab```{.class} +^D +[ Para [ Code ( "" , [ "class" ] , [] ) "ab" ] ] +```` + +<!-- Illegal language identifiers should be treated as inline code for now --> +```` +% pandoc -t native +``` foo}{.bar} +test +``` +^D +[ Para [ Code ( "" , [] , [] ) "foo}{.bar} test" ] ] +```` diff --git a/test/command/8219.md b/test/command/8219.md index 52279cf42250..0d1ed8b1d3cc 100644 --- a/test/command/8219.md +++ b/test/command/8219.md @@ -7,8 +7,10 @@ \begin{longtable}[]{@{}ll@{}} \caption{}\label{test}\tabularnewline \toprule() +\endfirsthead \endhead -one & two \\ \bottomrule() +\endlastfoot +one & two \\ \end{longtable} ``` diff --git a/test/command/8251.md b/test/command/8251.md new file mode 100644 index 000000000000..cb16306cbac7 --- /dev/null +++ b/test/command/8251.md @@ -0,0 +1,13 @@ +``` +% pandoc -f native -t html +Link ("",["", "", ""],[]) [Str "foo"] ("https://example.com","") +^D +<a href="https://example.com">foo</a> +``` + +``` +% pandoc -f native -t markdown +Link ("",["", "", ""],[]) [Str "foo"] ("https://example.com","") +^D 
+[foo](https://example.com){} +``` diff --git a/test/command/8257.md b/test/command/8257.md new file mode 100644 index 000000000000..a284fd38a566 --- /dev/null +++ b/test/command/8257.md @@ -0,0 +1,41 @@ +``` +% pandoc -f markdown -t html5 ++------+-------+ +| Item | Price | ++======+=======+ +| Eggs | 5£ | ++------+-------+ +| Spam | 3£ | ++======+=======+ +| Sum | 8£ | ++======+=======+ +^D +<table style="width:21%;"> +<colgroup> +<col style="width: 9%" /> +<col style="width: 11%" /> +</colgroup> +<thead> +<tr class="header"> +<th>Item</th> +<th>Price</th> +</tr> +</thead> +<tbody> +<tr class="odd"> +<td>Eggs</td> +<td>5£</td> +</tr> +<tr class="even"> +<td>Spam</td> +<td>3£</td> +</tr> +</tbody><tfoot> +<tr class="odd"> +<td>Sum</td> +<td>8£</td> +</tr> +</tfoot> + +</table> +``` diff --git a/test/command/8281.md b/test/command/8281.md new file mode 100644 index 000000000000..200e74ff6192 --- /dev/null +++ b/test/command/8281.md @@ -0,0 +1,15 @@ +``` +% pandoc -t html +# Title + +test + +. . . + +test +^D +<h1 id="title">Title</h1> +<p>test</p> +<p>. . 
.</p> +<p>test</p> +``` diff --git a/test/command/8302.md b/test/command/8302.md new file mode 100644 index 000000000000..7495300716fa --- /dev/null +++ b/test/command/8302.md @@ -0,0 +1,6 @@ +``` +% pandoc -f org -t markdown +some text cite:&long-2004-tecton-evolut +^D +some text @long-2004-tecton-evolut +``` diff --git a/test/command/8307.md b/test/command/8307.md new file mode 100644 index 000000000000..d211d7003901 --- /dev/null +++ b/test/command/8307.md @@ -0,0 +1,49 @@ +``` +% pandoc -t commonmark -f html +<table> +<tbody> +<tr> +<td title="this + + +breaks">hello</td> +</tr> +</tbody> +</table> +^D +<table> +<tbody> +<tr class="odd"> +<td title="this + +breaks">hello</td> +</tr> +</tbody> +</table> +``` + +```` +% pandoc -t commonmark -f markdown +``` {=html} +<table> +<tbody> +<tr> +<td title="this + + +breaks">hello</td> +</tr> +</tbody> +</table> +``` +^D +<table> +<tbody> +<tr> +<td title="this + +breaks">hello</td> +</tr> +</tbody> +</table> +```` diff --git a/test/command/8344.md b/test/command/8344.md new file mode 100644 index 000000000000..c95c75aab921 --- /dev/null +++ b/test/command/8344.md @@ -0,0 +1,90 @@ +``` +% pandoc -t jats -s +--- +title: |- + My\ + document +... 
+ +# Section\ +with line break + +Paragraph\ +with line break + +-------- +A B +--- --- +1\ 3 +2 4 + +*1\ 5\ +2* 6 + + 7 +-------- + +^D +<?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.2 20190208//EN" + "JATS-archivearticle1.dtd"> +<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="1.2" article-type="other"> +<front> +<journal-meta> +<journal-id></journal-id> +<journal-title-group> +</journal-title-group> +<issn></issn> +<publisher> +<publisher-name></publisher-name> +</publisher> +</journal-meta> +<article-meta> +<title-group> +<article-title>My<break/>document</article-title> +</title-group> +<permissions> +</permissions> +</article-meta> +</front> +<body> +<sec id="section"> + <title>Section<break/> +

with line break

+

Paragraph + with line break

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
AB
123 4
1 + 256
7
+
+ + + + + +``` diff --git a/test/command/8354.md b/test/command/8354.md new file mode 100644 index 000000000000..96e3d6801885 --- /dev/null +++ b/test/command/8354.md @@ -0,0 +1,38 @@ +``` +% pandoc -f markdown -t html --citeproc +--- +title: test +nocite: '[@*]' +references: +- author: + - family: Fekete + given: Jean-Daniel + - family: Freire + given: Juliana + DOI: 10.1109/MCG.2020.3006412 + id: feketeExploringReproducibilityVisualization2020 + ISSN: 1558-1756 + issue: 5 + issued: 2020-09 + page: 108-119 + source: IEEE Xplore + title: Exploring Reproducibility in Visualization + container-title: IEEE Computer Graphics and Applications + type: article-journal + volume: 40 +--- + +## References +^D +

References

+
+``` diff --git a/test/command/8380.md b/test/command/8380.md new file mode 100644 index 000000000000..116922c859dd --- /dev/null +++ b/test/command/8380.md @@ -0,0 +1,31 @@ +``` +% pandoc -f man -t rst +LC_* +^D +LC\_\* +``` + +These examples of things that don't require escaping are taken +from the RST documentation: + +``` +% pandoc -f native -t rst +[Para [Str "2*x a**b O(N**2) e**(x*y) f(x)*f(y) a|b"] +,Para [Str "a**b O(N**2) e**(x*y) f(x)*f(y)"] +] +^D +2*x a**b O(N**2) e**(x*y) f(x)*f(y) a|b + +a**b O(N**2) e**(x*y) f(x)*f(y) +``` + +These examples of things that do require escaping are taken +from the RST documentation: + +``` +% pandoc -f native -t rst +Str "*4, class_, *args, **kwargs, `TeX-quoted', *ML, *.txt" +^D +\*4, class\_, \*args, \**kwargs, \`TeX-quoted', \*ML, \*.txt +``` + diff --git a/test/command/8402.md b/test/command/8402.md new file mode 100644 index 000000000000..67536227cc28 --- /dev/null +++ b/test/command/8402.md @@ -0,0 +1,52 @@ +``` +% pandoc --toc -s -t markdown +::: {.cell .markdown id="6u8qXoeFGdqt"} +# Summary +> Expand to see summary + +## Overview and Explanation +::: + +# Details + +## inner 1 +text + +## inner 2 + +### inner inner 1 + +## inner 3 +text +^D +- [Summary](#summary){#toc-summary} + - [Overview and + Explanation](#overview-and-explanation){#toc-overview-and-explanation} +- [Details](#details){#toc-details} + - [inner 1](#inner-1){#toc-inner-1} + - [inner 2](#inner-2){#toc-inner-2} + - [inner inner 1](#inner-inner-1){#toc-inner-inner-1} + - [inner 3](#inner-3){#toc-inner-3} + +::: {#6u8qXoeFGdqt .cell .markdown} +# Summary + +> Expand to see summary + +## Overview and Explanation +::: + +# Details + +## inner 1 + +text + +## inner 2 + +### inner inner 1 + +## inner 3 + +text +``` diff --git a/test/command/file1.txt b/test/command/file1.txt new file mode 100644 index 000000000000..5416f3a6c186 --- /dev/null +++ b/test/command/file1.txt @@ -0,0 +1,9 @@ +# Zed + +[foo]: bar + +[foo] +and [Zed](#zed) +and [other 
Zed](command/file2.txt#zed) +and [other file](command/file2.txt) +and [foreign Zed](c.md#zed) diff --git a/test/command/file2.txt b/test/command/file2.txt new file mode 100644 index 000000000000..20ee06c8ce09 --- /dev/null +++ b/test/command/file2.txt @@ -0,0 +1,5 @@ +## Zed + +[foo]: baz + +[foo] diff --git a/test/command/html-writer-a-in-a.md b/test/command/html-writer-a-in-a.md new file mode 100644 index 000000000000..219496d28891 --- /dev/null +++ b/test/command/html-writer-a-in-a.md @@ -0,0 +1,7 @@ +a is not allowed inside a in HTML, so we remove links in link text: +``` +% pandoc -f native -t html +[ Link ("",[],[]) [Link ("",[],[]) [Str "Lo"] ("url2","") ] ("url","") ] +^D +Lo +``` diff --git a/test/command/lua-pandoc-state.lua b/test/command/lua-pandoc-state.lua deleted file mode 100644 index 5282a4c291a4..000000000000 --- a/test/command/lua-pandoc-state.lua +++ /dev/null @@ -1,11 +0,0 @@ -function report (what, value) - print(string.format('%16s: %s', what, value)) -end -report('# input files', #PANDOC_STATE.input_files) -report('output file', PANDOC_STATE.output_file) -report('# request header', #PANDOC_STATE.request_headers) -report('resource path', table.concat(PANDOC_STATE.resource_path, ', ')) -report('source URL', PANDOC_STATE.source_url) -report('user data dir', PANDOC_STATE.user_data_dir and 'defined' or 'unset') -report('trace', PANDOC_STATE.trace) -report('verbosity', PANDOC_STATE.verbosity) diff --git a/test/command/lua-pandoc-state.md b/test/command/lua-pandoc-state.md deleted file mode 100644 index e56d75af5ef6..000000000000 --- a/test/command/lua-pandoc-state.md +++ /dev/null @@ -1,14 +0,0 @@ -``` -% pandoc --lua-filter=command/lua-pandoc-state.lua --data-dir=foo -Hello -^D - # input files: 1 - output file: nil -# request header: 0 - resource path: . - source URL: nil - user data dir: defined - trace: false - verbosity: WARNING -

Hello

-``` diff --git a/test/command/pandoc-citeproc-locators-delimited.md b/test/command/pandoc-citeproc-locators-delimited.md index 97db169b93f0..08c58c051ac4 100644 --- a/test/command/pandoc-citeproc-locators-delimited.md +++ b/test/command/pandoc-citeproc-locators-delimited.md @@ -100,7 +100,7 @@ Unbalanced curly } ends early[^18] [^6]: Subsequent, p. {(a)}. -[^7]: Ibid-with-locator. +[^7]: Ibid. [^8]: Ibid 123-35 numbers are suffix. diff --git a/test/command/short-caption.md b/test/command/short-caption.md new file mode 100644 index 000000000000..0fb71290fd14 --- /dev/null +++ b/test/command/short-caption.md @@ -0,0 +1,42 @@ +``` +% pandoc -f latex -t native +\begin{table} +\caption[short caption]{long caption} +\begin{tabular}{ll} +hi & hi \\ +\end{tabular} +\end{table} +^D +[ Table + ( "" , [] , [] ) + (Caption + (Just [ Str "short" , Space , Str "caption" ]) + [ Plain [ Str "long" , Space , Str "caption" ] ]) + [ ( AlignLeft , ColWidthDefault ) + , ( AlignLeft , ColWidthDefault ) + ] + (TableHead ( "" , [] , [] ) []) + [ TableBody + ( "" , [] , [] ) + (RowHeadColumns 0) + [] + [ Row + ( "" , [] , [] ) + [ Cell + ( "" , [] , [] ) + AlignDefault + (RowSpan 1) + (ColSpan 1) + [ Plain [ Str "hi" ] ] + , Cell + ( "" , [] , [] ) + AlignDefault + (RowSpan 1) + (ColSpan 1) + [ Plain [ Str "hi" ] ] + ] + ] + ] + (TableFoot ( "" , [] , [] ) []) +] +``` diff --git a/test/ipynb/simple.out.native b/test/ipynb/simple.out.native index a17c36a41588..1c7e979ccfdc 100644 --- a/test/ipynb/simple.out.native +++ b/test/ipynb/simple.out.native @@ -83,7 +83,7 @@ Pandoc , Image ( "" , [] , [] ) [ Str "the" , Space , Str "moon" ] - ( "lalune.jpg" , "" ) + ( "uid6-lalune.jpg" , "" ) , Space , Str "will" , Space diff --git a/test/lhs-test.html b/test/lhs-test.html index 76492a50cc01..b8e5efca7745 100644 --- a/test/lhs-test.html +++ b/test/lhs-test.html @@ -35,6 +35,9 @@ } } @media print { + html { + background-color: white; + } body { background-color: transparent; color: black; @@ -157,6 
+160,8 @@ margin: 0 0.8em 0.2em -1.6em; vertical-align: middle; } + .display.math{display: block; text-align: center; margin: 0.5rem auto;} + /* CSS for syntax highlighting */ pre > code.sourceCode { white-space: pre; position: relative; } pre > code.sourceCode > span { display: inline-block; line-height: 1.25; } pre > code.sourceCode > span:empty { height: 1.2em; } @@ -220,7 +225,6 @@ code span.va { color: #19177c; } /* Variable */ code span.vs { color: #4070a0; } /* VerbatimString */ code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */ - .display.math{display: block; text-align: center; margin: 0.5rem auto;} -
- - -
-
-
-
- - - -
- - - - - - -
-
-
-

-    
- - -
-

pandoc version

-
- - - - - diff --git a/trypandoc/trypandoc.js b/trypandoc/trypandoc.js deleted file mode 100644 index 4032c5a9778e..000000000000 --- a/trypandoc/trypandoc.js +++ /dev/null @@ -1,419 +0,0 @@ -"use strict"; - -var params = { - text: '"hello *world*"', - to: 'html5', - from: 'markdown', - standalone: false, - citeproc: false, - files: {} }; - -const examples = { - ["Hello world"]: - { text: '*Hello* world!', - from: 'markdown', - to: 'html5', - standalone: false, - citeproc: false, - files: {} }, - ["BibTeX to CSL JSON"]: - { text: `@BOOK{Wurm2011-ho, - title = "{Substanz und Qualität : Ein Beitrag zur Interpretation der - plotinischen Traktate VI,1, 2 und 3}", - author = "Wurm, Klaus", - publisher = "De Gruyter", - series = "Quellen und Studien zur Philosophie", - edition = "Reprint 2011", - year = 2011, - address = "Berlin", - keywords = "!!! Plotinus translation", - language = "de" -}`, - from: 'bibtex', - to: 'csljson', - standalone: false, - citeproc: false, - files: {} }, - ["Markdown to Docbook with citations"]: - { text: `--- -references: -- author: - - family: Salam - given: Abdus - container-title: "Elementary particle theory: Relativistic groups and - analyticity. Proceedings of the eighth Nobel symposium" - editor: - - family: Svartholm - given: Nils - event-date: 1968-05-19/1968-05-25 - event-place: Aspenäsgarden, Lerum - id: salam - issued: 1968 - page: 367-377 - publisher: Almquist & Wiksell - publisher-place: Stockholm - title: Weak and electromagnetic interactions - type: paper-conference ---- - -@salam [p. 370] says some interesting things.`, - from: 'markdown', - to: 'docbook5', - standalone: true, - citeproc: true, - files: {} }, - ["MediaWiki to docx with equations"]: - { text: `Just as the components of a vector change when we change the [[basis (linear algebra)|basis]] of the vector space, the components of a tensor also change under such a transformation. 
Each type of tensor comes equipped with a ''transformation law'' that details how the components of the tensor respond to a [[change of basis]]. The components of a vector can respond in two distinct ways to a [[change of basis]] (see [[covariance and contravariance of vectors]]), where the new [[basis vectors]] \\mathbf{\\hat{e}}_i are expressed in terms of the old basis vectors \\mathbf{e}_j as, -:\\mathbf{\\hat{e}}_i = \\sum_{j=1}^n \\mathbf{e}_j R^j_i = \\mathbf{e}_j R^j_i . - -Here ''R'''' j''''i'' are the entries of the change of basis matrix, and in the rightmost expression the [[summation]] sign was suppressed: this is the [[Einstein summation convention]], which will be used throughout this article.The Einstein summation convention, in brief, requires the sum to be taken over all values of the index whenever the same symbol appears as a subscript and superscript in the same term. For example, under this convention B_i C^i = B_1 C^1 + B_2 C^2 + \\cdots B_n C^n The components ''v''''i'' of a column vector '''v''' transform with the [[matrix inverse|inverse]] of the matrix ''R'', -:\\hat{v}^i = \\left(R^{-1}\\right)^i_j v^j, - -where the hat denotes the components in the new basis. This is called a ''contravariant'' transformation law, because the vector components transform by the ''inverse'' of the change of basis. In contrast, the components, ''w''''i'', of a covector (or row vector), '''w''', transform with the matrix ''R'' itself, -:\\hat{w}_i = w_j R^j_i .`, - from: 'mediawiki', - to: 'docx', - standalone: true, - citeproc: false, - files: {} }, - ["Man page to ConTeXt"]: - { text: `.TP -\\f[C]-L\\f[R] \\f[I]SCRIPT\\f[R], \\f[C]--lua-filter=\\f[R]\\f[I]SCRIPT\\f[R] -Transform the document in a similar fashion as JSON filters (see -\\f[C]--filter\\f[R]), but use pandoc\\[cq]s built-in Lua filtering system. -The given Lua script is expected to return a list of Lua filters which -will be applied in order. 
-Each Lua filter must contain element-transforming functions indexed by -the name of the AST element on which the filter function should be -applied. -.RS -.PP -The \\f[C]pandoc\\f[R] Lua module provides helper functions for element -creation. -It is always loaded into the script\\[cq]s Lua environment. -.PP -See the Lua filters documentation for further details. -.PP -In order of preference, pandoc will look for Lua filters in -.IP "1." 3 -a specified full or relative path, -.IP "2." 3 -\\f[C]$DATADIR/filters\\f[R] where \\f[C]$DATADIR\\f[R] is the user data -directory (see \\f[C]--data-dir\\f[R], above). -.PP -Filters, Lua filters, and citeproc processing are applied in the order -specified on the command line. -.RE -.TP -\\f[C]-M\\f[R] \\f[I]KEY\\f[R][\\f[C]=\\f[R]\\f[I]VAL\\f[R]], \\f[C]--metadata=\\f[R]\\f[I]KEY\\f[R][\\f[C]:\\f[R]\\f[I]VAL\\f[R]] -Set the metadata field \\f[I]KEY\\f[R] to the value \\f[I]VAL\\f[R]. -A value specified on the command line overrides a value specified in the -document using YAML metadata blocks. -Values will be parsed as YAML boolean or string values. -If no value is specified, the value will be treated as Boolean true. -Like \\f[C]--variable\\f[R], \\f[C]--metadata\\f[R] causes template -variables to be set. 
-But unlike \\f[C]--variable\\f[R], \\f[C]--metadata\\f[R] affects the -metadata of the underlying document (which is accessible from filters -and may be printed in some output formats) and metadata values will be -escaped when inserted into the template.`, - from: 'man', - to: 'context', - standalone: false, - citeproc: false, - files: {} }, - ["LaTeX with macros to reStructuredText"]: - { text: `% from https://en.wikibooks.org/wiki/LaTeX/Macros -\\newcommand{\\wbalTwo}[2][Wikimedia]{ -This is the Wikibook about LaTeX -supported by {#1} and {#2}!} - -\\begin{itemize} -\\item \\wbalTwo{John Doe} -\\item \\wbalTwo[lots of users]{John Doe} -\\end{itemize}`, - from: 'latex', - to: 'rst', - standalone: true, - citeproc: false, - files: {} }, - - ["CSV table to org"]: - { text: `"Year", "Score", "Title" -1968, 86, "Greetings" -1970, 17, "Bloody Mama" -1970, 73, "Hi, Mom!" -1971, 40, "Born to Win" -1973, 98, "Mean Streets" -1973, 88, "Bang the Drum Slowly" -1974, 97, "The Godfather, Part II" -1976, 41, "The Last Tycoon" -1976, 99, "Taxi Driver"`, - from: 'csv', - to: 'org', - standalone: false, - citeproc: false, - files: {} } - -} - -function clearText() { - params.text = ''; - document.getElementById("text").value = ''; -} - -function addSupportFile(e) { - -} - -function addFile(name, contents) { - if (params.files[name]) { - throw("File " + name + " already exists. 
Remove it before re-adding."); - return; - } - params.files[name] = contents; - let filesDiv = document.getElementById("files"); - let fileDiv = document.createElement("div"); - fileDiv.classList.add("file"); - let title = document.createElement("div"); - title.classList.add("title"); - let removeButton = document.createElement("button"); - removeButton.textContent = "Remove"; - removeButton.onclick = (e) => { - delete params.files[name]; - e.target.parentElement.parentElement.remove(); - } - let filename = document.createElement("span"); - filename.classList.add("filename"); - filename.textContent = name; - title.appendChild(filename); - title.appendChild(removeButton); - fileDiv.appendChild(title); - let textarea = document.createElement("textarea"); - textarea.onchange = (e) => { - params.files[name] = e.target.value; - } - textarea.textContent = contents; - fileDiv.appendChild(textarea); - filesDiv.appendChild(fileDiv); -} - -function permalink() { - let href = window.location.href; - const URLparams = new URLSearchParams(Object.entries(params)); - return href.replace(/([?].*)?$/,"?" 
+ URLparams); -} - -const binaryFormats = { - docx: { extension: "docx", - mime: "application/vnd.openxmlformats-officedocument.wordprocessingml.document" }, - odt: { extension: "odt", - mime: "application/vnd.oasis.opendocument.text" }, - pptx: { extension: "pptx", - mime: "application/vnd.openxmlformats-officedocument.presentationml.presentation" }, - epub: { extension: "epub", - mime: "application/epub+zip" }, - epub2: { extension: "epub", - mime: "application/epub+zip" }, - epub3: { extension: "epub", - mime: "application/epub+zip" } -}; - -const binaryMimeTypes = { - ["application/epub+zip"]: true, - ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"]: true, - ["application/vnd.openxmlformats-officedocument.presentationml.presentation"]: true, - ["application/vnd.oasis.opendocument.text"]: true -}; - -function paramsFromURL() { - if (window.location.search.length > 0) { - const uparams = new URLSearchParams(window.location.search); - params.text = uparams.get("text") || ""; - params.from = uparams.get("from") || "markdown"; - params.to = uparams.get("to") || "html5"; - params.standalone = uparams.get("standalone") === "true"; - params.citeproc = uparams.get("citeproc") === "true"; - } -} - -function handleErrors(response) { - let errs = document.getElementById("errors"); - if (!response.ok) { - errs.textContent = "Conversion failed, status = " + response.status; - errs.style.display = "block"; - } - if (response.status == 503) { - errs.textContent += " Timed out."; - } - return response; -} - -function convert() { - document.getElementById("results").textContent = ""; - let errs = document.getElementById("errors"); - errs.style.display = "none"; - errs.textContent = ""; - console.log(params); - - let commandString = "pandoc" - + " --from " + params.from + " --to " + params.to - + (params.standalone ? " --standalone" : "") - + (params.citeproc ? 
" --citeproc" : "") ; - document.getElementById("command").textContent = commandString; - fetch("/cgi-bin/pandoc-server.cgi", { - method: "POST", - headers: {"Content-Type": "application/json"}, - body: JSON.stringify(params) - }) - .then(handleErrors) - .then(response => response.text()) - .then(restext => { - let binary = binaryFormats[params.to]; - if (binary && - document.getElementById("errors").style.display == "none") { - document.getElementById("results").innerHTML += - 'click to download trypandoc.' + binary.extension + ''; - } else { - document.getElementById("results").textContent += restext; - } - document.getElementById("permalink").href = permalink(); - }); -} - -function setFormFromParams() { - document.getElementById("text").value = params.text; - document.getElementById("from").value = params.from; - document.getElementById("to").value = params.to; - document.getElementById("standalone").checked = params.standalone; - document.getElementById("citeproc").checked = params.citeproc; -} - - -(function() { - paramsFromURL(); - setFormFromParams(); - - const exampleSelect = document.getElementById("examples"); - for (const k in examples) { - exampleSelect.innerHTML += ''; - } - - document.getElementById("convert").onclick = convert; - document.getElementById("from").onchange = (e) => { - params.from = e.target.value; - convert(); - } - document.getElementById("to").onchange = (e) => { - params.to = e.target.value; - convert(); - } - document.getElementById("text").onchange = (e) => { - params.text = e.target.value; - } - document.getElementById("standalone").onchange = (e) => { - params.standalone = e.target.checked; - convert(); - } - document.getElementById("citeproc").onchange = (e) => { - params.citeproc = e.target.checked; - convert(); - } - - document.getElementById("examples").onchange = (e) => { - params = examples[e.target.value]; - setFormFromParams(); - convert(); - } - - const fileInput = document.getElementById('loadfile'); - - // Listen for 
the change event so we can capture the file - fileInput.addEventListener('change', (e) => { - // Get a reference to the file - const file = e.target.files[0]; - const mimetype = file.type; - let binary = binaryMimeTypes[mimetype]; - - // Encode the file using the FileReader API - const reader = new FileReader(); - let inputtext = document.getElementById("text"); - reader.onloadend = () => { - // Use a regex to remove data url part - if (binary) { - const base64String = reader.result - .replace('data:', '') - .replace(/^.+,/, ''); - inputtext.value = base64String; - } else { - inputtext.value = reader.result; - } - params.text = inputtext.value; - }; - if (binary) { - reader.readAsDataURL(file); - } else { - reader.readAsText(file); - } - }); - - const addfileButton = document.getElementById("addfile"); - addfileButton.addEventListener('change', (e) => { - // Get a reference to the file - const file = e.target.files[0]; - const mimetype = file.type; - let binary = binaryMimeTypes[mimetype]; - - // Encode the file using the FileReader API - const reader = new FileReader(); - reader.onloadend = () => { - // Use a regex to remove data url part - if (binary) { - const base64String = reader.result - .replace('data:', '') - .replace(/^.+,/, ''); - addFile(file.name, base64String); - } else { - addFile(file.name, reader.result); - } - }; - if (binary) { - reader.readAsDataURL(file); - } else { - reader.readAsText(file); - } - - }); - - // const supportFiles = document.getElementById('supportfiles'); - // - // // Listen for the change event so we can capture the file - // supportFiles.addEventListener('change', (e) => { - // // Get a reference to the file - // const files = e.target.files; - // params.files = {}; - // Object.keys(files).forEach(i => { - // const file = files[i]; - // const reader = new FileReader(); - // reader.onload = (e) => { - // params.files[file.name] = reader.result - // .replace('data:', '') - // .replace(/^.+,/, ''); - // } - // 
reader.readAsDataURL(file); - // }); - // }); - - fetch("/cgi-bin/pandoc-server.cgi/version") - .then(handleErrors) - .then(response => response.text()) - .then(restext => - document.getElementById("version").textContent = restext - ); - - convert(); - -})(); - diff --git a/weeder.dhall b/weeder.dhall new file mode 100644 index 000000000000..5ef3252f8346 --- /dev/null +++ b/weeder.dhall @@ -0,0 +1 @@ +{ roots = [ "^Main.main$", "^Paths_.*$" ] , type-class-roots = True } diff --git a/src/Text/Pandoc/XML/Light.hs b/xml-light/Text/Pandoc/XML/Light.hs similarity index 99% rename from src/Text/Pandoc/XML/Light.hs rename to xml-light/Text/Pandoc/XML/Light.hs index 85095338d4d3..61025cb5cfdc 100644 --- a/src/Text/Pandoc/XML/Light.hs +++ b/xml-light/Text/Pandoc/XML/Light.hs @@ -111,4 +111,3 @@ nodeToContent (Conduit.NodeElement el) = nodeToContent (Conduit.NodeContent t) = Just (Text (CData CDataText t Nothing)) nodeToContent _ = Nothing - diff --git a/src/Text/Pandoc/XML/Light/Output.hs b/xml-light/Text/Pandoc/XML/Light/Output.hs similarity index 100% rename from src/Text/Pandoc/XML/Light/Output.hs rename to xml-light/Text/Pandoc/XML/Light/Output.hs diff --git a/src/Text/Pandoc/XML/Light/Proc.hs b/xml-light/Text/Pandoc/XML/Light/Proc.hs similarity index 99% rename from src/Text/Pandoc/XML/Light/Proc.hs rename to xml-light/Text/Pandoc/XML/Light/Proc.hs index 73b80bbee694..87104f3a4d88 100644 --- a/src/Text/Pandoc/XML/Light/Proc.hs +++ b/xml-light/Text/Pandoc/XML/Light/Proc.hs @@ -11,7 +11,7 @@ This code is taken from xml-light, released under the BSD3 license. -} module Text.Pandoc.XML.Light.Proc - ( + ( -- * Replacement for xml-light's Text.XML.Proc strContent , onlyElems @@ -135,5 +135,3 @@ lookupAttrBy p as = attrVal `fmap` find (p . attrKey) as -- satisfies the given predicate. 
findAttrBy :: (QName -> Bool) -> Element -> Maybe Text findAttrBy p e = lookupAttrBy p (elAttribs e) - - diff --git a/src/Text/Pandoc/XML/Light/Types.hs b/xml-light/Text/Pandoc/XML/Light/Types.hs similarity index 99% rename from src/Text/Pandoc/XML/Light/Types.hs rename to xml-light/Text/Pandoc/XML/Light/Types.hs index e7698a0a2ba7..59edb897916c 100644 --- a/src/Text/Pandoc/XML/Light/Types.hs +++ b/xml-light/Text/Pandoc/XML/Light/Types.hs @@ -189,5 +189,3 @@ fromXLContent :: XL.Content -> Content fromXLContent (XL.Elem el) = Elem $ fromXLElement el fromXLContent (XL.Text cd) = Text $ fromXLCData cd fromXLContent (XL.CRef s) = CRef (T.pack s) - -