From 3aed342a4d8401afe15145a3333edc35325ae55b Mon Sep 17 00:00:00 2001 From: alexbiehl Date: Tue, 8 Aug 2017 18:29:28 +0200 Subject: [PATCH 1/4] Validate doc-index.json on doctarball upload --- Distribution/Server/Features/Documentation.hs | 56 +++++++++++++++++-- hackage-server.cabal | 1 + 2 files changed, 53 insertions(+), 4 deletions(-) diff --git a/Distribution/Server/Features/Documentation.hs b/Distribution/Server/Features/Documentation.hs index 63a7d8607..edcf819f7 100644 --- a/Distribution/Server/Features/Documentation.hs +++ b/Distribution/Server/Features/Documentation.hs @@ -1,5 +1,6 @@ {-# LANGUAGE RankNTypes, FlexibleContexts, - NamedFieldPuns, RecordWildCards, PatternGuards #-} + NamedFieldPuns, OverloadedStrings, + RecordWildCards, PatternGuards #-} module Distribution.Server.Features.Documentation ( DocumentationFeature(..), DocumentationResource(..), @@ -30,7 +31,11 @@ import qualified Data.ByteString.Lazy as BSL import qualified Data.Map as Map import Data.Function (fix) +import qualified Data.Aeson as Aeson +import qualified Data.HashMap.Strict as HashMap import Data.Aeson (toJSON) +import qualified Text.HTML.TagSoup as TagSoup +import qualified Text.HTML.TagSoup.Match as TagSoup import Data.Time.Clock (NominalDiffTime, diffUTCTime, getCurrentTime) import System.Directory (getModificationTime) @@ -316,7 +321,7 @@ documentationFeature name case pkgVersion pkgid == nullVersion of -- if no version is given we want to redirect to the latest version - True -> tempRedirect latestPkgPath (toResponse "") + True -> tempRedirect latestPkgPath (toResponse BSL.empty) where latest = packageId pkginfo dpath' = [ if var == "package" @@ -352,13 +357,56 @@ documentationFeature name checkDocTarball :: PackageId -> BSL.ByteString -> Either String () checkDocTarball pkgid = checkEntries - . fmapErr (either id show) . Tar.checkTarbomb (display pkgid ++ "-docs") + . fmapErr (either id show) . Tar.checkTarbomb pkgDocsDir . fmapErr (either id show) . Tar.checkSecurity . fmapErr (either id show) . Tar.checkPortability . fmapErr show . Tar.read where + pkgDocsDir = display pkgid ++ "-docs" + fmapErr f = Tar.foldEntries Tar.Next Tar.Done (Tar.Fail . f) - checkEntries = Tar.foldEntries (\_ remainder -> remainder) (Right ()) Left + checkEntries = Tar.foldEntries checkEntry (Right ()) Left + + checkEntry entry remainder + | Tar.entryPath entry == pkgDocsDir "doc-index.json" + , Tar.NormalFile content _ <- Tar.entryContent entry + = checkJsonDocIndex content + | otherwise + = remainder + +checkJsonDocIndex :: BSL.ByteString -> Either String () +checkJsonDocIndex jsDocIndex + | Just (Aeson.Array entries) <- Aeson.decode jsDocIndex + = forM_ entries $ \entry -> do + case entry of + Aeson.Object obj + | Just (Aeson.String displayHtml) <- HashMap.lookup "display_html" obj + -> checkDisplayHtml displayHtml + _ -> Left "Expected display_html property" + | otherwise + = Left "Expected an array element" + where + checkDisplayHtml displayHtml = + checkTags (TagSoup.parseTagsOptions TagSoup.parseOptionsFast displayHtml) + + checkTags [] = Right () + checkTags (t:tx) + | TagSoup.tagOpen hasValidTag hasValidAttrs t + || TagSoup.tagClose hasValidTag t + || TagSoup.tagText (const True) t + = checkTags tx + | otherwise + = Left "Disallowed element found" + + hasValidTag t = t `elem` whitelistedTags + hasValidAttrs _ = True + + whitelistedTags = + [ "a" + , "span" + , "ul" + , "li" + ] {------------------------------------------------------------------------------ Auxiliary diff --git a/hackage-server.cabal b/hackage-server.cabal index a075ee5a3..32a181735 100644 --- a/hackage-server.cabal +++ b/hackage-server.cabal @@ -347,6 +347,7 @@ library lib-server , split ^>= 0.2 , stm ^>= 2.4 , tagged ^>= 0.8.5 + , tagsoup ^>= 0.14 , tar ^>= 0.5 , text ^>= 1.2.2 , time-locale-compat ^>= 0.1.0.1 From 1345bda3bbe43a77b69b0955557d7e9279ef1325 Mon Sep 17 00:00:00 2001 From: alexbiehl Date: Tue, 8 Aug 2017 21:28:56 +0200 Subject: [PATCH 2/4] Use for_ for FTP compat --- Distribution/Server/Features/Documentation.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Distribution/Server/Features/Documentation.hs b/Distribution/Server/Features/Documentation.hs index edcf819f7..e22c569b9 100644 --- a/Distribution/Server/Features/Documentation.hs +++ b/Distribution/Server/Features/Documentation.hs @@ -29,6 +29,7 @@ import Distribution.Version (nullVersion) import qualified Data.ByteString.Lazy as BSL import qualified Data.Map as Map +import Data.Foldable (for_) import Data.Function (fix) import qualified Data.Aeson as Aeson @@ -377,7 +378,7 @@ checkDocTarball pkgid = checkJsonDocIndex :: BSL.ByteString -> Either String () checkJsonDocIndex jsDocIndex | Just (Aeson.Array entries) <- Aeson.decode jsDocIndex - = forM_ entries $ \entry -> do + = for_ entries $ \entry -> do case entry of Aeson.Object obj | Just (Aeson.String displayHtml) <- HashMap.lookup "display_html" obj From 14c9a44aa681f0fe98d859ffafc8c4696de24929 Mon Sep 17 00:00:00 2001 From: alexbiehl Date: Wed, 9 Aug 2017 08:43:36 +0200 Subject: [PATCH 3/4] Cleanup --- Distribution/Server/Features/Documentation.hs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Distribution/Server/Features/Documentation.hs b/Distribution/Server/Features/Documentation.hs index e22c569b9..5780b17cb 100644 --- a/Distribution/Server/Features/Documentation.hs +++ b/Distribution/Server/Features/Documentation.hs @@ -379,14 +379,17 @@ checkJsonDocIndex :: BSL.ByteString -> Either String () checkJsonDocIndex jsDocIndex | Just (Aeson.Array entries) <- Aeson.decode jsDocIndex = for_ entries $ \entry -> do - case entry of - Aeson.Object obj - | Just (Aeson.String displayHtml) <- HashMap.lookup "display_html" obj - -> checkDisplayHtml displayHtml - _ -> Left "Expected display_html property" + case extractDisplayHtml entry of + Just displayHtml -> checkDisplayHtml displayHtml + _ -> Left "Expected display_html property" | otherwise = Left "Expected an array element" where + extractDisplayHtml (Aeson.Object o) = do + Aeson.String displayHtml <- HashMap.lookup "display_html" o + return displayHtml + extractDisplayHtml _ = Nothing + checkDisplayHtml displayHtml = checkTags (TagSoup.parseTagsOptions TagSoup.parseOptionsFast displayHtml) From a2eaf335c6004178fe034fb96b58a9c3d3ef6f7c Mon Sep 17 00:00:00 2001 From: alexbiehl Date: Thu, 10 Aug 2017 20:23:32 +0200 Subject: [PATCH 4/4] doc-index.json needs to be a file --- Distribution/Server/Features/Documentation.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Distribution/Server/Features/Documentation.hs b/Distribution/Server/Features/Documentation.hs index 5780b17cb..3f7687c2c 100644 --- a/Distribution/Server/Features/Documentation.hs +++ b/Distribution/Server/Features/Documentation.hs @@ -370,8 +370,9 @@ checkDocTarball pkgid = checkEntry entry remainder | Tar.entryPath entry == pkgDocsDir "doc-index.json" - , Tar.NormalFile content _ <- Tar.entryContent entry - = checkJsonDocIndex content + = case Tar.entryContent entry of + Tar.NormalFile content _ -> checkJsonDocIndex content + _ -> Left "doc-index.json not a file" | otherwise = remainder