Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support query for output record derivation trace. #725

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 93 additions & 11 deletions debugger/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,101 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
-}

import Language.DifferentialDatalog.Debugger.DebugEventParser
import Text.Parsec
{-# LANGUAGE RecordWildCards, ImplicitParams, LambdaCase, FlexibleContexts, TemplateHaskell #-}

import Control.Exception
import Control.Monad
import Data.List
import System.Console.GetOpt
import System.Environment
import System.FilePath.Posix
import Text.Parsec

import Language.DifferentialDatalog.Debugger.DebugTypes
import Language.DifferentialDatalog.Debugger.DebugState
import Language.DifferentialDatalog.Debugger.DebugEventParser
import Language.DifferentialDatalog.Module
import Language.DifferentialDatalog.Syntax

data TOption = Help
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need to copy all flags from the DDlog executable. LibDir and Help seem like the only relevant ones.

| DebugDumpFile String
| Datalog String
| LibDir String
| OutputDir String

parseText :: String -> String
parseText inputString = do
case parse eventsParser "" inputString of
Left e -> errorWithoutStackTrace $ "Failed to parse input file: " ++ show e
Right r -> show r
options :: [OptDescr TOption]
options = [ Option ['h'] ["help"] (NoArg Help) "Display help message."
, Option ['d'] [] (ReqArg DebugDumpFile "DEBUG_FILE") "Debug dumped file."
, Option ['i'] [] (ReqArg Datalog "FILE") "DDlog program to compile."
, Option ['L'] [] (ReqArg LibDir "PATH") "Extra DDlog library directory."
, Option ['o'] ["output-dir"] (ReqArg OutputDir "DIR") "Output directory (default based on program name)."
]

addOption :: Config -> TOption -> IO Config
addOption config (DebugDumpFile f) = return config { confDebugDumpFile = f}
addOption config (Datalog f) = return config { confDatalogFile = f}
addOption config (LibDir d) = return config { confLibDirs = nub (d:confLibDirs config)}
addOption config (OutputDir d) = return config { confOutputDir = d }
addOption config Help = return config { confAction = ActionHelp}

validateConfig :: Config -> IO ()
validateConfig Config{..} = do
when (confDatalogFile == "" && confAction /= ActionHelp && confAction /= ActionVersion)
$ errorWithoutStackTrace "input file not specified"

main :: IO ()
main = do
(filename:_) <- getArgs
contents <- readFile filename
let result = parseText contents
putStr result
args <- getArgs
prog <- getProgName
home <- lookupEnv "DDLOG_HOME"
config <- case getOpt Permute options args of
(flags, [], []) -> do
conf <- foldM addOption defaultConfig flags
validateConfig conf
return conf
`catch`
(\e -> do putStrLn $ usageInfo ("Usage: " ++ prog ++ " [OPTION...]") options
throw (e::SomeException))
_ -> errorWithoutStackTrace $ usageInfo ("Usage: " ++ prog ++ " [OPTION...]") options
config' <- case home of
Just(p) -> addOption config (LibDir $ p ++ "/lib")
_ -> return config
do
datalogProg <- parseProgram config'
events <- parseEventFromDumpFile config'
let recordMap = handleDebugEvents events emptyDebuggerMaps datalogProg
s = queryAll events (dbgRecordMap recordMap) datalogProg
dumpQueryResultToFile config' s

queryAll :: [Event] -> DebuggerRecordMap -> DatalogProgram-> String
queryAll [] _ _ = "\n"
queryAll (event: events) dgbRecordMap prog =
let outputRecord = evtOutput event
operatorId = evtOperatorId event
operatorInput = InputOp operatorId
dbgRecord = DebuggerRecord {dbgRecord=outputRecord, dbgOperatorId=operatorInput}
dbgRecordNodes = queryDerivations dbgRecord dgbRecordMap prog
in (show dbgRecordNodes) ++ "\n\n" ++ (queryAll events dgbRecordMap prog)

parseProgram :: Config -> IO (DatalogProgram)
parseProgram Config{..} = do
fdata <- readFile confDatalogFile
(d, _, _) <- parseDatalogProgram (takeDirectory confDatalogFile:confLibDirs) True fdata confDatalogFile
return d

parseEventFromDumpFile :: Config -> IO ([Event])
parseEventFromDumpFile Config{..} = do
contents <- readFile confDebugDumpFile
return (parseDebugEvents contents)

parseDebugEvents :: String -> [Event]
parseDebugEvents inputString = do
case parse eventsParser "" inputString of
Left e -> errorWithoutStackTrace $ "Failed to parse input file: " ++ show e
Right r -> r

dumpQueryResultToFile :: Config -> String -> IO()
dumpQueryResultToFile Config{..} content =
case confOutputDir of
"" -> return ()
_ -> writeFile confOutputDir content
40 changes: 7 additions & 33 deletions src/Language/DifferentialDatalog/Debugger/DebugEventParser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -29,40 +29,11 @@ import Text.Parsec
import Text.Parsec.Language
import Data.Functor.Identity

data Operator = OpMap | OpAggregate | OpCondition | OpJoin
| OpAntijoin | OpInspect | OpUndefined deriving (Show)

data OperatorId = OperatorId {opRelId:: Int, opRule::Int, opOperaror::Int} deriving (Show)

data Event = DebugEvent { evtOperatorId :: OperatorId
, evtWeight :: Int
, evtTimestamp :: Integer
, evtOperator :: Operator
, evtInput :: Record
, evtOutput :: Record
}
| DebugJoinEvent { evtOperatorId :: OperatorId
, evtWeight :: Int
, evtTimestamp :: Integer
, evtOperator :: Operator
, evtInput1 :: Record
, evtInput2 :: Record
, evtOutput :: Record
}
deriving (Show)

data Record = IntRecord {intVal :: Integer}
| BoolRecord {boolVal :: Bool}
| DoubleRecord {doubleVal :: Double}
| StringRecord {stringVal :: String}
| NamedStructRecord {name :: String, val :: [(String, Record)]}
| TupleRecord {tupleVal :: [Record]}
| ArrayRecord {arrayVal :: [Record]}
deriving (Show)
import Language.DifferentialDatalog.Debugger.DebugTypes

debugDef :: GenLanguageDef String u Data.Functor.Identity.Identity
debugDef = emptyDef { T.identStart = alphaNum
, T.identLetter = alphaNum
debugDef = emptyDef { T.identStart = alphaNum <|> char '_'
, T.identLetter = alphaNum <|> char '_' <|> char ':'
, T.caseSensitive = True}

identifier :: ParsecT String u Identity String
Expand All @@ -89,6 +60,9 @@ parens = T.parens lexer
brackets :: ParsecT String u Identity a -> ParsecT String u Identity a
brackets = T.brackets lexer

integer :: ParsecT String u Identity Integer
integer = T.integer lexer

decimal :: ParsecT String u Identity Integer
decimal = T.decimal lexer

Expand Down Expand Up @@ -143,7 +117,7 @@ debugEventParser :: ParsecT String u Identity Event
debugEventParser = do
opid <- parens (OperatorId <$> (fromIntegral <$> (decimal <* comma))
<*> (fromIntegral <$> (decimal <* comma)) <*> (fromIntegral <$> decimal))
w <- fromIntegral <$> (comma *> decimal)
w <- fromIntegral <$> (comma *> integer)
ts <- comma *> decimal <* comma
op <- operatorParser
case op of
Expand Down
197 changes: 197 additions & 0 deletions src/Language/DifferentialDatalog/Debugger/DebugState.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
{-
Copyright (c) 2020 VMware, Inc.
SPDX-License-Identifier: MIT

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
-}

{-# LANGUAGE TupleSections, LambdaCase, RecordWildCards#-}

module Language.DifferentialDatalog.Debugger.DebugState (
queryDerivations,
handleDebugEvents,
emptyDebuggerMaps,
getPredecessorOpId,
DebuggerMaps(..),
DebuggerRecord(..),
OperatorInput(..),
DebuggerRecordMap,
)where

import qualified Data.Map as M
import Data.Maybe

import Language.DifferentialDatalog.Syntax
import Language.DifferentialDatalog.Debugger.DebugTypes

-- (1) Derivation corresponds to the input records for a specifc output record.
-- (2) One derivation could have at most two elements since Join has at most two input
-- records and other operators have only one input record.
-- (3) One output record may have multiple derivations.
type Derivation = [DebuggerRecord]

-- Output Record -> all derivations of this record observed by the debugger
type DebuggerRecordMap = M.Map DebuggerRecord [Derivation]

-- Derivation -> Weight. This map record all derivations and their weights for one
-- particular output record.
type DerivationWeightMap = M.Map Derivation Int

-- Output Record -> {derivation -> weight}.
type DebuggerRecordWeightMap = M.Map DebuggerRecord DerivationWeightMap

-- Operator id for a specific input/intermediate record. It could be either
-- a valid OperatorId used for further trace or an input relation that mark
-- the source of an input relation.
data OperatorInput = InputOp OperatorId
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A comment here would be great.

| InputRel String
deriving (Show, Eq, Ord)

data DebuggerRecord = DebuggerRecord { dbgRecord :: Record
, dbgOperatorId :: OperatorInput
} deriving (Show, Eq, Ord)

-- DebuggerRecordNode
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A few thoughts about this data type. First, it's worth having a high-level explanation along the lines of "DebuggerRecordNode - a node in the derivation tree of a record". Second, this data structure represents the entire derivation tree all the way to DDlog input records. This tree can be huge even for a single output record, which means that we don't necessarily want to construct it proactively, at least not by default. I envisaged a more lazy interface where the user (or a script) expands the derivation tree one node at a time. Having said that, Haskell's lazy evaluation may come to the rescue here. Even though the data structure represents a complete derivation tree, its nodes will not be computed until the user tries to access a particular node, so this might actually be ok.

-- nodeVal : record val and its operator id
-- childrenList : each list element is a children set that construct the parent node
-- there could be multiple derivations, so it is a list of children set
data DebuggerRecordNode = DebuggerRecordNode { nodeVal :: DebuggerRecord
, childrenList :: [[DebuggerRecordNode]]
} deriving (Show)

-- DebuggerMaps: global map that has all derivation information from the dump file.
data DebuggerMaps = DebuggerMaps { dbgRecordMap :: DebuggerRecordMap
, dbgRecordWeightMap :: DebuggerRecordWeightMap
} deriving (Show)

emptyDebuggerMaps :: DebuggerMaps
emptyDebuggerMaps = DebuggerMaps M.empty M.empty

-- Return root node of the derivation tree
queryDerivations :: DebuggerRecord -> DebuggerRecordMap -> DatalogProgram -> DebuggerRecordNode
queryDerivations debuggerRecord debuggerRecordMap prog =
let root = DebuggerRecordNode { nodeVal = debuggerRecord, childrenList = []}
in case (dbgOperatorId debuggerRecord) of
InputRel relName -> case getRelationPredecessorOpId relName prog of
[] -> root
_ -> case M.lookup debuggerRecord debuggerRecordMap of
Nothing -> root
Just derivations -> let childrenList = map (derivationToDebuggerRecordNode debuggerRecordMap prog) derivations
in DebuggerRecordNode {nodeVal = debuggerRecord, childrenList = childrenList}

InputOp _ -> let derivation = M.lookup debuggerRecord debuggerRecordMap
in case derivation of
Nothing -> root -- control should never arrive here
Just derivations -> let childrenList = map (derivationToDebuggerRecordNode debuggerRecordMap prog) derivations
in DebuggerRecordNode {nodeVal = debuggerRecord, childrenList = childrenList}

-- helper function used by `queryDerivations`
-- Derivation: [DebuggerRecord], generally has one/two input records, it stands
-- for one possible derivation for a specific output record

derivationToDebuggerRecordNode :: DebuggerRecordMap -> DatalogProgram -> Derivation -> [DebuggerRecordNode]
derivationToDebuggerRecordNode debuggerRecordMap prog derivation =
map (\inputrecord -> queryDerivations inputrecord debuggerRecordMap prog) derivation

-- Construct the global DebuggerMaps which contains all record inheritance information
-- and used for processing queries.
handleDebugEvents :: [Event] -> DebuggerMaps -> DatalogProgram -> DebuggerMaps
handleDebugEvents [] dbgMaps _ = dbgMaps
handleDebugEvents (event:events) dbgMaps prog =
let updatedMaps = handleDebugEvent event dbgMaps prog
in handleDebugEvents events updatedMaps prog

-- Process a single debug event entry. Each event corresponds to one row in the
-- debug dump file. Add this entry into the global debuggerMap and update its
-- derivation's weight accordingly.
handleDebugEvent :: Event -> DebuggerMaps -> DatalogProgram -> DebuggerMaps
handleDebugEvent event DebuggerMaps{..} prog =
let outputRecord = DebuggerRecord { dbgRecord = (evtOutput event), dbgOperatorId = InputOp (evtOperatorId event)}
predecessorIds = getPredecessorOpId (evtOperatorId event) prog
derivation = case event of
DebugEvent{..} -> let inputRecord = DebuggerRecord { dbgRecord = evtInput, dbgOperatorId = (predecessorIds !! 0)}
in [inputRecord]
DebugJoinEvent{..} -> let inputRecord1 = DebuggerRecord { dbgRecord = evtInput1, dbgOperatorId = (predecessorIds !! 0)}
inputRecord2 = DebuggerRecord { dbgRecord = evtInput2, dbgOperatorId = (predecessorIds !! 1)}
in [inputRecord1, inputRecord2]
derivationWeightMap = case M.lookup outputRecord dbgRecordWeightMap of
Nothing -> M.empty
Just weightMap -> weightMap

updatedWeight = case M.lookup derivation derivationWeightMap of
Nothing -> evtWeight event
Just w -> (evtWeight event) + w
in if updatedWeight == 0
then
let updatedDerivationWeightMap = M.delete derivation derivationWeightMap
updatedRecordWeightMap = M.insert outputRecord updatedDerivationWeightMap dbgRecordWeightMap
in DebuggerMaps { dbgRecordMap = dbgRecordMap, dbgRecordWeightMap = updatedRecordWeightMap}
else
let traceDerivations = M.lookup outputRecord dbgRecordMap
updatedDerivationWeightMap = M.insert derivation updatedWeight derivationWeightMap
updatedRecordWeightMap = M.insert outputRecord updatedDerivationWeightMap dbgRecordWeightMap
in case traceDerivations of
Nothing -> let updatedDbgRecordMap = M.insert outputRecord [derivation] dbgRecordMap
in DebuggerMaps { dbgRecordMap = updatedDbgRecordMap, dbgRecordWeightMap = updatedRecordWeightMap}
Just derivations -> let updatedDbgRecordMap = M.insert outputRecord (derivations ++ [derivation]) dbgRecordMap
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will introduce duplicate derivations, and because we do not track individual derivation weights, we won't be able to eliminate mutually canceling derivations.

in DebuggerMaps { dbgRecordMap = updatedDbgRecordMap, dbgRecordWeightMap = updatedRecordWeightMap}

-- Get the operator if for input records in a debug entry
getPredecessorOpId :: OperatorId -> DatalogProgram-> [OperatorInput]
getPredecessorOpId OperatorId{..} DatalogProgram{..} =
let Rule{..} = progRules !! ruleIdx
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will crash if ruleIdx is out of range. We should not trust input data. One way to deal with this is to run functions that can fail inside MonadError. See, e.g., Validate.hs for examples of this.

ruleRhs = ruleRHS !! rhsIdx
in case ruleRhs of
RHSLiteral{..} ->
if rhsIdx == 0
then [InputRel (atomRelation rhsAtom)]
else let prevRuleRhs = ruleRHS !! (rhsIdx - 1)
in case prevRuleRhs of
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the case analysis here should not be whether prevRuleRhs is a literal or not, but rather whether rhsIdx - 1 == 0.

RHSLiteral{rhsAtom = prevAtom} -> [InputRel (atomRelation prevAtom), InputRel (atomRelation rhsAtom)]
_ -> [InputOp OperatorId {ruleIdx = ruleIdx, rhsIdx = (rhsIdx - 1), headIdx = headIdx}, InputRel (atomRelation rhsAtom)]
RHSCondition{..} -> let prevRhsIdx = getPredecessorRHSRuleIdxForCondition rhsIdx ruleRHS
in [InputOp OperatorId {ruleIdx = ruleIdx, rhsIdx = prevRhsIdx, headIdx = headIdx}]
_ -> [InputOp OperatorId {ruleIdx = ruleIdx, rhsIdx = (rhsIdx - 1), headIdx = headIdx}]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here we want to consider two cases, based on whether rhsIdx - 1 == 0. If it is then we want InputRel, otherwise InputOp


getPredecessorRHSRuleIdxForCondition :: Int -> [RuleRHS] -> Int
getPredecessorRHSRuleIdxForCondition rhsIdx rules =
case (rules !! (rhsIdx-1)) of
RHSCondition{..} -> getPredecessorRHSRuleIdxForCondition (rhsIdx-1) rules
_ -> (rhsIdx-1)


-- Search OperatorId for an InputRel. Search the rules and head name, if the head name
-- equal to the relation name, return the corresponding head index, rule index and rhsRule length
-- minus one as rhs index.
getRelationPredecessorOpId :: String -> DatalogProgram -> [OperatorId]
getRelationPredecessorOpId relName DatalogProgram{..} =
let resultList = map (\ruleIdx -> getOperatorIdFromRule (progRules !! ruleIdx) ruleIdx relName) [0..length (progRules) - 1]
in mergeResultList resultList

getOperatorIdFromRule :: Rule -> Int -> String -> [OperatorId]
getOperatorIdFromRule Rule{..} ruleIdx relName =
catMaybes (map (\i -> let atom = ruleLHS !! i
in if (atomRelation atom) == relName
then Just OperatorId {ruleIdx = ruleIdx, rhsIdx = (length ruleRHS - 1), headIdx = i}
else Nothing) [0..length (ruleLHS) - 1])

mergeResultList :: [[a]] -> [a]
mergeResultList [] = []
mergeResultList (element: elements) =
element ++ (mergeResultList elements)
Loading