[Commit slider] LLM bench support (openvinotoolkit#28037)

### Details: - *item1* - *...* ### Tickets: - *ticket-id*
11happy · Dec 23, 2024 · 26452a3 · 26452a3
1 parent 157d1b5
commit 26452a3
Show file tree

Hide file tree

Showing 3 changed files with 227 additions and 0 deletions.
diff --git a/src/plugins/intel_cpu/tools/commit_slider/utils/cfg.json b/src/plugins/intel_cpu/tools/commit_slider/utils/cfg.json
@@ -4,6 +4,7 @@
         "bmPerf" : "BenchmarkAppPerformanceMode",
         "compareBlobs" : "CompareBlobsMode",
         "ac" : "AccuracyCheckerMode",
+        "llmBench" : "LLMBenchMode",
         "nop" : "NopMode"
     },
     "traversalMap" : {

diff --git a/src/plugins/intel_cpu/tools/commit_slider/utils/cfg_samples/llm_bench.json b/src/plugins/intel_cpu/tools/commit_slider/utils/cfg_samples/llm_bench.json
@@ -0,0 +1,72 @@
+{
+    "appCmd":"source {venvName}/bin/activate && cd {appPath} && python3.10 -m pip install --upgrade pip && python3.10 -m pip install openvino=={wheelVersion} openvino_genai=={wheelVersion} openvino_tokenizers=={wheelVersion} --find-links={precommitPath}wheels/ && python3.10 -m pip install -r requirements.txt && {cmd}",
+    "appPath" : "{appPath}",
+    "venvCfg":{
+       "venvEnabled":true,
+       "venvDir":"{workPath}/venv/",
+       "venvName":"tempVenv"
+    },
+    "commandList":[
+
+    ],
+    "runConfig":{
+       "mode":"llmBench",
+       "traversal":"firstFailedVersion",
+       "perfAppropriateDeviation" : 0.05,
+       "commitList" : {
+         "getCommitListCmd" : "git log {c1}..{c2} --boundary --pretty=\"%h\""
+     }
+    },
+    "dlbConfig":{
+       "launchedAsJob":false,
+       "toolName":"",
+       "wheelVersionsMap":{
+
+       },
+       "commonPath":"{commitPath}",
+       "subPath":"{subPath}",
+       "appPath":"",
+       "appCmd":""
+    },
+    "cachedPathConfig":{
+       "enabled":true,
+       "scheme":"mandatory",
+       "passCmdList":false,
+       "changeAppPath":false,
+       "commonPath":"{commitPath}",
+       "subPath":"{subPath}",
+       "cashMap":{
+
+       }
+    },
+    "substitutionRules":[
+       {
+          "name":"precommitPath",
+          "enabled":true,
+          "type":"map",
+          "placeholder":"precommitPath",
+          "from":"$.cachedPathConfig.cashMap",
+          "to":"$.appCmd"
+       },
+       {
+          "name":"wheelVersion",
+          "enabled":true,
+          "type":"map",
+          "placeholder":"wheelVersion",
+          "from":"$.dlbConfig.wheelVersionsMap",
+          "to":"$.appCmd"
+       }
+    ],
+    "subscriptions":[
+       {
+          "name":"wheelPathsMap",
+          "enabled":true
+       },
+       {
+          "name":"wheelVersionsMap",
+          "enabled":true
+       }
+    ],
+    "verboseOutput" : true,
+    "checkIfBordersDiffer" : true
+ }
diff --git a/src/plugins/intel_cpu/tools/commit_slider/utils/modes.py b/src/plugins/intel_cpu/tools/commit_slider/utils/modes.py
@@ -284,6 +284,160 @@ def getCommitInfo(self, commit):
                 ci=super().getCommitInfo(commit),
                 d=commit.perfRel)
 
+
+class LLMBenchMode(Mode):
+    def __init__(self, cfg):
+        super().__init__(cfg)
+        self.perfRel = 0
+        self.createCash()
+
+    def isPerformanceBased(self):
+        return True
+
+    def prepareRun(self, list, cfg):
+        super().prepareRun(list, cfg)
+        sampleCommit = list[0]
+        sampleCommit = sampleCommit.replace('"', "")
+        self.commonLogger.info(
+            "Prepare sample commit - {commit}".format(commit=sampleCommit)
+        )
+        commitLogger = getCommitLogger(cfg, sampleCommit)
+        foundThroughput = 0
+        isCommitCashed, cashedThroughput = self.getCommitIfCashed(sampleCommit)
+        if isCommitCashed:
+            logMsg = "Cashed commit - {commit}".format(commit=sampleCommit)
+            self.commonLogger.info(logMsg)
+            commitLogger.info(logMsg)
+            foundThroughput = cashedThroughput
+        else:
+            handleCommit(sampleCommit, cfg)
+            output = fetchAppOutput(cfg, sampleCommit)
+            commitLogger.info(output)
+            foundThroughput = re.search(
+                self.outPattern, output, flags=re.MULTILINE
+            ).group(1)
+            self.setCommitCash(sampleCommit, float(foundThroughput))
+        self.sampleThroughput = float(foundThroughput)
+        return list
+
+    def checkCfg(self, cfg):
+        super().checkCfg(cfg)
+        if not ("perfAppropriateDeviation" in cfg["runConfig"]):
+            raise CfgError("Appropriate deviation is not configured")
+        else:
+            self.apprDev = cfg["runConfig"]["perfAppropriateDeviation"]
+        if ("metric" in cfg["runConfig"]):
+            self.outPattern = self.specifyMetric(cfg["runConfig"]["metric"])
+        else:
+            self.outPattern = self.specifyMetric()
+
+
+    def specifyMetric(self, metric: str = "First token latency"):
+        if metric in [
+            "First token latency"]:
+            res = r"First token latency:\s*([0-9]*[.][0-9]*)\s*ms/token"
+            return res
+        raise CfgError("Metric {} is not supported".format(metric))
+
+    def preliminaryCheck(self, list, cfg):
+        # # model path checking - todo is necessary ?
+        # common if-degradation-exists check
+        super().preliminaryCheck(list, cfg)
+
+        # performance - specific check if results for borders are stable,
+        isLeftStable = not cfg["preliminaryCheckCfg"]["leftCheck"] or\
+            self.preliminaryStabilityCheck(list[0], cfg)
+        isRightStable = not cfg["preliminaryCheckCfg"]["rightCheck"] or\
+            self.preliminaryStabilityCheck(list[-1], cfg)
+        if (not isLeftStable or not isRightStable):
+            raise PreliminaryAnalysisError(
+                "{lCommit} is {lStable}, {rCommit} is {rStable}".format(
+                    lCommit=list[0],
+                    rCommit=list[-1],
+                    lStable="stable" if isLeftStable else "unstable",
+                    rStable="stable" if isRightStable else "unstable"
+                ),
+                PreliminaryAnalysisError.PreliminaryErrType.UNSTABLE_APPLICATION
+                )
+
+    def compareCommits(self, lCommit: str, rCommit: str, cfg: map):
+        leftThroughput = self.getPseudoMetric(lCommit, cfg)
+        rightThroughput = self.getPseudoMetric(rCommit, cfg)
+        isBad, curRel = self.traversal.numericComparator(
+            leftThroughput, rightThroughput, self.apprDev
+        )
+        if isBad:
+            self.perfRel = curRel
+        curCommit = rCommit.replace('"', "")
+        commitLogger = getCommitLogger(cfg, curCommit)
+        commitLogger.info("Performance relation is {rel}".format(rel=curRel))
+        commitLogger.info(
+            "Commit is {status}".format(status=("bad" if isBad else "good"))
+        )
+        return isBad
+
+    def getPseudoMetric(self, commit, cfg):
+        commit = commit.replace('"', "")
+        curThroughput = 0
+        commitLogger = getCommitLogger(cfg, commit)
+        isCommitCashed, cashedThroughput = self.getCommitIfCashed(commit)
+        pc = Mode.CommitPath.PathCommit(
+            commit,
+            Mode.CommitPath.CommitState.DEFAULT
+        )
+        self.setOutputInfo(pc)
+        self.commitPath.accept(self.traversal, pc)
+        if isCommitCashed:
+            logMsg = "Cashed commit - {commit}".format(commit=commit)
+            self.commonLogger.info(logMsg)
+            commitLogger.info(logMsg)
+            curThroughput = cashedThroughput
+        else:
+            self.commonLogger.info("New commit: {commit}".format(
+                commit=commit)
+            )
+            handleCommit(commit, cfg)
+            output = fetchAppOutput(cfg, commit)
+            commitLogger.info(output)
+            foundThroughput = re.search(
+                self.outPattern, output, flags=re.MULTILINE
+            ).group(1)
+            curThroughput = float(foundThroughput)
+            self.setCommitCash(commit, curThroughput)
+        return curThroughput
+
+    def preliminaryStabilityCheck(self, commit, cfg):
+        commit = commit.replace('"', "")
+        curThroughput = 0
+
+        self.commonLogger.info(
+            "Preliminary check of commit: {commit}".format(
+                commit=commit)
+        )
+        handleCommit(commit, cfg)
+        throughputList = []
+        dev = self.apprDev = cfg["runConfig"]["perfAppropriateDeviation"]
+        for i in range(cfg["preliminaryCheckCfg"]["tryCount"]):
+            output = fetchAppOutput(cfg, commit)
+            foundThroughput = re.search(
+                self.outPattern, output, flags=re.MULTILINE
+            ).group(1)
+            curThroughput = float(foundThroughput)
+            throughputList.append(curThroughput)
+        resStable = checkStability(throughputList, dev)
+        if resStable:
+            self.setCommitCash(commit, curThroughput)
+        return resStable
+
+    def setOutputInfo(self, pathCommit):
+        pathCommit.perfRel = self.perfRel
+
+    def getCommitInfo(self, commit):
+        return "{ci}, perf. ratio = {d}".format(
+                ci=super().getCommitInfo(commit),
+                d=commit.perfRel)
+
+
 class AccuracyCheckerMode(Mode):
     def __init__(self, cfg):
         super().__init__(cfg)