From b8426cf91ba09cce25d62062ae3f480f5576b02d Mon Sep 17 00:00:00 2001 From: Ben Date: Tue, 10 Dec 2024 16:23:07 +0100 Subject: [PATCH] Implement log analyzer Signed-off-by: Ben --- go.mod | 1 + go.sum | 2 + pkg/diagnostics/collect.go | 2 +- pkg/diagnostics/diagnostics.go | 18 ++ pkg/diagnostics/log_analysis.go | 330 ++++++++++++++++++++++++++++++++ pkg/util/util.go | 23 ++- 6 files changed, 373 insertions(+), 3 deletions(-) create mode 100644 pkg/diagnostics/log_analysis.go diff --git a/go.mod b/go.mod index 2e916cb552..4f89ad0289 100644 --- a/go.mod +++ b/go.mod @@ -28,6 +28,7 @@ require ( github.com/kedacore/keda/v2 v2.7.0 github.com/kube-object-storage/lib-bucket-provisioner v0.0.0-20221122204822-d1a8c34382f1 github.com/libopenstorage/secrets v0.0.0-20240416031220-a17cf7f72c6c + github.com/lithammer/fuzzysearch v1.1.8 github.com/marstr/randname v0.0.0-20200428202425-99aca53a2176 github.com/onsi/ginkgo/v2 v2.20.2 github.com/onsi/gomega v1.34.1 diff --git a/go.sum b/go.sum index fee5fbecdb..5436a158c2 100644 --- a/go.sum +++ b/go.sum @@ -722,6 +722,8 @@ github.com/libopenstorage/operator v0.0.0-20200725001727-48d03e197117/go.mod h1: github.com/libopenstorage/stork v1.3.0-beta1.0.20200630005842-9255e7a98775/go.mod h1:qBSzYTJVHlOMg5RINNiHD1kBzlasnrc2uKLPZLgu1Qs= github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de h1:9TO3cAIGXtEhnIaL+V+BEER86oLrvS+kWobKpbJuye0= github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de/go.mod h1:zAbeS9B/r2mtpb6U+EI2rYA5OAXxsYw6wTamcNW+zcE= +github.com/lithammer/fuzzysearch v1.1.8 h1:/HIuJnjHuXS8bKaiTMeeDlW2/AyIWk2brx1V8LFgLN4= +github.com/lithammer/fuzzysearch v1.1.8/go.mod h1:IdqeyBClc3FFqSzYq/MXESsS4S0FsZ5ajtkr5xPLts4= github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY= diff --git a/pkg/diagnostics/collect.go b/pkg/diagnostics/collect.go index 1b2207ab4a..80fd5580f9 100644 --- a/pkg/diagnostics/collect.go +++ b/pkg/diagnostics/collect.go @@ -140,7 +140,7 @@ func (c *Collector) CollectPodsLogs(listOptions client.ListOptions) { c.CollectDescribe("pod", pod.Name) - podLogs, _ := util.GetPodLogs(*pod) + podLogs, _ := util.GetPodLogs(*pod, nil, false) for containerName, containerLog := range podLogs { targetFile := fmt.Sprintf("%s/%s-%s.log", c.folderName, pod.Name, containerName) err := util.SaveStreamToFile(containerLog, targetFile) diff --git a/pkg/diagnostics/diagnostics.go b/pkg/diagnostics/diagnostics.go index 215d4deef2..c5438c6e79 100644 --- a/pkg/diagnostics/diagnostics.go +++ b/pkg/diagnostics/diagnostics.go @@ -25,6 +25,7 @@ func Cmd() *cobra.Command { CmdDbDump(), CmdAnalyze(), CmdReport(), + CmdLogAnalysis(), ) return cmd } @@ -123,6 +124,23 @@ func CmdAnalyzeResources() *cobra.Command { return cmd } +// CmdLogAnalysis returns a CLI command +func CmdLogAnalysis() *cobra.Command { + cmd := &cobra.Command{ + Use: "log-analysis", + Short: "Run the log analyzer on all NooBaa pods", + Run: RunLogAnalysis, + } + cmd.Flags().Bool("fuzzy", false, "(Experimental) Toggle fuzzy matching for the search string") + cmd.Flags().Int64("tail", 1000, "Number of lines to tail from the logs, minimum 1, default 1000") + cmd.Flags().Bool("noobaa-time", false, "Use NooBaa-provided timestamps instead of the Kubernetes ones (~10ms earlier than Kubernetes)") + cmd.Flags().BoolP("case-insensitive", "i", false, "Toggle search-string case sensitivity (similar to grep's -i flag)") + cmd.Flags().BoolP("whole-string", "w", false, "Match the whole search string as a single word (similar to grep's -w flag)") + cmd.Flags().Bool("prefer-line", false, "Prefer to print the line containing the search string when it doesn't contain a timestamp") + + return cmd +} + /////// Deprecated Functions /////// // CmdDbDumpDeprecated returns a CLI command diff --git a/pkg/diagnostics/log_analysis.go b/pkg/diagnostics/log_analysis.go new file mode 100644 index 0000000000..d79c76c560 --- /dev/null +++ b/pkg/diagnostics/log_analysis.go @@ -0,0 +1,330 @@ +package diagnostics + +import ( + "bufio" + "fmt" + "regexp" + "strings" + "time" + + "github.com/noobaa/noobaa-operator/v5/pkg/options" + "github.com/noobaa/noobaa-operator/v5/pkg/util" + "github.com/lithammer/fuzzysearch/fuzzy" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/labels" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/spf13/cobra" +) + +// LogAnalysisOptions holds parameters for log analysis +type LogAnalysisOptions struct { + tailLines int64 + fuzzyMatch bool + searchString string + noobaaTimestamp bool + matchWholeString bool + caseInsensitivity bool + showLineIfNoTimestamp bool +} + +const ( + noobaaOutputTimestampFormat = "02/01/2006 15:04:05" + // To be used in case of additional NooBaa operators in the future + noobaaOperatorPodNamePrefix = "noobaa-operator" + operatorTimestampPrefix = `time="` + operatorWTimestampPrefix = "W" +) + +var ( + ansiRegex = regexp.MustCompile(`\x1b\[[0-9;]*m`) +) + +// RunLogAnalysis runs a CLI command +func RunLogAnalysis(cmd *cobra.Command, args []string) { + tailLines, _ := cmd.Flags().GetInt64("tail") + fuzzyMatch, _ := cmd.Flags().GetBool("fuzzy") + noobaaTimestamp, _ := cmd.Flags().GetBool("noobaa-time") + showLineIfNoTimestamp, _ := cmd.Flags().GetBool("prefer-line") + matchWholeString, _ := cmd.Flags().GetBool("whole-string") + caseInsensitivity, _ := cmd.Flags().GetBool("case-insensitive") + searchString := "" + + analysisOptions := LogAnalysisOptions{ + tailLines: tailLines, + fuzzyMatch: fuzzyMatch, + searchString: searchString, + noobaaTimestamp: noobaaTimestamp, + matchWholeString: matchWholeString, + caseInsensitivity: caseInsensitivity, + showLineIfNoTimestamp: showLineIfNoTimestamp, + } + + validateLogAnalysisParameters(&analysisOptions, args) + podSelector, _ := labels.Parse("app=noobaa") + listOptions := client.ListOptions{Namespace: options.Namespace, LabelSelector: podSelector} + CollectAndAnalyzeLogs(listOptions, &analysisOptions) +} + +// validateLogAnalysisParameters validates the parameters for log analysis +func validateLogAnalysisParameters(analysisOptions *LogAnalysisOptions, args []string) { + log := util.Logger() + if len(args) != 1 || args[0] == "" { + analysisOptions.searchString = util.ShowStringPrompt("Provide a search string") + } else { + analysisOptions.searchString = args[0] + } + + if analysisOptions.tailLines < 1 { + log.Fatalf("❌ Tail must be a whole positive number") + } + + if analysisOptions.fuzzyMatch && analysisOptions.matchWholeString { + log.Fatalf("❌ Cannot use both fuzzy matching and whole-string matching") + } +} + +// CollectAndAnalyzeLogs collects and analyzes logs of all existing noobaa pods +func CollectAndAnalyzeLogs(listOptions client.ListOptions, analysisOptions *LogAnalysisOptions) { + log := util.Logger() + chosenTimestamp := "" + if analysisOptions.noobaaTimestamp { + chosenTimestamp = "NooBaa" + } else { + chosenTimestamp = "Kubernetes" + } + log.Println() + log.Println("✨────────────────────────────────────────────✨") + log.Printf(" Collecting and analyzing pod logs -") + log.Printf(" Search string: %s", analysisOptions.searchString) + log.Printf(" Case insensitivity: %t", analysisOptions.caseInsensitivity) + log.Printf(" Match whole string: %t", analysisOptions.matchWholeString) + log.Printf(" From the last %d lines", analysisOptions.tailLines) + log.Printf(" Using %s timestamps", chosenTimestamp) + log.Println(" Found occurrences will be printed below") + log.Println(" in the format :") + log.Println("✨────────────────────────────────────────────✨") + podList := &corev1.PodList{} + if !util.KubeList(podList, &listOptions) { + log.Printf(`❌ failed to get NooBaa pod list within namespace %s\n`, options.Namespace) + return + } + for i := range podList.Items { + pod := &podList.Items[i] + analyzePodLogs(pod, analysisOptions) + } +} + +// analyzePodLogs will count the number of occurrences of the search string in a pod log +// as well as find and print the timestamps of the first and last occurrence of +// the search string in the logs +func analyzePodLogs(pod *corev1.Pod, analysisOptions *LogAnalysisOptions) { + log := util.Logger() + podLogs, err := util.GetPodLogs(*pod, &analysisOptions.tailLines, true) + if err != nil { + log.Printf("❌ Failed to get logs for pod %s: %v", pod.Name, err) + return + } + if analysisOptions.caseInsensitivity { + analysisOptions.searchString = strings.ToLower(analysisOptions.searchString) + } + stringBoundaryRegex := compileStringBoundaryRegex(analysisOptions) // Compiled here for better efficiency + for containerName, containerLog := range podLogs { + firstAppearanceFound := false + firstAppearanceTimestamp := "" + lastAppearanceTimestamp := "" + lastOccurrenceLine := "" + log.Printf("Analyzing %s:%s", pod.Name, containerName) + defer containerLog.Close() + occurrenceCounter := 0 + scanner := bufio.NewScanner(containerLog) + for scanner.Scan() { + line := scanner.Text() + // Clean line from ANSI escape codes + if !strings.Contains(pod.Name, noobaaOperatorPodNamePrefix) { + line = sanitizeANSI(line) + } + lineContainsMatch := stringMatchCheck(line, stringBoundaryRegex, analysisOptions) + if lineContainsMatch { + if !firstAppearanceFound { + firstAppearanceFound = true + firstAppearanceTimestamp = extractTimeString(pod, line, *analysisOptions) + } + occurrenceCounter++ + lastOccurrenceLine = line + } + } + lastAppearanceTimestamp = extractTimeString(pod, lastOccurrenceLine, *analysisOptions) + if occurrenceCounter == 0 { + log.Println("No occurrences found") + } else { + log.Printf("Hits: %d", occurrenceCounter) + log.Printf("Earliest appearance: %s", firstAppearanceTimestamp) + if occurrenceCounter > 1 { + log.Printf("Latest appearance: %s", lastAppearanceTimestamp) + } + } + log.Println("──────────────────────────────────────────────────────────────────────────────────") + } +} + +// sanitizeANSI removes ANSI escape codes from a string +func sanitizeANSI(line string) string { + // Remove ANSI escape codes from the line + // This is done to avoid the terminal from interpreting them as color codes + // and printing them as garbage characters + // The regex pattern is taken from + return ansiRegex.ReplaceAllString(line, "") +} + +// compileStringBoundaryRegex compiles a word boundary regex pattern for the search string +func compileStringBoundaryRegex(analysisOptions *LogAnalysisOptions) *regexp.Regexp { + var stringBoundarySearchPattern *regexp.Regexp + stringBoundaryPattern := fmt.Sprintf(`\b%s\b`, regexp.QuoteMeta(analysisOptions.searchString)) + if analysisOptions.caseInsensitivity { + stringBoundarySearchPattern = regexp.MustCompile("(?i)" + stringBoundaryPattern) + } else { + stringBoundarySearchPattern = regexp.MustCompile(stringBoundaryPattern) + } + return stringBoundarySearchPattern +} + +// stringMatchCheck checks if a line contains a match to the search string +func stringMatchCheck (line string, stringBoundaryRegex *regexp.Regexp, analysisOptions *LogAnalysisOptions) bool { + if analysisOptions.matchWholeString { + return wholestringMatchCheck(line, stringBoundaryRegex) + } else { + return partialMatchCheck(line, analysisOptions) + } +} + +// wholestringMatchCheck checks if a line contains a whole string match to the search string +// Mostly used for readability and organization purposes +func wholestringMatchCheck (line string, stringBoundaryRegex *regexp.Regexp) bool { + return stringBoundaryRegex.MatchString(line) +} + +// partialMatchCheck checks if a line contains a partial/fuzzy match to the search string +func partialMatchCheck (line string, analysisOptions *LogAnalysisOptions) bool { + if analysisOptions.fuzzyMatch { + fuzzyCaseInsensitiveMatch := analysisOptions.caseInsensitivity && fuzzy.MatchNormalized(analysisOptions.searchString, line) + fuzzyCaseSensitiveMatch := fuzzy.Match(analysisOptions.searchString, line) + return fuzzyCaseInsensitiveMatch || fuzzyCaseSensitiveMatch + } else { + // Check for a match by temporarily casting the line string to lowercase + // (the search string is cast in the beginning of analyzePodLogs) + caseInsensitiveMatch := analysisOptions.caseInsensitivity && strings.Contains(strings.ToLower(line), analysisOptions.searchString) + caseSensitiveMatch := strings.Contains(line, analysisOptions.searchString) + return caseInsensitiveMatch || caseSensitiveMatch + } +} + +// extractTimeString extracts the timestamp from a log line by checking which pod +// it originated from and redirecting it to the appropriate extraction function +func extractTimeString(pod *corev1.Pod, line string, analysisOptions LogAnalysisOptions) string { + if analysisOptions.noobaaTimestamp { + if strings.Contains(pod.Name, noobaaOperatorPodNamePrefix) { + return extractOperatorTimestampString(line, analysisOptions.showLineIfNoTimestamp) + } else { + return extractCoreTimestampString(line, analysisOptions.showLineIfNoTimestamp) + } + } else { + return extractKubernetesTimestampString(line) + } +} + +// extractKubernetesTimestampString extracts the timestamp from a Kubernetes log line +func extractKubernetesTimestampString(line string) string { + // Example log line: + // 2024-12-10T07:27:16.856641898Z Dec-10 7:27:16.847 [BGWorkers/36]... + splitLine := strings.SplitN(line, " ", 2) + return splitLine[0] +} + +// extractCoreTimestampString extracts, parses and formats a timestamp string +// from pods running NooBaa Core code (core, endpoint, PV pod) +func extractCoreTimestampString(line string, showLineIfNoTimestamp bool) string { + // Example log line: + // Dec-9 15:16:31.621 [BGWorkers/36] [L0] ... + const minimumRequiredIndices = 2 + // Example split result: + // ["Dec-9", "15:16:31.621", "[BGWorkers..."] + splitLine := strings.SplitN(line, " ", 3) + if len(splitLine) < minimumRequiredIndices { + return timeParsingError(showLineIfNoTimestamp, line) + } + lineDate := splitLine[0] + lineTime := splitLine[1] + // The year is assumed to be the current one since it's not provided + year := time.Now().Year() + layout := "Jan-2-2006 15:04:05.000" + timestampString := fmt.Sprintf("%s-%d %s", lineDate, year, lineTime) + parsedTime, err := time.Parse(layout, timestampString) + if err != nil { + return timeParsingError(showLineIfNoTimestamp, line) + } + return parsedTime.Format(noobaaOutputTimestampFormat) +} + +// extractOperatorTimestampString extracts, parses, formats and returns a timestamp +// string from the NooBaa Operator pod logs +func extractOperatorTimestampString(line string, showLineIfNoTimestamp bool) string { + if strings.HasPrefix(line, operatorTimestampPrefix) { + return extractStandardOperatorTimestampString(line, showLineIfNoTimestamp) + } else if strings.HasPrefix(line, operatorWTimestampPrefix) { + return extractOperatorWTimestampString(line, showLineIfNoTimestamp) + } + return timeParsingError(showLineIfNoTimestamp, line) +} + +// extractStandardOperatorTimestampString extracts the timestamp in case of a standard operator log line +// Example: +// time="2024-12-10T07:27:36Z" level=info msg="... +func extractStandardOperatorTimestampString(line string, showLineIfNoTimestamp bool) string { + secondQuotesIndex := strings.Index(line[len(operatorTimestampPrefix):], `"`) + if secondQuotesIndex == -1 { + return timeParsingError(showLineIfNoTimestamp, line) + } + timestampString := line[len(operatorTimestampPrefix) : len(operatorTimestampPrefix) + secondQuotesIndex] + // Parse the date using RFC3339 layout + const operatorTimestampFormat = time.RFC3339 + parsedTimestamp, err := time.Parse(operatorTimestampFormat, timestampString) + if err != nil { + return timeParsingError(showLineIfNoTimestamp, line) + } + return parsedTimestamp.Format(noobaaOutputTimestampFormat) +} + +// extractOperatorWTimestampString extracts the timestamp in case of a non-standard operator log line +// Example: +// W1209 13:41:05.890285 1 reflector.go:484... +func extractOperatorWTimestampString(line string, showLineIfNoTimestamp bool) string { + const minimumWTimeParseLength = 22 + if len(line) < minimumWTimeParseLength { + return timeParsingError(showLineIfNoTimestamp, line) + } + wStringDayMonthStartIndex := 1 + wStringDayMonthEndIndex := 5 + wStringTimeStartIndex := 6 + wStringTimeEndIndex := 21 + datePart := line[wStringDayMonthStartIndex : wStringDayMonthEndIndex] + timePart := line[wStringTimeStartIndex : wStringTimeEndIndex] + day := datePart[2:] + month := datePart[:2] + // The year is assumed to be the current one since it's not provided + year := time.Now().Year() + fullTimeStr := fmt.Sprintf("%s/%s/%d %s", day, month, year, timePart) + return fullTimeStr +} + +// timeParsingError returns an error message if the timestamp +// could not be parsed based on the user's prefer-line setting +func timeParsingError(showLineIfNoTimestamp bool, line string) string { + absentTimestampErrorWithLine := fmt.Sprintf("Could not parse timestamp in line %s", line) + const absentTimestampError = "No timestamp found" + if showLineIfNoTimestamp { + return absentTimestampErrorWithLine + } + return absentTimestampError +} diff --git a/pkg/util/util.go b/pkg/util/util.go index 3011e2faff..fc048fcf6d 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -692,7 +692,9 @@ func GetPodStatusLine(pod *corev1.Pod) string { } // GetPodLogs info -func GetPodLogs(pod corev1.Pod) (map[string]io.ReadCloser, error) { +// Note: tailLines can be used to limit the amount of logs returned +// if you're not interested in any filters, pass nil +func GetPodLogs(pod corev1.Pod, tailLines *int64, timestamps bool) (map[string]io.ReadCloser, error) { allContainers := append(pod.Spec.InitContainers, pod.Spec.Containers...) config := KubeConfig() clientset, err := kubernetes.NewForConfig(config) @@ -723,11 +725,18 @@ func GetPodLogs(pod corev1.Pod) (map[string]io.ReadCloser, error) { // retrieve logs from a current instantiation of pod containers podLogOpts := corev1.PodLogOptions{Container: container.Name} + podLogOpts.Timestamps = timestamps + if tailLines != nil { + podLogOpts.TailLines = tailLines + } getPodLogOpts(&pod, &podLogOpts, nil) // retrieve logs from a previous instantiation of pod containers - prevPodLogOpts := corev1.PodLogOptions{Container: container.Name, Previous: true} + prevPodLogOpts := corev1.PodLogOptions{Container: container.Name, Previous: true, Timestamps: timestamps} previousSuffix := "previous" + if tailLines != nil { + prevPodLogOpts.TailLines = tailLines + } getPodLogOpts(&pod, &prevPodLogOpts, &previousSuffix) } return containerMap, nil @@ -1208,6 +1217,15 @@ func GetFlagIntOrPrompt(cmd *cobra.Command, flag string) int { return val } +// ShowStringPrompt prompts a user to provide a string +func ShowStringPrompt(prompt string) string { + fmt.Printf("%s: ", prompt) + var str string + _, err := fmt.Scan(&str) + Panic(err) + return str +} + // GetFlagStringOrPrompt returns flag value but if empty will prompt to read from stdin func GetFlagStringOrPrompt(cmd *cobra.Command, flag string) string { str, _ := cmd.Flags().GetString(flag) @@ -1252,6 +1270,7 @@ func GetBoolFlagPtr(cmd *cobra.Command, flag string) (*bool, error) { return nil, nil } + // PrintThisNoteWhenFinishedApplyingAndStartWaitLoop is a common log task func PrintThisNoteWhenFinishedApplyingAndStartWaitLoop() { log := Logger()