From 23f93c16957176b4b29194b85938ecf203b20872 Mon Sep 17 00:00:00 2001 From: Saket Chaudhary Date: Wed, 9 Oct 2024 22:23:10 +0530 Subject: [PATCH] checks & pollServer --- checks/startup_check.go | 20 ++++++++++++++------ config/config.go | 6 ++++++ lambda/logserver/logserver.go | 1 + main.go | 22 ++++++++++++++++++---- telemetry/batch.go | 2 ++ 5 files changed, 41 insertions(+), 10 deletions(-) diff --git a/checks/startup_check.go b/checks/startup_check.go index 0e94e66..03499a6 100644 --- a/checks/startup_check.go +++ b/checks/startup_check.go @@ -18,11 +18,12 @@ type LogSender interface { } /// Register checks here -var checks = []checkFn{ - agentVersionCheck, - handlerCheck, - sanityCheck, - vendorCheck, + +var checks = map[string]checkFn{ + "agentversion": agentVersionCheck, + "handler": handlerCheck, + "sanity": sanityCheck, + "vendor": vendorCheck, } func RunChecks(ctx context.Context, conf *config.Configuration, reg *api.RegistrationResponse, logSender LogSender) { @@ -32,8 +33,15 @@ func RunChecks(ctx context.Context, conf *config.Configuration, reg *api.Registr util.Logln(errLog) } - for _, check := range checks { + for name, check := range checks { + // Time where we start the check + if name == conf.StartUpCheck { + fmt.Printf("Bypassing check %s\n", name) + continue + } + fmt.Printf("Running check %s\n", name) runCheck(ctx, conf, reg, runtimeConfig, logSender, check) + // Time where we end the check } } diff --git a/config/config.go b/config/config.go index b02eec0..16fab1d 100644 --- a/config/config.go +++ b/config/config.go @@ -21,6 +21,7 @@ var EmptyNRWrapper = "Undefined" type Configuration struct { TestingOverride bool // ignores envioronment specific details when running unit tests ExtensionEnabled bool + StartUpCheck string LogsEnabled bool SendFunctionLogs bool CollectTraceID bool @@ -39,6 +40,7 @@ type Configuration struct { func ConfigurationFromEnvironment() *Configuration { nrEnabledStr, nrEnabledOverride := os.LookupEnv("NEW_RELIC_ENABLED") + nrStartupCheckStr, nrStartupCheckOverride := os.LookupEnv("NEW_RELIC_STARTUP_CHECK") nrEnabledRubyStr, nrEnabledRubyOverride := os.LookupEnv("NEW_RELIC_AGENT_ENABLED") enabledStr, extensionEnabledOverride := os.LookupEnv("NEW_RELIC_LAMBDA_EXTENSION_ENABLED") licenseKey, lkOverride := os.LookupEnv("NEW_RELIC_LICENSE_KEY") @@ -103,6 +105,10 @@ func ConfigurationFromEnvironment() *Configuration { ret.TelemetryEndpoint = telemetryEndpoint } + if nrStartupCheckOverride { + ret.StartUpCheck = nrStartupCheckStr + } + if leOverride { ret.LogEndpoint = logEndpoint } diff --git a/lambda/logserver/logserver.go b/lambda/logserver/logserver.go index ad086ad..14d15f4 100644 --- a/lambda/logserver/logserver.go +++ b/lambda/logserver/logserver.go @@ -140,6 +140,7 @@ func (ls *LogServer) handler(res http.ResponseWriter, req *http.Request) { case "platform.report": metricString := "" requestId := "" + util.Debugf("platform log from platform.report : %v", event.Record) switch event.Record.(type) { case map[string]interface{}: record := event.Record.(map[string]interface{}) diff --git a/main.go b/main.go index cedfb23..30a947b 100644 --- a/main.go +++ b/main.go @@ -137,6 +137,11 @@ func main() { // Run startup checks go func() { + if conf.StartUpCheck == "all" { + // avoid all checks + util.Logf("Bypassing all checks") + return + } checks.RunChecks(ctx, conf, registrationResponse, telemetryClient) }() @@ -154,7 +159,7 @@ func main() { util.Logf("New Relic Extension shutting down after %v events\n", eventCounter) - pollLogServer(logServer, batch) + pollLogServer(logServer, batch, "Extension shutdown Logs") err = logServer.Close() if err != nil { util.Logln("Error shutting down Log API server", err) @@ -273,7 +278,7 @@ func mainLoop(ctx context.Context, invocationClient *client.InvocationClient, ba // Before we begin to await telemetry, harvest and ship. Ripe telemetry will mostly be handled here. Even that is a // minority of invocations. Putting this here lets us run the HTTP request to send to NR in parallel with the Lambda // handler, reducing or eliminating our latency impact. - pollLogServer(logServer, batch) + pollLogServer(logServer, batch, "mainLoop Ripe telemetry") shipHarvest(ctx, batch.Harvest(time.Now()), telemetryClient) select { @@ -295,7 +300,7 @@ func mainLoop(ctx context.Context, invocationClient *client.InvocationClient, ba // Opportunity for an aggressive harvest, in which case, we definitely want to wait for the HTTP POST // to complete. Mostly, nothing really happens here. - pollLogServer(logServer, batch) + pollLogServer(logServer, batch, "mainLoop") shipHarvest(ctx, batch.Harvest(time.Now()), telemetryClient) } @@ -305,11 +310,20 @@ func mainLoop(ctx context.Context, invocationClient *client.InvocationClient, ba } // pollLogServer polls for platform logs, and annotates telemetry -func pollLogServer(logServer *logserver.LogServer, batch *telemetry.Batch) { +func pollLogServer(logServer *logserver.LogServer, batch *telemetry.Batch, callingFunction string) { + util.Debugf("Platform logServer Called by %s", callingFunction) for _, platformLog := range logServer.PollPlatformChannel() { + util.Debugf("Platform log: %s", string(platformLog.Content)) inv := batch.AddTelemetry(platformLog.RequestID, platformLog.Content) if inv == nil { util.Debugf("Skipping platform log for request %v", platformLog.RequestID) + eventStart := time.Now() + batch.AddInvocation(platformLog.RequestID, eventStart) + inv := batch.AddTelemetry(platformLog.RequestID, platformLog.Content) + if inv == nil { + util.Logf("Failed to add telemetry for request %v", lastRequestId) + } + util.Logf("successfully added telemetry for request %v", platformLog.RequestID) } } } diff --git a/telemetry/batch.go b/telemetry/batch.go index 7b9a573..c5ae735 100644 --- a/telemetry/batch.go +++ b/telemetry/batch.go @@ -67,6 +67,8 @@ func (b *Batch) AddTelemetry(requestId string, telemetry []byte) *Invocation { } return inv } + util.Debugf("No invocation found for request ID %s\n", requestId) + util.Debugf("Current invocations: %v\n", b.invocations) return nil }