Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DJM] Add new script for dataproc #32525

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitlab/package_build/installer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ installer-install-scripts:
- mkdir -p $OMNIBUS_PACKAGE_DIR
- inv -e installer.build-linux-script "databricks" "$RELEASE_VERSION"
- inv -e installer.build-linux-script "emr" "$RELEASE_VERSION"
- inv -e installer.build-linux-script "dataproc" "$RELEASE_VERSION"
- mv ./bin/installer/install-*.sh $OMNIBUS_PACKAGE_DIR/
- ls -la $OMNIBUS_PACKAGE_DIR
artifacts:
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ require (
)

require (
cloud.google.com/go/compute/metadata v0.5.2 // indirect
cloud.google.com/go/compute/metadata v0.5.2
code.cloudfoundry.org/cfhttp/v2 v2.0.0 // indirect
code.cloudfoundry.org/clock v1.0.0 // indirect
code.cloudfoundry.org/consuladapter v0.0.0-20200131002136-ac1daf48ba97 // indirect
Expand Down
105 changes: 105 additions & 0 deletions pkg/fleet/installer/setup/djm/dataproc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2016-present Datadog, Inc.

// Package djm contains data-jobs-monitoring installation logic
package djm

import (
"cloud.google.com/go/compute/metadata"
"context"
"fmt"
"os"

"github.com/DataDog/datadog-agent/pkg/fleet/installer/setup/common"
"github.com/DataDog/datadog-agent/pkg/util/log"
)

const (
dataprocInjectorVersion = "0.26.0-1"
dataprocJavaTracerVersion = "1.42.2-1"
dataprocAgentVersion = "7.58.2-1"
)

var (
tracerEnvConfigDataproc = []common.InjectTracerConfigEnvVar{
{
Key: "DD_DATA_JOBS_ENABLED",
Value: "true",
},
{
Key: "DD_INTEGRATIONS_ENABLED",
Value: "false",
},
{
Key: "DD_DATA_JOBS_COMMAND_PATTERN",
Value: ".*org.apache.spark.deploy.*",
},
{
Key: "DD_SPARK_APP_NAME_AS_SERVICE",
Value: "true",
},
}
)

// SetupDataproc sets up the DJM environment on Dataproc
func SetupDataproc(s *common.Setup) error {

metadataClient := metadata.NewClient(nil)
Copy link
Contributor

@BaptisteFoy BaptisteFoy Jan 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: as this package basically handles http calls to the metadata endpoints, I wonder if wrapping the transport could let us trace these calls
Like so:

transport := telemetry.WrapRoundTripper(d.client.Transport)

That way we can know if customers have errors and if we need retries

s.Packages.Install(common.DatadogAgentPackage, dataprocAgentVersion)
s.Packages.Install(common.DatadogAPMInjectPackage, dataprocInjectorVersion)
s.Packages.Install(common.DatadogAPMLibraryJavaPackage, dataprocJavaTracerVersion)

os.Setenv("DD_APM_INSTRUMENTATION_ENABLED", "host")

hostname, err := os.Hostname()
if err != nil {
return fmt.Errorf("failed to get hostname: %w", err)
}
s.Config.DatadogYAML.Hostname = hostname
s.Config.DatadogYAML.DJM.Enabled = true
s.Config.InjectTracerYAML.AdditionalEnvironmentVariables = tracerEnvConfigDataproc

// Ensure tags are always attached with the metrics
s.Config.DatadogYAML.ExpectedTagsDuration = "10m"
isMaster, clusterName, err := setupCommonDataprocHostTags(s, metadataClient)
if err != nil {
return fmt.Errorf("failed to set tags: %w", err)
}
if isMaster == "true" {
setupResourceManager(s, clusterName)
}
return nil
}

func setupCommonDataprocHostTags(s *common.Setup, metadataClient *metadata.Client) (string, string, error) {
ctx := context.Background()

clusterID, err := metadataClient.InstanceAttributeValueWithContext(ctx, "dataproc-cluster-uuid")
if err != nil {
return "", "", err
}
setHostTag(s, "cluster_id", clusterID)
setHostTag(s, "dataproc_cluster_id", clusterID)

dataprocRole, err := metadataClient.InstanceAttributeValueWithContext(ctx, "dataproc-role")
if err != nil {
return "", "", err
}
isMaster := "false"
if dataprocRole == "Master" {
isMaster = "true"
}
setHostTag(s, "is_master_node", isMaster)
s.Span.SetTag("host."+"is_master_node", isMaster)

clusterName, err := metadataClient.InstanceAttributeValueWithContext(ctx, "dataproc-cluster-name")
if err != nil {
log.Warn("failed to get clusterName, using clusterID instead")
return isMaster, clusterID, nil
}
setHostTag(s, "cluster_name", clusterName)

return isMaster, clusterName, nil
}
10 changes: 5 additions & 5 deletions pkg/fleet/installer/setup/djm/emr.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ func SetupEmr(s *common.Setup) error {
return fmt.Errorf("failed to set tags: %w", err)
}
if isMaster {
setupEmrResourceManager(s, clusterName)
setupResourceManager(s, clusterName)
}
return nil
}
Expand All @@ -115,7 +115,7 @@ func setupCommonEmrHostTags(s *common.Setup) (bool, string, error) {

setHostTag(s, "instance_group_id", info.InstanceGroupID)
setHostTag(s, "is_master_node", strconv.FormatBool(info.IsMaster))
s.Span.SetTag("host_tag."+"is_master_node", info.IsMaster)
s.Span.SetTag("host."+"is_master_node", info.IsMaster)

extraInstanceInfoRaw, err := os.ReadFile(filepath.Join(emrInfoPath, "extraInstanceData.json"))
if err != nil {
Expand All @@ -131,13 +131,13 @@ func setupCommonEmrHostTags(s *common.Setup) (bool, string, error) {
setHostTag(s, "emr_version", extraInfo.ReleaseLabel)
s.Span.SetTag("emr_version", extraInfo.ReleaseLabel)

clusterName := resolveClusterName(s, extraInfo.JobFlowID)
clusterName := resolveEmrClusterName(s, extraInfo.JobFlowID)
setHostTag(s, "cluster_name", clusterName)

return info.IsMaster, clusterName, nil
}

func setupEmrResourceManager(s *common.Setup, clusterName string) {
func setupResourceManager(s *common.Setup, clusterName string) {

var sparkIntegration common.IntegrationConfig
var yarnIntegration common.IntegrationConfig
Expand Down Expand Up @@ -179,7 +179,7 @@ var executeCommandWithTimeout = func(s *common.Setup, command string, args ...st
return output, nil
}

func resolveClusterName(s *common.Setup, jobFlowID string) string {
func resolveEmrClusterName(s *common.Setup, jobFlowID string) string {
var err error
span, _ := telemetry.StartSpanFromContext(s.Ctx, "resolve.cluster_name")
defer func() { span.Finish(err) }()
Expand Down
4 changes: 2 additions & 2 deletions pkg/fleet/installer/setup/djm/emr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ import (
"github.com/DataDog/datadog-agent/pkg/fleet/telemetry"
)

//go:embed testdata/instance.json
//go:embed testdata/emrInstance.json
var instanceJSON string

//go:embed testdata/extraInstanceData.json
//go:embed testdata/emrExtraInstanceData.json
var extraInstanceJSON string

//go:embed testdata/emrDescribeClusterResponse.json
Expand Down
1 change: 1 addition & 0 deletions pkg/fleet/installer/setup/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ type flavor struct {
var flavors = map[string]flavor{
"databricks": {path: "djm/databricks.go", run: djm.SetupDatabricks},
"emr": {path: "djm/emr.go", run: djm.SetupEmr},
"dataproc": {path: "djm/dataproc.go", run: djm.SetupDataproc},
}

// Setup installs Datadog.
Expand Down
Loading