From 3215c0d5be802e3e123518fb67499e570797aefa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9lian=20Raimbault?= <161456554+CelianR@users.noreply.github.com> Date: Fri, 12 Apr 2024 17:37:31 +0200 Subject: [PATCH 1/9] Revert "[gitlab-use-module] Use gitlab python module instead of raw http requests (#24070)" (#24651) This reverts commit 0ba7f945968854166dd7bb0a6cd01246b476836e. This is causing timeouts in jobs that trigger child pipelines. --- .github/workflows/label-analysis.yml | 2 +- tasks/kernel_matrix_testing/ci.py | 55 +-- tasks/kmt.py | 2 +- tasks/libs/ciproviders/github_api.py | 3 + tasks/libs/ciproviders/gitlab.py | 545 +++++++++++++++++++++++++++ tasks/libs/ciproviders/gitlab_api.py | 243 ------------ tasks/libs/common/remote_api.py | 123 ++++++ tasks/libs/pipeline/data.py | 59 ++- tasks/libs/pipeline/notifications.py | 19 +- tasks/libs/pipeline/stats.py | 6 +- tasks/libs/pipeline/tools.py | 177 ++++----- tasks/libs/types/types.py | 18 +- tasks/linter.py | 19 +- tasks/notify.py | 2 +- tasks/pipeline.py | 218 +++++------ tasks/release.py | 11 +- tasks/unit-tests/gitlab_api_tests.py | 93 ++++- tasks/unit-tests/notify_tests.py | 175 ++++----- 18 files changed, 1117 insertions(+), 653 deletions(-) create mode 100644 tasks/libs/ciproviders/gitlab.py delete mode 100644 tasks/libs/ciproviders/gitlab_api.py create mode 100644 tasks/libs/common/remote_api.py diff --git a/.github/workflows/label-analysis.yml b/.github/workflows/label-analysis.yml index bbf262c9381bb..7d97b83595f71 100644 --- a/.github/workflows/label-analysis.yml +++ b/.github/workflows/label-analysis.yml @@ -21,7 +21,7 @@ jobs: - name: Checkout repository uses: actions/checkout@v4 - name: Install Python dependencies - run: pip install -r requirements.txt -r tasks/requirements.txt + run: pip install -r tasks/requirements.txt - name: Auto assign team label run: inv -e github.assign-team-label --pr-id='${{ github.event.pull_request.number }}' fetch-labels: diff --git a/tasks/kernel_matrix_testing/ci.py b/tasks/kernel_matrix_testing/ci.py index 364bf9d3c2845..8e03b74f5a293 100644 --- a/tasks/kernel_matrix_testing/ci.py +++ b/tasks/kernel_matrix_testing/ci.py @@ -6,11 +6,9 @@ import re import tarfile import xml.etree.ElementTree as ET -from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union, overload +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, Union, overload -from gitlab.v4.objects import ProjectJob - -from tasks.libs.ciproviders.gitlab_api import get_gitlab_repo +from tasks.libs.ciproviders.gitlab import Gitlab, get_gitlab_token if TYPE_CHECKING: from typing_extensions import Literal @@ -18,27 +16,31 @@ from tasks.kernel_matrix_testing.types import Arch, Component, StackOutput, VMConfig +def get_gitlab() -> Gitlab: + return Gitlab("DataDog/datadog-agent", str(get_gitlab_token())) + + class KMTJob: """Abstract class representing a Kernel Matrix Testing job, with common properties and methods for all job types""" - def __init__(self, job: ProjectJob): - self.gitlab = get_gitlab_repo() - self.job = job + def __init__(self, job_data: Dict[str, Any]): + self.gitlab = get_gitlab() + self.job_data = job_data def __str__(self): return f"" @property def id(self) -> int: - return self.job.id + return self.job_data["id"] @property def pipeline_id(self) -> int: - return self.job.pipeline["id"] + return self.job_data["pipeline"]["id"] @property def name(self) -> str: - return self.job.name + return self.job_data.get("name", "") @property def arch(self) -> Arch: @@ -50,11 +52,11 @@ def 
component(self) -> Component: @property def status(self) -> str: - return self.job.status + return self.job_data['status'] @property def failure_reason(self) -> str: - return self.job.failure_reason + return self.job_data["failure_reason"] @overload def artifact_file(self, file: str, ignore_not_found: Literal[True]) -> Optional[str]: # noqa: U100 @@ -88,14 +90,16 @@ def artifact_file_binary(self, file: str, ignore_not_found: bool = False) -> Opt ignore_not_found: if True, return None if the file is not found, otherwise raise an error """ try: - res = self.gitlab.jobs.get(self.id, lazy=True).artifact(file) - - return res.content + res = self.gitlab.artifact(self.id, file, ignore_not_found=ignore_not_found) + if res is None: + if not ignore_not_found: + raise RuntimeError("Invalid return value from gitlab.artifact") + else: + return None + res.raise_for_status() except Exception as e: - if ignore_not_found: - return None - raise RuntimeError(f"Could not retrieve artifact {file}") from e + return res.content class KMTSetupEnvJob(KMTJob): @@ -103,8 +107,8 @@ class KMTSetupEnvJob(KMTJob): the job name and output artifacts """ - def __init__(self, job: ProjectJob): - super().__init__(job) + def __init__(self, job_data: Dict[str, Any]): + super().__init__(job_data) self.associated_test_jobs: List[KMTTestRunJob] = [] @property @@ -161,8 +165,8 @@ class KMTTestRunJob(KMTJob): the job name and output artifacts """ - def __init__(self, job: ProjectJob): - super().__init__(job) + def __init__(self, job_data: Dict[str, Any]): + super().__init__(job_data) self.setup_job: Optional[KMTSetupEnvJob] = None @property @@ -227,10 +231,9 @@ def get_all_jobs_for_pipeline(pipeline_id: Union[int, str]) -> Tuple[List[KMTSet setup_jobs: List[KMTSetupEnvJob] = [] test_jobs: List[KMTTestRunJob] = [] - gitlab = get_gitlab_repo() - jobs = gitlab.pipelines.get(pipeline_id, lazy=True).jobs.list(per_page=100, all=True) - for job in jobs: - name = job.name + gitlab = get_gitlab() + for job in gitlab.all_jobs(pipeline_id): + name = job.get("name", "") if name.startswith("kmt_setup_env"): setup_jobs.append(KMTSetupEnvJob(job)) elif name.startswith("kmt_run_"): diff --git a/tasks/kmt.py b/tasks/kmt.py index a3abd373ebdd1..d7614ccc73a36 100644 --- a/tasks/kmt.py +++ b/tasks/kmt.py @@ -984,7 +984,7 @@ def explain_ci_failure(_, pipeline: str): failreason = testfail # By default, we assume it's a test failure # Now check the artifacts, we'll guess why the job failed based on the size - for artifact in job.job.artifacts: + for artifact in job.job_data.get("artifacts", []): if artifact.get("filename") == "artifacts.zip": fsize = artifact.get("size", 0) if fsize < 1500: diff --git a/tasks/libs/ciproviders/github_api.py b/tasks/libs/ciproviders/github_api.py index b9a186287dc6d..1d0e12f760b08 100644 --- a/tasks/libs/ciproviders/github_api.py +++ b/tasks/libs/ciproviders/github_api.py @@ -1,6 +1,7 @@ import base64 import os import platform +import re import subprocess from typing import List @@ -14,6 +15,8 @@ __all__ = ["GithubAPI"] +errno_regex = re.compile(r".*\[Errno (\d+)\] (.*)") + class GithubAPI: """ diff --git a/tasks/libs/ciproviders/gitlab.py b/tasks/libs/ciproviders/gitlab.py new file mode 100644 index 0000000000000..6e79edca40939 --- /dev/null +++ b/tasks/libs/ciproviders/gitlab.py @@ -0,0 +1,545 @@ +import json +import os +import platform +import subprocess +from collections import UserList +from urllib.parse import quote + +import yaml +from invoke.exceptions import Exit + +from tasks.libs.common.remote_api import 
APIError, RemoteAPI + +__all__ = ["Gitlab"] + + +class Gitlab(RemoteAPI): + """ + Helper class to perform API calls against the Gitlab API, using a Gitlab PAT. + """ + + BASE_URL = "https://gitlab.ddbuild.io/api/v4" + + def __init__(self, project_name="DataDog/datadog-agent", api_token=""): + super(Gitlab, self).__init__("Gitlab") + self.api_token = api_token + self.project_name = project_name + self.authorization_error_message = ( + "HTTP 401: Your GITLAB_TOKEN may have expired. You can " + "check and refresh it at " + "https://gitlab.ddbuild.io/-/profile/personal_access_tokens" + ) + + def test_project_found(self): + """ + Checks if a project can be found. This is useful for testing access permissions to projects. + """ + result = self.project() + + # name is arbitrary, just need to check if something is in the result + if "name" in result: + return + + print(f"Cannot find GitLab project {self.project_name}") + print("If you cannot see it in the GitLab WebUI, you likely need permission.") + raise Exit(code=1) + + def project(self): + """ + Gets the project info. + """ + path = f"/projects/{quote(self.project_name, safe='')}" + return self.make_request(path, json_output=True) + + def create_pipeline(self, ref, variables=None): + """ + Create a pipeline targeting a given reference of a project. + ref must be a branch or a tag. + """ + if variables is None: + variables = {} + + path = f"/projects/{quote(self.project_name, safe='')}/pipeline" + headers = {"Content-Type": "application/json"} + data = json.dumps({"ref": ref, "variables": [{"key": k, "value": v} for (k, v) in variables.items()]}) + return self.make_request(path, headers=headers, data=data, json_output=True) + + def all_pipelines_for_ref(self, ref, sha=None): + """ + Gets all pipelines for a given reference (+ optionally git sha). + """ + page = 1 + + # Go through all pages + results = self.pipelines_for_ref(ref, sha=sha, page=page) + while results: + yield from results + page += 1 + results = self.pipelines_for_ref(ref, sha=sha, page=page) + + def pipelines_for_ref(self, ref, sha=None, page=1, per_page=100): + """ + Gets one page of pipelines for a given reference (+ optionally git sha). + """ + path = f"/projects/{quote(self.project_name, safe='')}/pipelines?ref={quote(ref, safe='')}&per_page={per_page}&page={page}" + if sha: + path = f"{path}&sha={sha}" + return self.make_request(path, json_output=True) + + def last_pipeline_for_ref(self, ref, per_page=100): + """ + Gets the last pipeline for a given reference. + per_page cannot exceed 100. + """ + pipelines = self.pipelines_for_ref(ref, per_page=per_page) + + if len(pipelines) == 0: + return None + + return sorted(pipelines, key=lambda pipeline: pipeline['created_at'], reverse=True)[0] + + def last_pipelines(self): + """ + Get the last 100 pipelines + """ + path = f"/projects/{quote(self.project_name, safe='')}/pipelines?per_page=100&page=1" + return self.make_request(path, json_output=True) + + def trigger_pipeline(self, data): + """ + Trigger a pipeline on a project using the trigger endpoint. + Requires a trigger token in the data object, in the 'token' field. + """ + path = f"/projects/{quote(self.project_name, safe='')}/trigger/pipeline" + + if 'token' not in data: + raise Exit("Missing 'token' field in data object to trigger child pipelines", 1) + + return self.make_request(path, data=data, json_input=True, json_output=True) + + def pipeline(self, pipeline_id): + """ + Gets info for a given pipeline. 
+ """ + path = f"/projects/{quote(self.project_name, safe='')}/pipelines/{pipeline_id}" + return self.make_request(path, json_output=True) + + def cancel_pipeline(self, pipeline_id): + """ + Cancels a given pipeline. + """ + path = f"/projects/{quote(self.project_name, safe='')}/pipelines/{pipeline_id}/cancel" + return self.make_request(path, json_output=True, method="POST") + + def cancel_job(self, job_id): + """ + Cancels a given job + """ + path = f"/projects/{quote(self.project_name, safe='')}/jobs/{job_id}/cancel" + return self.make_request(path, json_output=True, method="POST") + + def commit(self, commit_sha): + """ + Gets info for a given commit sha. + """ + path = f"/projects/{quote(self.project_name, safe='')}/repository/commits/{commit_sha}" + return self.make_request(path, json_output=True) + + def artifact(self, job_id, artifact_name, ignore_not_found=False): + path = f"/projects/{quote(self.project_name, safe='')}/jobs/{job_id}/artifacts/{artifact_name}" + try: + response = self.make_request(path, stream_output=True) + return response + except APIError as e: + if e.status_code == 404 and ignore_not_found: + return None + raise e + + def all_jobs(self, pipeline_id): + """ + Gets all the jobs for a pipeline. + """ + page = 1 + + # Go through all pages + results = self.jobs(pipeline_id, page) + while results: + yield from results + page += 1 + results = self.jobs(pipeline_id, page) + + def jobs(self, pipeline_id, page=1, per_page=100): + """ + Gets one page of the jobs for a pipeline. + per_page cannot exceed 100. + """ + path = f"/projects/{quote(self.project_name, safe='')}/pipelines/{pipeline_id}/jobs?per_page={per_page}&page={page}" + return self.make_request(path, json_output=True) + + def job_log(self, job_id): + """ + Gets the log file for a given job. + """ + + path = f"/projects/{quote(self.project_name, safe='')}/jobs/{job_id}/trace" + return self.make_request(path) + + def all_pipeline_schedules(self): + """ + Gets all pipelines schedules for the given project. + """ + page = 1 + + # Go through all pages + results = self.pipeline_schedules(page) + while results: + yield from results + page += 1 + results = self.pipeline_schedules(page) + + def pipeline_schedules(self, page=1, per_page=100): + """ + Gets one page of the pipeline schedules for the given project. + per_page cannot exceed 100 + """ + path = f"/projects/{quote(self.project_name, safe='')}/pipeline_schedules?per_page={per_page}&page={page}" + return self.make_request(path, json_output=True) + + def pipeline_schedule(self, schedule_id): + """ + Gets a single pipeline schedule. + """ + path = f"/projects/{quote(self.project_name, safe='')}/pipeline_schedules/{schedule_id}" + return self.make_request(path, json_output=True) + + def create_pipeline_schedule(self, description, ref, cron, cron_timezone=None, active=None): + """ + Create a new pipeline schedule with given attributes. + """ + path = f"/projects/{quote(self.project_name, safe='')}/pipeline_schedules" + data = { + "description": description, + "ref": ref, + "cron": cron, + "cron_timezone": cron_timezone, + "active": active, + } + no_none_data = {k: v for k, v in data.items() if v is not None} + return self.make_request(path, data=no_none_data, json_output=True, json_input=True) + + def edit_pipeline_schedule( + self, schedule_id, description=None, ref=None, cron=None, cron_timezone=None, active=None + ): + """ + Edit an existing pipeline schedule with given attributes. 
+ """ + path = f"/projects/{quote(self.project_name, safe='')}/pipeline_schedules/{schedule_id}" + data = { + "description": description, + "ref": ref, + "cron": cron, + "cron_timezone": cron_timezone, + "active": active, + } + no_none_data = {k: v for k, v in data.items() if v is not None} + return self.make_request(path, json_input=True, json_output=True, data=no_none_data, method="PUT") + + def delete_pipeline_schedule(self, schedule_id): + """ + Delete an existing pipeline schedule. + """ + path = f"/projects/{quote(self.project_name, safe='')}/pipeline_schedules/{schedule_id}" + # Gitlab API docs claim that this returns the JSON representation of the deleted schedule, + # but it actually returns an empty string + result = self.make_request(path, json_output=False, method="DELETE") + return f"Pipeline schedule deleted; result: {result if result else '(empty)'}" + + def create_pipeline_schedule_variable(self, schedule_id, key, value): + """ + Create a variable for an existing pipeline schedule. + """ + path = f"/projects/{quote(self.project_name, safe='')}/pipeline_schedules/{schedule_id}/variables" + data = { + "key": key, + "value": value, + } + return self.make_request(path, data=data, json_output=True, json_input=True) + + def edit_pipeline_schedule_variable(self, schedule_id, key, value): + """ + Edit an existing variable for a pipeline schedule. + """ + path = f"/projects/{quote(self.project_name, safe='')}/pipeline_schedules/{schedule_id}/variables/{key}" + return self.make_request(path, json_input=True, data={"value": value}, json_output=True, method="PUT") + + def delete_pipeline_schedule_variable(self, schedule_id, key): + """ + Delete an existing variable for a pipeline schedule. + """ + path = f"/projects/{quote(self.project_name, safe='')}/pipeline_schedules/{schedule_id}/variables/{key}" + return self.make_request(path, json_output=True, method="DELETE") + + def find_tag(self, tag_name): + """ + Look up a tag by its name. + """ + path = f"/projects/{quote(self.project_name, safe='')}/repository/tags/{tag_name}" + try: + response = self.make_request(path, json_output=True) + return response + except APIError as e: + # If Gitlab API returns a "404 not found" error we return an empty dict + if e.status_code == 404: + print( + f"Couldn't find the {tag_name} tag: Gitlab returned a 404 Not Found instead of a 200 empty response." + ) + return dict() + else: + raise e + + def lint(self, configuration): + """ + Lint a gitlab-ci configuration. + """ + path = f"/projects/{quote(self.project_name, safe='')}/ci/lint?dry_run=true&include_jobs=true" + headers = {"Content-Type": "application/json"} + data = {"content": configuration} + return self.make_request(path, headers=headers, data=data, json_input=True, json_output=True) + + def make_request( + self, path, headers=None, data=None, json_input=False, json_output=False, stream_output=False, method=None + ): + """ + Utility to make a request to the Gitlab API. + See RemoteAPI#request. + + Adds "PRIVATE-TOKEN: {self.api_token}" to the headers to be able to authenticate ourselves to GitLab. + """ + headers = dict(headers or []) + headers["PRIVATE-TOKEN"] = self.api_token + + return self.request( + path=path, + headers=headers, + data=data, + json_input=json_input, + json_output=json_output, + stream_output=stream_output, + raw_output=False, + method=method, + ) + + +def get_gitlab_token(): + if "GITLAB_TOKEN" not in os.environ: + print("GITLAB_TOKEN not found in env. 
Trying keychain...") + if platform.system() == "Darwin": + try: + output = subprocess.check_output( + ['security', 'find-generic-password', '-a', os.environ["USER"], '-s', 'GITLAB_TOKEN', '-w'] + ) + if len(output) > 0: + return output.strip() + except subprocess.CalledProcessError: + print("GITLAB_TOKEN not found in keychain...") + pass + print( + "Please create an 'api' access token at " + "https://gitlab.ddbuild.io/-/profile/personal_access_tokens and " + "add it as GITLAB_TOKEN in your keychain " + "or export it from your .bashrc or equivalent." + ) + raise Exit(code=1) + return os.environ["GITLAB_TOKEN"] + + +def get_gitlab_bot_token(): + if "GITLAB_BOT_TOKEN" not in os.environ: + print("GITLAB_BOT_TOKEN not found in env. Trying keychain...") + if platform.system() == "Darwin": + try: + output = subprocess.check_output( + ['security', 'find-generic-password', '-a', os.environ["USER"], '-s', 'GITLAB_BOT_TOKEN', '-w'] + ) + if output: + return output.strip() + except subprocess.CalledProcessError: + print("GITLAB_BOT_TOKEN not found in keychain...") + pass + print( + "Please make sure that the GITLAB_BOT_TOKEN is set or that " "the GITLAB_BOT_TOKEN keychain entry is set." + ) + raise Exit(code=1) + return os.environ["GITLAB_BOT_TOKEN"] + + +class ReferenceTag(yaml.YAMLObject): + """ + Custom yaml tag to handle references in gitlab-ci configuration + """ + + yaml_tag = u'!reference' + + def __init__(self, references): + self.references = references + + @classmethod + def from_yaml(cls, loader, node): + return UserList(loader.construct_sequence(node)) + + @classmethod + def to_yaml(cls, dumper, data): + return dumper.represent_sequence(cls.yaml_tag, data.data, flow_style=True) + + +def generate_gitlab_full_configuration(input_file, context=None, compare_to=None): + """ + Generate a full gitlab-ci configuration by resolving all includes + """ + # Update loader/dumper to handle !reference tag + yaml.SafeLoader.add_constructor(ReferenceTag.yaml_tag, ReferenceTag.from_yaml) + yaml.SafeDumper.add_representer(UserList, ReferenceTag.to_yaml) + + yaml_contents = [] + read_includes(input_file, yaml_contents) + full_configuration = {} + for yaml_file in yaml_contents: + full_configuration.update(yaml_file) + # Override some variables with a dedicated context + if context: + full_configuration["variables"].update(context) + if compare_to: + for value in full_configuration.values(): + if ( + isinstance(value, dict) + and "changes" in value + and isinstance(value["changes"], dict) + and "compare_to" in value["changes"] + ): + value["changes"]["compare_to"] = compare_to + elif isinstance(value, list): + for v in value: + if ( + isinstance(v, dict) + and "changes" in v + and isinstance(v["changes"], dict) + and "compare_to" in v["changes"] + ): + v["changes"]["compare_to"] = compare_to + return yaml.safe_dump(full_configuration) + + +def read_includes(yaml_file, includes): + """ + Recursive method to read all includes from yaml files and store them in a list + """ + current_file = read_content(yaml_file) + if 'include' not in current_file: + includes.append(current_file) + else: + for include in current_file['include']: + read_includes(include, includes) + del current_file['include'] + includes.append(current_file) + + +def read_content(file_path): + """ + Read the content of a file, either from a local file or from an http endpoint + """ + content = None + if file_path.startswith('http'): + import requests + + response = requests.get(file_path) + response.raise_for_status() + content = response.text + 
else: + with open(file_path) as f: + content = f.read() + return yaml.safe_load(content) + + +def get_preset_contexts(required_tests): + possible_tests = ["all", "main", "release", "mq"] + required_tests = required_tests.casefold().split(",") + if set(required_tests) | set(possible_tests) != set(possible_tests): + raise Exit(f"Invalid test required: {required_tests} must contain only values from {possible_tests}", 1) + main_contexts = [ + ("BUCKET_BRANCH", ["nightly"]), # ["dev", "nightly", "beta", "stable", "oldnightly"] + ("CI_COMMIT_BRANCH", ["main"]), # ["main", "mq-working-branch-main", "7.42.x", "any/name"] + ("CI_COMMIT_TAG", [""]), # ["", "1.2.3-rc.4", "6.6.6"] + ("CI_PIPELINE_SOURCE", ["pipeline"]), # ["trigger", "pipeline", "schedule"] + ("DEPLOY_AGENT", ["true"]), + ("RUN_ALL_BUILDS", ["true"]), + ("RUN_E2E_TESTS", ["auto"]), + ("RUN_KMT_TESTS", ["on"]), + ("RUN_UNIT_TESTS", ["on"]), + ("TESTING_CLEANUP", ["true"]), + ] + release_contexts = [ + ("BUCKET_BRANCH", ["stable"]), + ("CI_COMMIT_BRANCH", ["7.42.x"]), + ("CI_COMMIT_TAG", ["3.2.1", "1.2.3-rc.4"]), + ("CI_PIPELINE_SOURCE", ["schedule"]), + ("DEPLOY_AGENT", ["true"]), + ("RUN_ALL_BUILDS", ["true"]), + ("RUN_E2E_TESTS", ["auto"]), + ("RUN_KMT_TESTS", ["on"]), + ("RUN_UNIT_TESTS", ["on"]), + ("TESTING_CLEANUP", ["true"]), + ] + mq_contexts = [ + ("BUCKET_BRANCH", ["dev"]), + ("CI_COMMIT_BRANCH", ["mq-working-branch-main"]), + ("CI_PIPELINE_SOURCE", ["pipeline"]), + ("DEPLOY_AGENT", ["false"]), + ("RUN_ALL_BUILDS", ["false"]), + ("RUN_E2E_TESTS", ["auto"]), + ("RUN_KMT_TESTS", ["off"]), + ("RUN_UNIT_TESTS", ["off"]), + ("TESTING_CLEANUP", ["false"]), + ] + all_contexts = [] + for test in required_tests: + if test in ["all", "main"]: + generate_contexts(main_contexts, [], all_contexts) + if test in ["all", "release"]: + generate_contexts(release_contexts, [], all_contexts) + if test in ["all", "mq"]: + generate_contexts(mq_contexts, [], all_contexts) + return all_contexts + + +def generate_contexts(contexts, context, all_contexts): + """ + Recursive method to generate all possible contexts from a list of tuples + """ + if len(contexts) == 0: + all_contexts.append(context[:]) + return + for value in contexts[0][1]: + context.append((contexts[0][0], value)) + generate_contexts(contexts[1:], context, all_contexts) + context.pop() + + +def load_context(context): + """ + Load a context either from a yaml file or from a json string + """ + if os.path.exists(context): + with open(context) as f: + y = yaml.safe_load(f) + if "variables" not in y: + raise Exit( + f"Invalid context file: {context}, missing 'variables' key. 
Input file must be similar to tasks/unit-tests/testdata/gitlab_main_context_template.yml", + 1, + ) + return [[(k, v) for k, v in y["variables"].items()]] + else: + try: + j = json.loads(context) + return [[(k, v) for k, v in j.items()]] + except json.JSONDecodeError: + raise Exit(f"Invalid context: {context}, must be a valid json, or a path to a yaml file", 1) diff --git a/tasks/libs/ciproviders/gitlab_api.py b/tasks/libs/ciproviders/gitlab_api.py deleted file mode 100644 index 74136486a6cf3..0000000000000 --- a/tasks/libs/ciproviders/gitlab_api.py +++ /dev/null @@ -1,243 +0,0 @@ -import json -import os -import platform -import subprocess -from collections import UserList - -import gitlab -import yaml -from gitlab.v4.objects import Project -from invoke.exceptions import Exit - -BASE_URL = "https://gitlab.ddbuild.io" - - -def get_gitlab_token(): - if "GITLAB_TOKEN" not in os.environ: - print("GITLAB_TOKEN not found in env. Trying keychain...") - if platform.system() == "Darwin": - try: - output = subprocess.check_output( - ['security', 'find-generic-password', '-a', os.environ["USER"], '-s', 'GITLAB_TOKEN', '-w'] - ) - if len(output) > 0: - return output.strip() - except subprocess.CalledProcessError: - print("GITLAB_TOKEN not found in keychain...") - pass - print( - "Please create an 'api' access token at " - "https://gitlab.ddbuild.io/-/profile/personal_access_tokens and " - "add it as GITLAB_TOKEN in your keychain " - "or export it from your .bashrc or equivalent." - ) - raise Exit(code=1) - return os.environ["GITLAB_TOKEN"] - - -def get_gitlab_bot_token(): - if "GITLAB_BOT_TOKEN" not in os.environ: - print("GITLAB_BOT_TOKEN not found in env. Trying keychain...") - if platform.system() == "Darwin": - try: - output = subprocess.check_output( - ['security', 'find-generic-password', '-a', os.environ["USER"], '-s', 'GITLAB_BOT_TOKEN', '-w'] - ) - if output: - return output.strip() - except subprocess.CalledProcessError: - print("GITLAB_BOT_TOKEN not found in keychain...") - pass - print( - "Please make sure that the GITLAB_BOT_TOKEN is set or that " "the GITLAB_BOT_TOKEN keychain entry is set." - ) - raise Exit(code=1) - return os.environ["GITLAB_BOT_TOKEN"] - - -def get_gitlab_api(token=None) -> gitlab.Gitlab: - """ - Returns the gitlab api object with the api token. - The token is the one of get_gitlab_token() by default. 
- """ - token = token or get_gitlab_token() - - return gitlab.Gitlab(BASE_URL, private_token=token) - - -def get_gitlab_repo(repo='DataDog/datadog-agent', token=None) -> Project: - api = get_gitlab_api(token) - repo = api.projects.get(repo) - - return repo - - -class ReferenceTag(yaml.YAMLObject): - """ - Custom yaml tag to handle references in gitlab-ci configuration - """ - - yaml_tag = u'!reference' - - def __init__(self, references): - self.references = references - - @classmethod - def from_yaml(cls, loader, node): - return UserList(loader.construct_sequence(node)) - - @classmethod - def to_yaml(cls, dumper, data): - return dumper.represent_sequence(cls.yaml_tag, data.data, flow_style=True) - - -def generate_gitlab_full_configuration(input_file, context=None, compare_to=None): - """ - Generate a full gitlab-ci configuration by resolving all includes - """ - # Update loader/dumper to handle !reference tag - yaml.SafeLoader.add_constructor(ReferenceTag.yaml_tag, ReferenceTag.from_yaml) - yaml.SafeDumper.add_representer(UserList, ReferenceTag.to_yaml) - yaml_contents = [] - read_includes(input_file, yaml_contents) - full_configuration = {} - for yaml_file in yaml_contents: - full_configuration.update(yaml_file) - # Override some variables with a dedicated context - if context: - full_configuration["variables"].update(context) - if compare_to: - for value in full_configuration.values(): - if ( - isinstance(value, dict) - and "changes" in value - and isinstance(value["changes"], dict) - and "compare_to" in value["changes"] - ): - value["changes"]["compare_to"] = compare_to - elif isinstance(value, list): - for v in value: - if ( - isinstance(v, dict) - and "changes" in v - and isinstance(v["changes"], dict) - and "compare_to" in v["changes"] - ): - v["changes"]["compare_to"] = compare_to - return yaml.safe_dump(full_configuration) - - -def read_includes(yaml_file, includes): - """ - Recursive method to read all includes from yaml files and store them in a list - """ - current_file = read_content(yaml_file) - if 'include' not in current_file: - includes.append(current_file) - else: - for include in current_file['include']: - read_includes(include, includes) - del current_file['include'] - includes.append(current_file) - - -def read_content(file_path): - """ - Read the content of a file, either from a local file or from an http endpoint - """ - content = None - if file_path.startswith('http'): - import requests - - response = requests.get(file_path) - response.raise_for_status() - content = response.text - else: - with open(file_path) as f: - content = f.read() - return yaml.safe_load(content) - - -def get_preset_contexts(required_tests): - possible_tests = ["all", "main", "release", "mq"] - required_tests = required_tests.casefold().split(",") - if set(required_tests) | set(possible_tests) != set(possible_tests): - raise Exit(f"Invalid test required: {required_tests} must contain only values from {possible_tests}", 1) - main_contexts = [ - ("BUCKET_BRANCH", ["nightly"]), # ["dev", "nightly", "beta", "stable", "oldnightly"] - ("CI_COMMIT_BRANCH", ["main"]), # ["main", "mq-working-branch-main", "7.42.x", "any/name"] - ("CI_COMMIT_TAG", [""]), # ["", "1.2.3-rc.4", "6.6.6"] - ("CI_PIPELINE_SOURCE", ["pipeline"]), # ["trigger", "pipeline", "schedule"] - ("DEPLOY_AGENT", ["true"]), - ("RUN_ALL_BUILDS", ["true"]), - ("RUN_E2E_TESTS", ["auto"]), - ("RUN_KMT_TESTS", ["on"]), - ("RUN_UNIT_TESTS", ["on"]), - ("TESTING_CLEANUP", ["true"]), - ] - release_contexts = [ - ("BUCKET_BRANCH", ["stable"]), - 
("CI_COMMIT_BRANCH", ["7.42.x"]), - ("CI_COMMIT_TAG", ["3.2.1", "1.2.3-rc.4"]), - ("CI_PIPELINE_SOURCE", ["schedule"]), - ("DEPLOY_AGENT", ["true"]), - ("RUN_ALL_BUILDS", ["true"]), - ("RUN_E2E_TESTS", ["auto"]), - ("RUN_KMT_TESTS", ["on"]), - ("RUN_UNIT_TESTS", ["on"]), - ("TESTING_CLEANUP", ["true"]), - ] - mq_contexts = [ - ("BUCKET_BRANCH", ["dev"]), - ("CI_COMMIT_BRANCH", ["mq-working-branch-main"]), - ("CI_PIPELINE_SOURCE", ["pipeline"]), - ("DEPLOY_AGENT", ["false"]), - ("RUN_ALL_BUILDS", ["false"]), - ("RUN_E2E_TESTS", ["auto"]), - ("RUN_KMT_TESTS", ["off"]), - ("RUN_UNIT_TESTS", ["off"]), - ("TESTING_CLEANUP", ["false"]), - ] - all_contexts = [] - for test in required_tests: - if test in ["all", "main"]: - generate_contexts(main_contexts, [], all_contexts) - if test in ["all", "release"]: - generate_contexts(release_contexts, [], all_contexts) - if test in ["all", "mq"]: - generate_contexts(mq_contexts, [], all_contexts) - return all_contexts - - -def generate_contexts(contexts, context, all_contexts): - """ - Recursive method to generate all possible contexts from a list of tuples - """ - if len(contexts) == 0: - all_contexts.append(context[:]) - return - for value in contexts[0][1]: - context.append((contexts[0][0], value)) - generate_contexts(contexts[1:], context, all_contexts) - context.pop() - - -def load_context(context): - """ - Load a context either from a yaml file or from a json string - """ - if os.path.exists(context): - with open(context) as f: - y = yaml.safe_load(f) - if "variables" not in y: - raise Exit( - f"Invalid context file: {context}, missing 'variables' key. Input file must be similar to tasks/unit-tests/testdata/gitlab_main_context_template.yml", - 1, - ) - return [[(k, v) for k, v in y["variables"].items()]] - else: - try: - j = json.loads(context) - return [[(k, v) for k, v in j.items()]] - except json.JSONDecodeError: - raise Exit(f"Invalid context: {context}, must be a valid json, or a path to a yaml file", 1) diff --git a/tasks/libs/common/remote_api.py b/tasks/libs/common/remote_api.py new file mode 100644 index 0000000000000..20f4008abed1f --- /dev/null +++ b/tasks/libs/common/remote_api.py @@ -0,0 +1,123 @@ +import errno +import re +import time + +from invoke.exceptions import Exit + +errno_regex = re.compile(r".*\[Errno (\d+)\] (.*)") + + +class APIError(Exception): + def __init__(self, request, api_name): + super(APIError, self).__init__(f"{api_name} says: {request.content}") + self.status_code = request.status_code + self.request = request + + +class RemoteAPI(object): + """ + Helper class to perform calls against a given remote API. + """ + + BASE_URL = "" + + def __init__(self, api_name, sleep_time=1, retry_count=5): + self.api_name = api_name + self.authorization_error_message = "HTTP 401 Unauthorized" + self.requests_sleep_time = sleep_time + self.requests_500_retry_count = retry_count + + def request( + self, + path, + headers=None, + data=None, + json_input=False, + json_output=False, + stream_output=False, + raw_output=False, + method=None, + ): + """ + Utility to make a request to a remote API. + + headers: A hash of headers to pass to the request. + data: An object containing the body of the request. + json_input: If set to true, data is passed with the json parameter of requests.post instead of the data parameter. + + By default, the request method is GET, or POST if data is not empty. + method: Can be set to GET, POST, PUT or DELETE to force the REST method used. + + By default, we return the text field of the response object. 
The following fields can alter this behavior: + json_output: the json field of the response object is returned. + stream_output: the request asks for a stream response, and the raw response object is returned. + raw_output: the content field of the resposne object is returned. + """ + import requests + + url = self.BASE_URL + path + + # TODO: Use the param argument of requests instead of handling URL params + # manually + try: + # If json_input is true, we specifically want to send data using the json + # parameter of requests.post / requests.put + for retry_count in range(self.requests_500_retry_count): + if method == "PUT": + if json_input: + r = requests.put(url, headers=headers, json=data, stream=stream_output) + else: + r = requests.put(url, headers=headers, data=data, stream=stream_output) + elif method == "DELETE": + r = requests.delete(url, headers=headers, stream=stream_output) + elif data or method == "POST": + if json_input: + r = requests.post(url, headers=headers, json=data, stream=stream_output) + else: + r = requests.post(url, headers=headers, data=data, stream=stream_output) + else: + r = requests.get(url, headers=headers, stream=stream_output) + if r.status_code >= 400: + if r.status_code == 401: + print(self.authorization_error_message) + elif 500 <= r.status_code < 600: + sleep_time = self.requests_sleep_time + retry_count * self.requests_sleep_time + if sleep_time > 0: + print( + f"Request failed with error {r.status_code}, retrying in {sleep_time} seconds (retry {retry_count}/{self.requests_500_retry_count}" + ) + time.sleep(sleep_time) + continue + raise APIError(r, self.api_name) + else: + break + except requests.exceptions.Timeout: + print(f"Connection to {self.api_name} ({url}) timed out.") + raise Exit(code=1) + except requests.exceptions.RequestException as e: + m = errno_regex.match(str(e)) + if not m: + print(f"Unknown error raised connecting to {self.api_name} ({url}): {e}") + raise e + + # Parse errno to give a better explanation + # Requests doesn't have granularity at the level we want: + # http://docs.python-requests.org/en/master/_modules/requests/exceptions/ + errno_code = int(m.group(1)) + message = m.group(2) + + if errno_code == errno.ENOEXEC: + exit_msg = f"Error resolving {url}: {message}" + elif errno_code == errno.ECONNREFUSED: + exit_msg = f"Connection to {self.api_name} ({url}) refused" + else: + exit_msg = f"Error while connecting to {url}: {str(e)}" + raise Exit(message=exit_msg, code=1) + + if json_output: + return r.json() + if raw_output: + return r.content + if stream_output: + return r + return r.text diff --git a/tasks/libs/pipeline/data.py b/tasks/libs/pipeline/data.py index acaf9ccdff05b..4e5b5fa1c9fe5 100644 --- a/tasks/libs/pipeline/data.py +++ b/tasks/libs/pipeline/data.py @@ -1,9 +1,6 @@ import re -from collections import defaultdict -from gitlab.v4.objects import ProjectJob - -from tasks.libs.ciproviders.gitlab_api import get_gitlab_repo +from tasks.libs.ciproviders.gitlab import Gitlab, get_gitlab_token from tasks.libs.types.types import FailedJobReason, FailedJobs, FailedJobType @@ -11,47 +8,47 @@ def get_failed_jobs(project_name: str, pipeline_id: str) -> FailedJobs: """ Retrieves the list of failed jobs for a given pipeline id in a given project. 
""" - repo = get_gitlab_repo(project_name) - pipeline = repo.pipelines.get(pipeline_id) - jobs = pipeline.jobs.list(per_page=100, all=True) - # Get instances of failed jobs grouped by name - failed_jobs = defaultdict(list) + gitlab = Gitlab(project_name=project_name, api_token=get_gitlab_token()) + + # gitlab.all_jobs yields a generator, it needs to be converted to a list to be able to + # go through it twice + jobs = list(gitlab.all_jobs(pipeline_id)) + + # Get instances of failed jobs + failed_jobs = {job["name"]: [] for job in jobs if job["status"] == "failed"} + + # Group jobs per name for job in jobs: - if job.status == "failed": - failed_jobs[job.name].append(job) + if job["name"] in failed_jobs: + failed_jobs[job["name"]].append(job) # There, we now have the following map: # job name -> list of jobs with that name, including at least one failed job processed_failed_jobs = FailedJobs() for job_name, jobs in failed_jobs.items(): # We sort each list per creation date - jobs.sort(key=lambda x: x.created_at) + jobs.sort(key=lambda x: x["created_at"]) # We truncate the job name to increase readability job_name = truncate_job_name(job_name) - job = jobs[-1] # Check the final job in the list: it contains the current status of the job # This excludes jobs that were retried and succeeded - trace = str(repo.jobs.get(job.id, lazy=True).trace(), 'utf-8') - failure_type, failure_reason = get_job_failure_context(trace) - final_status = ProjectJob( - repo.manager, - attrs={ - "name": job_name, - "id": job.id, - "stage": job.stage, - "status": job.status, - "tag_list": job.tag_list, - "allow_failure": job.allow_failure, - "web_url": job.web_url, - "retry_summary": [ijob.status for ijob in jobs], - "failure_type": failure_type, - "failure_reason": failure_reason, - }, - ) + failure_type, failure_reason = get_job_failure_context(gitlab.job_log(jobs[-1]["id"])) + final_status = { + "name": job_name, + "id": jobs[-1]["id"], + "stage": jobs[-1]["stage"], + "status": jobs[-1]["status"], + "tag_list": jobs[-1]["tag_list"], + "allow_failure": jobs[-1]["allow_failure"], + "url": jobs[-1]["web_url"], + "retry_summary": [job["status"] for job in jobs], + "failure_type": failure_type, + "failure_reason": failure_reason, + } # Also exclude jobs allowed to fail - if final_status.status == "failed" and should_report_job(job_name, final_status.allow_failure): + if final_status["status"] == "failed" and should_report_job(job_name, final_status["allow_failure"]): processed_failed_jobs.add_failed_job(final_status) return processed_failed_jobs diff --git a/tasks/libs/pipeline/notifications.py b/tasks/libs/pipeline/notifications.py index c35282f1cea94..960eb5a283234 100644 --- a/tasks/libs/pipeline/notifications.py +++ b/tasks/libs/pipeline/notifications.py @@ -6,12 +6,10 @@ from collections import defaultdict from typing import Dict -import gitlab import yaml -from gitlab.v4.objects import ProjectJob from invoke.context import Context -from tasks.libs.ciproviders.gitlab_api import get_gitlab_repo +from tasks.libs.ciproviders.gitlab import Gitlab, get_gitlab_token from tasks.libs.owners.parsing import read_owners from tasks.libs.types.types import FailedJobReason, FailedJobs, Test @@ -53,16 +51,13 @@ def check_for_missing_owners_slack_and_jira(print_missing_teams=True, owners_fil return error -def get_failed_tests(project_name, job: ProjectJob, owners_file=".github/CODEOWNERS"): - repo = get_gitlab_repo(project_name) +def get_failed_tests(project_name, job, owners_file=".github/CODEOWNERS"): + gitlab = 
Gitlab(project_name=project_name, api_token=get_gitlab_token()) owners = read_owners(owners_file) - try: - test_output = str(repo.jobs.get(job.id, lazy=True).artifact('test_output.json'), 'utf-8') - except gitlab.exceptions.GitlabGetError: - test_output = '' + test_output = gitlab.artifact(job["id"], "test_output.json", ignore_not_found=True) failed_tests = {} # type: dict[tuple[str, str], Test] if test_output: - for line in test_output.splitlines(): + for line in test_output.iter_lines(): json_test = json.loads(line) if 'Test' in json_test: name = json_test['Test'] @@ -91,11 +86,11 @@ def find_job_owners(failed_jobs: FailedJobs, owners_file: str = ".gitlab/JOBOWNE # For e2e test infrastructure errors, notify the agent-e2e-testing team for job in failed_jobs.mandatory_infra_job_failures: - if job.failure_type == FailedJobReason.E2E_INFRA_FAILURE: + if job["failure_type"] == FailedJobReason.E2E_INFRA_FAILURE: owners_to_notify["@datadog/agent-e2e-testing"].add_failed_job(job) for job in failed_jobs.all_non_infra_failures(): - job_owners = owners.of(job.name) + job_owners = owners.of(job["name"]) # job_owners is a list of tuples containing the type of owner (eg. USERNAME, TEAM) and the name of the owner # eg. [('TEAM', '@DataDog/agent-ci-experience')] diff --git a/tasks/libs/pipeline/stats.py b/tasks/libs/pipeline/stats.py index 8bc9e1b0f9113..46a862bfbb94e 100644 --- a/tasks/libs/pipeline/stats.py +++ b/tasks/libs/pipeline/stats.py @@ -31,10 +31,10 @@ def get_failed_jobs_stats(project_name, pipeline_id): global_failure_reason = FailedJobType.INFRA_FAILURE.name for job in failed_jobs.all_mandatory_failures(): - failure_type = job.failure_type - failure_reason = job.failure_reason + failure_type = job["failure_type"] + failure_reason = job["failure_reason"] - key = tuple(sorted(job.tag_list + [f"type:{failure_type.name}", f"reason:{failure_reason.name}"])) + key = tuple(sorted(job["tag_list"] + [f"type:{failure_type.name}", f"reason:{failure_reason.name}"])) job_failure_stats[key] += 1 return global_failure_reason, job_failure_stats diff --git a/tasks/libs/pipeline/tools.py b/tasks/libs/pipeline/tools.py index 513abfa14b85c..d026d61b5f6a6 100644 --- a/tasks/libs/pipeline/tools.py +++ b/tasks/libs/pipeline/tools.py @@ -3,10 +3,6 @@ import platform import sys from time import sleep, time -from typing import List - -from gitlab import GitlabError -from gitlab.v4.objects import Project, ProjectJob, ProjectPipeline from tasks.libs.common.color import color_message from tasks.libs.common.user_interactions import yes_no_question @@ -19,11 +15,11 @@ class FilteredOutException(Exception): pass -def get_running_pipelines_on_same_ref(repo: Project, ref, sha=None) -> List[ProjectPipeline]: - pipelines = repo.pipelines.list(ref=ref, sha=sha, per_page=100, all=True) +def get_running_pipelines_on_same_ref(gitlab, ref, sha=None): + pipelines = gitlab.all_pipelines_for_ref(ref, sha=sha) RUNNING_STATUSES = ["created", "pending", "running"] - running_pipelines = [pipeline for pipeline in pipelines if pipeline.status in RUNNING_STATUSES] + running_pipelines = [pipeline for pipeline in pipelines if pipeline["status"] in RUNNING_STATUSES] return running_pipelines @@ -36,37 +32,37 @@ def parse_datetime(dt): return datetime.datetime.strptime(dt, "%Y-%m-%dT%H:%M:%S.%f%z") -def cancel_pipelines_with_confirmation(repo: Project, pipelines: List[ProjectPipeline]): +def cancel_pipelines_with_confirmation(gitlab, pipelines): for pipeline in pipelines: - commit = repo.commits.get(pipeline.sha) + commit_author, 
commit_short_sha, commit_title = get_commit_for_pipeline(gitlab, pipeline['id']) print( color_message("Pipeline", "blue"), - color_message(pipeline.id, "bold"), - color_message(f"({repo.web_url}/pipelines/{pipeline.id})", "green"), + color_message(pipeline['id'], "bold"), + color_message(f"(https://gitlab.ddbuild.io/{gitlab.project_name}/pipelines/{pipeline['id']})", "green"), ) - pipeline_creation_date = pipeline.created_at + pipeline_creation_date = pipeline['created_at'] print( f"{color_message('Started at', 'blue')} {parse_datetime(pipeline_creation_date).astimezone():%c} ({pipeline_creation_date})" ) print( color_message("Commit:", "blue"), - color_message(commit.title, "green"), - color_message(f"({commit.short_id})", "grey"), + color_message(commit_title, "green"), + color_message(f"({commit_short_sha})", "grey"), color_message("by", "blue"), - color_message(commit.author_name, "bold"), + color_message(commit_author, "bold"), ) if yes_no_question("Do you want to cancel this pipeline?", color="orange", default=True): - pipeline.cancel() - print(f"Pipeline {color_message(pipeline.id, 'bold')} has been cancelled.\n") + gitlab.cancel_pipeline(pipeline['id']) + print(f"Pipeline {color_message(pipeline['id'], 'bold')} has been cancelled.\n") else: - print(f"Pipeline {color_message(pipeline.id, 'bold')} will keep running.\n") + print(f"Pipeline {color_message(pipeline['id'], 'bold')} will keep running.\n") -def gracefully_cancel_pipeline(repo: Project, pipeline: ProjectPipeline, force_cancel_stages): +def gracefully_cancel_pipeline(gitlab, pipeline, force_cancel_stages): """ Gracefully cancel pipeline - Cancel all the jobs that did not start to run yet @@ -74,17 +70,17 @@ def gracefully_cancel_pipeline(repo: Project, pipeline: ProjectPipeline, force_c - Jobs in the stages specified in 'force_cancel_stages' variables will always be canceled even if running """ - jobs = pipeline.jobs.list(per_page=100, all=True) + jobs = gitlab.all_jobs(pipeline["id"]) for job in jobs: - if job.stage in force_cancel_stages or ( - job.status not in ["running", "canceled"] and "cleanup" not in job.name + if job["stage"] in force_cancel_stages or ( + job["status"] not in ["running", "canceled"] and "cleanup" not in job["name"] ): - repo.jobs.get(job.id, lazy=True).cancel() + gitlab.cancel_job(job["id"]) def trigger_agent_pipeline( - repo: Project, + gitlab, ref=DEFAULT_BRANCH, release_version_6="nightly", release_version_7="nightly-a7", @@ -94,7 +90,7 @@ def trigger_agent_pipeline( e2e_tests=False, rc_build=False, rc_k8s_deployments=False, -) -> ProjectPipeline: +): """ Trigger a pipeline on the datadog-agent repositories. 
Multiple options are available: - run a pipeline with all builds (by default, a pipeline only runs a subset of all available builds), @@ -141,40 +137,39 @@ def trigger_agent_pipeline( ref, "\n".join(f" - {k}: {args[k]}" for k in args) ) ) - try: - variables = [{'key': key, 'value': value} for (key, value) in args.items()] + result = gitlab.create_pipeline(ref, args) - return repo.pipelines.create({'ref': ref, 'variables': variables}) - except GitlabError as e: - if "filtered out by workflow rules" in e.error_message: - raise FilteredOutException + if result and "id" in result: + return result["id"] - raise RuntimeError(f"Invalid response from Gitlab API: {e}") + if result and "filtered out by workflow rules" in result.get("message", {}).get("base", [""])[0]: + raise FilteredOutException + raise RuntimeError(f"Invalid response from Gitlab: {result}") -def wait_for_pipeline( - repo: Project, pipeline: ProjectPipeline, pipeline_finish_timeout_sec=PIPELINE_FINISH_TIMEOUT_SEC -): + +def wait_for_pipeline(gitlab, pipeline_id, pipeline_finish_timeout_sec=PIPELINE_FINISH_TIMEOUT_SEC): """ Follow a given pipeline, periodically checking the pipeline status and printing changes to the job statuses. """ - commit = repo.commits.get(pipeline.sha) + commit_author, commit_short_sha, commit_title = get_commit_for_pipeline(gitlab, pipeline_id) print( color_message( "Commit: " - + color_message(commit.title, "green") - + color_message(f" ({commit.short_id})", "grey") + + color_message(commit_title, "green") + + color_message(f" ({commit_short_sha})", "grey") + " by " - + color_message(commit.author_name, "bold"), + + color_message(commit_author, "bold"), "blue", ), flush=True, ) print( color_message( - "Pipeline Link: " + color_message(pipeline.web_url, "green"), + "Pipeline Link: " + + color_message(f"https://gitlab.ddbuild.io/{gitlab.project_name}/pipelines/{pipeline_id}", "green"), "blue", ), flush=True, @@ -182,10 +177,19 @@ def wait_for_pipeline( print(color_message("Waiting for pipeline to finish. Exiting won't cancel it.", "blue"), flush=True) - f = functools.partial(pipeline_status, pipeline) + f = functools.partial(pipeline_status, gitlab, pipeline_id) loop_status(f, pipeline_finish_timeout_sec) + return pipeline_id + + +def get_commit_for_pipeline(gitlab, pipeline_id): + pipeline = gitlab.pipeline(pipeline_id) + sha = pipeline['sha'] + commit = gitlab.commit(sha) + return commit['author_name'], commit['short_id'], commit['title'] + def loop_status(callable, timeout_sec): """ @@ -202,49 +206,50 @@ def loop_status(callable, timeout_sec): sleep(10) -def pipeline_status(pipeline: ProjectPipeline, job_status): +def pipeline_status(gitlab, pipeline_id, job_status): """ Checks the pipeline status and updates job statuses. 
""" - jobs = pipeline.jobs.list(per_page=100, all=True) + jobs = gitlab.all_jobs(pipeline_id) job_status = update_job_status(jobs, job_status) # Check pipeline status - pipestatus = pipeline.status.lower().strip() - ref = pipeline.ref + pipeline = gitlab.pipeline(pipeline_id) + pipestatus = pipeline["status"].lower().strip() + ref = pipeline["ref"] if pipestatus == "success": print( color_message( - f"Pipeline {pipeline.web_url} for {ref} succeeded", + f"Pipeline https://gitlab.ddbuild.io/{gitlab.project_name}/pipelines/{pipeline_id} for {ref} succeeded", "green", ), flush=True, ) - notify("Pipeline success", f"Pipeline {pipeline.id} for {ref} succeeded.") + notify("Pipeline success", f"Pipeline {pipeline_id} for {ref} succeeded.") return True, job_status if pipestatus == "failed": print( color_message( - f"Pipeline {pipeline.web_url} for {ref} failed", + f"Pipeline https://gitlab.ddbuild.io/{gitlab.project_name}/pipelines/{pipeline_id} for {ref} failed", "red", ), flush=True, ) - notify("Pipeline failure", f"Pipeline {pipeline.id} for {ref} failed.") + notify("Pipeline failure", f"Pipeline {pipeline_id} for {ref} failed.") return True, job_status if pipestatus == "canceled": print( color_message( - f"Pipeline {pipeline.web_url} for {ref} was canceled", + f"Pipeline https://gitlab.ddbuild.io/{gitlab.project_name}/pipelines/{pipeline_id} for {ref} was canceled", "grey", ), flush=True, ) - notify("Pipeline canceled", f"Pipeline {pipeline.id} for {ref} was canceled.") + notify("Pipeline canceled", f"Pipeline {pipeline_id} for {ref} was canceled.") return True, job_status if pipestatus not in ["created", "running", "pending"]: @@ -253,36 +258,36 @@ def pipeline_status(pipeline: ProjectPipeline, job_status): return False, job_status -def update_job_status(jobs: List[ProjectJob], job_status): +def update_job_status(jobs, job_status): """ Updates job statuses and notify on changes. """ notify = {} for job in jobs: - if job_status.get(job.name, None) is None: - job_status[job.name] = job - notify[job.id] = job + if job_status.get(job['name'], None) is None: + job_status[job['name']] = job + notify[job['id']] = job else: # There are two reasons why we want to notify: # - status change on job (when we refresh) # - another job with the same name exists (when a job is retried) # Check for id to see if we're in the first case. - old_job = job_status[job.name] - if job.id == old_job.id and job.status != old_job.status: - job_status[job.name] = job - notify[job.id] = job - if job.id != old_job.id and job.created_at > old_job.created_at: - job_status[job.name] = job + old_job = job_status[job['name']] + if job['id'] == old_job['id'] and job['status'] != old_job['status']: + job_status[job['name']] = job + notify[job['id']] = job + if job['id'] != old_job['id'] and job['created_at'] > old_job['created_at']: + job_status[job['name']] = job # Check if old job already in notification list, to append retry message - notify_old_job = notify.get(old_job.id, None) + notify_old_job = notify.get(old_job['id'], None) if notify_old_job is not None: - notify_old_job.retried_old = True # Add message to say the job got retried - notify_old_job.retried_created_at = job.created_at - notify[old_job.id] = notify_old_job + notify_old_job['retried_old'] = True # Add message to say the job got retried + notify_old_job['retried_created_at'] = job['created_at'] + notify[old_job['id']] = notify_old_job # If not (eg. 
previous job was notified in last refresh), add retry message to new job else: - job.retried_new = True - notify[job.id] = job + job['retried_new'] = True + notify[job['id']] = job for job in notify.values(): print_job_status(job) @@ -307,49 +312,53 @@ def print_job(name, stage, color, date, duration, status, link): def print_retry(name, date): print(color_message(f"[{date}] Job {name} was retried", "grey")) - duration = job.duration - date = job.finished_at # Date that is printed in the console log. In most cases, it's when the job finished. + name = job['name'] + stage = job['stage'] + allow_failure = job['allow_failure'] + duration = job['duration'] + date = job['finished_at'] # Date that is printed in the console log. In most cases, it's when the job finished. + status = job['status'] # Gitlab job status job_status = None # Status string printed in the console link = '' # Link to the pipeline. Only filled for failing jobs, to be able to quickly go to the failing job. color = 'grey' # Log output color # A None duration is set by Gitlab when the job gets canceled before it was started. # In that case, set a duration of 0s. - if job.duration is None: + if duration is None: duration = 0 - if job.status == 'success': + if status == 'success': job_status = 'succeeded' color = 'green' - elif job.status == 'failed': - if job.allow_failure: + elif status == 'failed': + if allow_failure: job_status = 'failed (allowed to fail)' color = 'orange' else: job_status = 'failed' color = 'red' - link = f"Link: {job.web_url}" + link = f"Link: {job['web_url']}" # Only notify on real (not retried) failures # Best-effort, as there can be situations where the retried # job didn't get created yet - if getattr(job, 'retried_old', None) is None: - notify("Job failure", f"Job {job.name} failed.") - elif job.status == 'canceled': + if job.get('retried_old', None) is None: + notify("Job failure", f"Job {name} failed.") + elif status == 'canceled': job_status = 'was canceled' color = 'grey' - elif job.status == 'running': + elif status == 'running': job_status = 'started running' - date = job.started_at + date = job['started_at'] color = 'blue' else: return # Some logic to print the retry message in the correct order (before the new job or after the old job) - if getattr(job, 'retried_new', None) is not None: - print_retry(job.name, job.created_at) - print_job(job.name, job.stage, color, date, duration, job_status, link) - if getattr(job, 'retried_old', None) is not None: - print_retry(job.name, job.retried_created_at) + if job.get('retried_new', None) is not None: + print_retry(name, job['created_at']) + print_job(name, stage, color, date, duration, job_status, link) + if job.get('retried_old', None) is not None: + print_retry(name, job['retried_created_at']) def notify(title, info_text, sound=True): diff --git a/tasks/libs/types/types.py b/tasks/libs/types/types.py index 10bc2eaeea10e..3c0d4c103e565 100644 --- a/tasks/libs/types/types.py +++ b/tasks/libs/types/types.py @@ -3,8 +3,6 @@ from collections import defaultdict from enum import Enum -from gitlab.v4.objects import ProjectJob - class Test: PACKAGE_PREFIX = "github.com/DataDog/datadog-agent/" @@ -62,12 +60,12 @@ def __init__(self): self.mandatory_infra_job_failures = [] self.optional_infra_job_failures = [] - def add_failed_job(self, job: ProjectJob): - if job.failure_type == FailedJobType.INFRA_FAILURE and job.allow_failure: + def add_failed_job(self, job): + if job["failure_type"] == FailedJobType.INFRA_FAILURE and job["allow_failure"]: 
self.optional_infra_job_failures.append(job) - elif job.failure_type == FailedJobType.INFRA_FAILURE and not job.allow_failure: + elif job["failure_type"] == FailedJobType.INFRA_FAILURE and not job["allow_failure"]: self.mandatory_infra_job_failures.append(job) - elif job.allow_failure: + elif job["allow_failure"]: self.optional_job_failures.append(job) else: self.mandatory_job_failures.append(job) @@ -113,13 +111,13 @@ def __render_jobs_section(self, header: str, jobs: list, buffer: io.StringIO): jobs_per_stage = defaultdict(list) for job in jobs: - jobs_per_stage[job.stage].append(job) + jobs_per_stage[job["stage"]].append(job) for stage, jobs in jobs_per_stage.items(): jobs_info = [] for job in jobs: - num_retries = len(job.retry_summary) - 1 - job_info = f"<{job.web_url}|{job.name}>" + num_retries = len(job["retry_summary"]) - 1 + job_info = f"<{job['url']}|{job['name']}>" if num_retries > 0: job_info += f" ({num_retries} retries)" @@ -133,7 +131,7 @@ def __render_jobs_section(self, header: str, jobs: list, buffer: io.StringIO): def __render_tests_section(self, buffer): print(self.TEST_SECTION_HEADER, file=buffer) for (test_name, test_package), jobs in self.failed_tests.items(): - job_list = ", ".join(f"<{job.web_url}|{job.name}>" for job in jobs[: self.MAX_JOBS_PER_TEST]) + job_list = ", ".join(f"<{job['url']}|{job['name']}>" for job in jobs[: self.MAX_JOBS_PER_TEST]) if len(jobs) > self.MAX_JOBS_PER_TEST: job_list += f" and {len(jobs) - self.MAX_JOBS_PER_TEST} more" print(f"- `{test_name}` from package `{test_package}` (in {job_list})", file=buffer) diff --git a/tasks/linter.py b/tasks/linter.py index d79cf02a7aea7..c6906ffaceeb1 100644 --- a/tasks/linter.py +++ b/tasks/linter.py @@ -8,9 +8,10 @@ from tasks.build_tags import compute_build_tags_for_flavor from tasks.flavor import AgentFlavor from tasks.go import run_golangci_lint -from tasks.libs.ciproviders.gitlab_api import ( +from tasks.libs.ciproviders.gitlab import ( + Gitlab, generate_gitlab_full_configuration, - get_gitlab_repo, + get_gitlab_token, get_preset_contexts, load_context, ) @@ -380,15 +381,15 @@ def gitlab_ci(_, test="all", custom_context=None): else: all_contexts = get_preset_contexts(test) print(f"We will tests {len(all_contexts)} contexts.") - agent = get_gitlab_repo() for context in all_contexts: print("Test gitlab configuration with context: ", context) config = generate_gitlab_full_configuration(".gitlab-ci.yml", dict(context)) - res = agent.ci_lint.create({"content": config}) - status = color_message("valid", "green") if res.valid else color_message("invalid", "red") + gitlab = Gitlab(api_token=get_gitlab_token()) + res = gitlab.lint(config) + status = color_message("valid", "green") if res["valid"] else color_message("invalid", "red") print(f"Config is {status}") - if len(res.warnings) > 0: - print(color_message(f"Warnings: {res.warnings}", "orange"), file=sys.stderr) - if not res.valid: - print(color_message(f"Errors: {res.errors}", "red"), file=sys.stderr) + if len(res["warnings"]) > 0: + print(color_message(f"Warnings: {res['warnings']}", "orange"), file=sys.stderr) + if not res["valid"]: + print(color_message(f"Errors: {res['errors']}", "red"), file=sys.stderr) raise Exit(code=1) diff --git a/tasks/notify.py b/tasks/notify.py index a5c8da6f26ce8..037f16be047a6 100644 --- a/tasks/notify.py +++ b/tasks/notify.py @@ -291,7 +291,7 @@ def update_statistics(job_executions): # Update statistics and collect consecutive failed jobs alert_jobs = {"consecutive": [], "cumulative": []} failed_jobs = 
get_failed_jobs(PROJECT_NAME, os.getenv("CI_PIPELINE_ID")) - failed_set = {job.name for job in failed_jobs.all_failures()} + failed_set = {job["name"] for job in failed_jobs.all_failures()} current_set = set(job_executions["jobs"].keys()) # Insert data for newly failing jobs new_failed_jobs = failed_set - current_set diff --git a/tasks/pipeline.py b/tasks/pipeline.py index 260d860f66f35..9a9478e7f9e8e 100644 --- a/tasks/pipeline.py +++ b/tasks/pipeline.py @@ -6,13 +6,11 @@ from datetime import datetime, timedelta, timezone import yaml -from gitlab import GitlabError -from gitlab.v4.objects import Project from invoke import task from invoke.exceptions import Exit from tasks.libs.ciproviders.github_api import GithubAPI -from tasks.libs.ciproviders.gitlab_api import get_gitlab_bot_token, get_gitlab_repo +from tasks.libs.ciproviders.gitlab import Gitlab, get_gitlab_bot_token, get_gitlab_token from tasks.libs.common.color import color_message from tasks.libs.common.utils import ( DEFAULT_BRANCH, @@ -56,7 +54,7 @@ def GitlabYamlLoader(): # Tasks to trigger pipelines -def check_deploy_pipeline(repo: Project, git_ref, release_version_6, release_version_7, repo_branch): +def check_deploy_pipeline(gitlab, git_ref, release_version_6, release_version_7, repo_branch): """ Run checks to verify a deploy pipeline is valid: - it targets a valid repo branch @@ -83,9 +81,9 @@ def check_deploy_pipeline(repo: Project, git_ref, release_version_6, release_ver if release_version_6 and match: # release_version_6 is not empty and git_ref matches v7 pattern, construct v6 tag and check. tag_name = "6." + "".join(match.groups()) - try: - repo.tags.get(tag_name) - except GitlabError: + gitlab_tag = gitlab.find_tag(tag_name) + + if ("name" not in gitlab_tag) or gitlab_tag["name"] != tag_name: print(f"Cannot find GitLab v6 tag {tag_name} while trying to build git ref {git_ref}") raise Exit(code=1) @@ -96,9 +94,9 @@ def check_deploy_pipeline(repo: Project, git_ref, release_version_6, release_ver if release_version_7 and match: # release_version_7 is not empty and git_ref matches v6 pattern, construct v7 tag and check. tag_name = "7." + "".join(match.groups()) - try: - repo.tags.get(tag_name) - except GitlabError: + gitlab_tag = gitlab.find_tag(tag_name) + + if ("name" not in gitlab_tag) or gitlab_tag["name"] != tag_name: print(f"Cannot find GitLab v7 tag {tag_name} while trying to build git ref {git_ref}") raise Exit(code=1) @@ -112,7 +110,8 @@ def clean_running_pipelines(ctx, git_ref=DEFAULT_BRANCH, here=False, use_latest_ should be cancelled. 
""" - agent = get_gitlab_repo() + gitlab = Gitlab(api_token=get_gitlab_token()) + gitlab.test_project_found() if here: git_ref = ctx.run("git rev-parse --abbrev-ref HEAD", hide=True).stdout.strip() @@ -125,14 +124,14 @@ def clean_running_pipelines(ctx, git_ref=DEFAULT_BRANCH, here=False, use_latest_ elif not sha: print(f"Git sha not provided, fetching all running pipelines on {git_ref}") - pipelines = get_running_pipelines_on_same_ref(agent, git_ref, sha) + pipelines = get_running_pipelines_on_same_ref(gitlab, git_ref, sha) print( f"Found {len(pipelines)} running pipeline(s) matching the request.", "They are ordered from the newest one to the oldest one.\n", sep='\n', ) - cancel_pipelines_with_confirmation(agent, pipelines) + cancel_pipelines_with_confirmation(gitlab, pipelines) def workflow_rules(gitlab_file=".gitlab-ci.yml"): @@ -176,33 +175,37 @@ def auto_cancel_previous_pipelines(ctx): if not os.environ.get('GITLAB_TOKEN'): raise Exit("GITLAB_TOKEN variable needed to cancel pipelines on the same ref.", 1) + gitlab = Gitlab(api_token=get_gitlab_token()) + gitlab.test_project_found() + git_ref = os.getenv("CI_COMMIT_REF_NAME") git_sha = os.getenv("CI_COMMIT_SHA") - repo = get_gitlab_repo() - pipelines = get_running_pipelines_on_same_ref(repo, git_ref) - pipelines_without_current = [p for p in pipelines if p.sha != git_sha] + pipelines = get_running_pipelines_on_same_ref(gitlab, git_ref) + pipelines_without_current = [p for p in pipelines if p["sha"] != git_sha] for pipeline in pipelines_without_current: # We cancel pipeline only if it correspond to a commit that is an ancestor of the current commit - is_ancestor = ctx.run(f'git merge-base --is-ancestor {pipeline.sha} {git_sha}', warn=True, hide="both") + is_ancestor = ctx.run(f'git merge-base --is-ancestor {pipeline["sha"]} {git_sha}', warn=True, hide="both") if is_ancestor.exited == 0: - print(f'Gracefully canceling jobs that are not canceled on pipeline {pipeline.id} ({pipeline.web_url})') - gracefully_cancel_pipeline(repo, pipeline, force_cancel_stages=["package_build"]) + print( + f'Gracefully canceling jobs that are not canceled on pipeline {pipeline["id"]} ({pipeline["web_url"]})' + ) + gracefully_cancel_pipeline(gitlab, pipeline, force_cancel_stages=["package_build"]) elif is_ancestor.exited == 1: - print(f'{pipeline.sha} is not an ancestor of {git_sha}, not cancelling pipeline {pipeline.id}') + print(f'{pipeline["sha"]} is not an ancestor of {git_sha}, not cancelling pipeline {pipeline["id"]}') elif is_ancestor.exited == 128: min_time_before_cancel = 5 print( - f'Could not determine if {pipeline.sha} is an ancestor of {git_sha}, probably because it has been deleted from the history because of force push' + f'Could not determine if {pipeline["sha"]} is an ancestor of {git_sha}, probably because it has been deleted from the history because of force push' ) - if datetime.strptime(pipeline.created_at, "%Y-%m-%dT%H:%M:%S.%fZ") < datetime.now() - timedelta( + if datetime.strptime(pipeline["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ") < datetime.now() - timedelta( minutes=min_time_before_cancel ): print( - f'Pipeline started earlier than {min_time_before_cancel} minutes ago, gracefully canceling pipeline {pipeline.id}' + f'Pipeline started earlier than {min_time_before_cancel} minutes ago, gracefully canceling pipeline {pipeline["id"]}' ) - gracefully_cancel_pipeline(repo, pipeline, force_cancel_stages=["package_build"]) + gracefully_cancel_pipeline(gitlab, pipeline, force_cancel_stages=["package_build"]) else: print(is_ancestor.stderr) raise 
Exit(1) @@ -263,7 +266,8 @@ def run( inv pipeline.run --deploy --use-release-entries --major-versions "6,7" --git-ref "7.32.0" --repo-branch "stable" """ - repo = get_gitlab_repo() + gitlab = Gitlab(api_token=get_gitlab_token()) + gitlab.test_project_found() if (not git_ref and not here) or (git_ref and here): raise Exit("ERROR: Exactly one of --here or --git-ref must be specified.", code=1) @@ -286,7 +290,7 @@ def run( if deploy: # Check the validity of the deploy pipeline - check_deploy_pipeline(repo, git_ref, release_version_6, release_version_7, repo_branch) + check_deploy_pipeline(gitlab, git_ref, release_version_6, release_version_7, repo_branch) # Force all builds and kitchen tests to be run if not all_builds: print( @@ -305,7 +309,7 @@ def run( ) e2e_tests = True - pipelines = get_running_pipelines_on_same_ref(repo, git_ref) + pipelines = get_running_pipelines_on_same_ref(gitlab, git_ref) if pipelines: print( @@ -315,11 +319,11 @@ def run( "They are ordered from the newest one to the oldest one.\n", sep='\n', ) - cancel_pipelines_with_confirmation(repo, pipelines) + cancel_pipelines_with_confirmation(gitlab, pipelines) try: - pipeline = trigger_agent_pipeline( - repo, + pipeline_id = trigger_agent_pipeline( + gitlab, git_ref, release_version_6, release_version_7, @@ -334,7 +338,7 @@ def run( print(color_message(f"ERROR: pipeline does not match any workflow rule. Rules:\n{workflow_rules()}", "red")) return - wait_for_pipeline(repo, pipeline) + wait_for_pipeline(gitlab, pipeline_id) @task @@ -352,7 +356,8 @@ def follow(ctx, id=None, git_ref=None, here=False, project_name="DataDog/datadog inv pipeline.follow --id 1234567 """ - repo = get_gitlab_repo(project_name) + gitlab = Gitlab(project_name=project_name, api_token=get_gitlab_token()) + gitlab.test_project_found() args_given = 0 if id is not None: @@ -368,25 +373,22 @@ def follow(ctx, id=None, git_ref=None, here=False, project_name="DataDog/datadog ) if id is not None: - pipeline = repo.pipelines.get(id) - wait_for_pipeline(repo, pipeline) + wait_for_pipeline(gitlab, id) elif git_ref is not None: - wait_for_pipeline_from_ref(repo, git_ref) + wait_for_pipeline_from_ref(gitlab, git_ref) elif here: git_ref = ctx.run("git rev-parse --abbrev-ref HEAD", hide=True).stdout.strip() - wait_for_pipeline_from_ref(repo, git_ref) + wait_for_pipeline_from_ref(gitlab, git_ref) -def wait_for_pipeline_from_ref(repo: Project, ref): - # Get last updated pipeline - pipelines = repo.pipelines.list(ref=ref, per_page=1, order_by='updated_at') - if len(pipelines) == 0: +def wait_for_pipeline_from_ref(gitlab, ref): + pipeline = gitlab.last_pipeline_for_ref(ref) + if pipeline is not None: + wait_for_pipeline(gitlab, pipeline['id']) + else: print(f"No pipelines found for {ref}") raise Exit(code=1) - pipeline = pipelines[0] - wait_for_pipeline(repo, pipeline) - @task(iterable=['variable']) def trigger_child_pipeline(_, git_ref, project_name, variable=None, follow=True): @@ -400,9 +402,9 @@ def trigger_child_pipeline(_, git_ref, project_name, variable=None, follow=True) Use --follow to make this task wait for the pipeline to finish, and return 1 if it fails. (requires GITLAB_TOKEN). 
Examples: - inv pipeline.trigger-child-pipeline --git-ref "main" --project-name "DataDog/agent-release-management" --variable "RELEASE_VERSION" + inv pipeline.trigger-child-pipeline --git-ref "master" --project-name "DataDog/agent-release-management" --variables "RELEASE_VERSION" - inv pipeline.trigger-child-pipeline --git-ref "main" --project-name "DataDog/agent-release-management" --variable "VAR1" --variable "VAR2" --variable "VAR3" + inv pipeline.trigger-child-pipeline --git-ref "master" --project-name "DataDog/agent-release-management" --variables "VAR1,VAR2,VAR3" """ if not os.environ.get('CI_JOB_TOKEN'): @@ -416,7 +418,7 @@ def trigger_child_pipeline(_, git_ref, project_name, variable=None, follow=True) # set, but trigger_pipeline doesn't use it os.environ["GITLAB_TOKEN"] = os.environ['CI_JOB_TOKEN'] - repo = get_gitlab_repo(project_name) + gitlab = Gitlab(project_name=project_name, api_token=get_gitlab_token()) data = {"token": os.environ['CI_JOB_TOKEN'], "ref": git_ref, "variables": {}} @@ -441,22 +443,23 @@ def trigger_child_pipeline(_, git_ref, project_name, variable=None, follow=True) flush=True, ) - try: - data['variables'] = [{'key': key, 'value': value} for (key, value) in data['variables'].items()] + res = gitlab.trigger_pipeline(data) - pipeline = repo.pipelines.create(data) - except GitlabError as e: - raise Exit(f"Failed to create child pipeline: {e}", code=1) + if 'id' not in res: + raise Exit(f"Failed to create child pipeline: {res}", code=1) - print(f"Created a child pipeline with id={pipeline.id}, url={pipeline.web_url}", flush=True) + pipeline_id = res['id'] + pipeline_url = res['web_url'] + print(f"Created a child pipeline with id={pipeline_id}, url={pipeline_url}", flush=True) if follow: print("Waiting for child pipeline to finish...", flush=True) - wait_for_pipeline(repo, pipeline) + wait_for_pipeline(gitlab, pipeline_id) # Check pipeline status - pipestatus = pipeline.status.lower().strip() + pipeline = gitlab.pipeline(pipeline_id) + pipestatus = pipeline["status"].lower().strip() if pipestatus != "success": raise Exit(f"Error: child pipeline status {pipestatus.title()}", code=1) @@ -579,16 +582,21 @@ def changelog(ctx, new_commit_sha): ) +def _init_pipeline_schedule_task(): + gitlab = Gitlab(api_token=get_gitlab_bot_token()) + gitlab.test_project_found() + return gitlab + + @task def get_schedules(_): """ Pretty-print all pipeline schedules on the repository. """ - repo = get_gitlab_repo(token=get_gitlab_bot_token()) - - for sched in repo.pipelineschedules.list(per_page=100, all=True): - sched.pprint() + gitlab = _init_pipeline_schedule_task() + for ps in gitlab.all_pipeline_schedules(): + pprint.pprint(ps) @task @@ -597,11 +605,9 @@ def get_schedule(_, schedule_id): Pretty-print a single pipeline schedule on the repository. """ - repo = get_gitlab_repo(token=get_gitlab_bot_token()) - - sched = repo.pipelineschedules.get(schedule_id) - - sched.pprint() + gitlab = _init_pipeline_schedule_task() + result = gitlab.pipeline_schedule(schedule_id) + pprint.pprint(result) @task @@ -612,13 +618,9 @@ def create_schedule(_, description, ref, cron, cron_timezone=None, active=False) Note that unless you explicitly specify the --active flag, the schedule will be created as inactive. 
""" - repo = get_gitlab_repo(token=get_gitlab_bot_token()) - - sched = repo.pipelineschedules.create( - {'description': description, 'ref': ref, 'cron': cron, 'cron_timezone': cron_timezone, 'active': active} - ) - - sched.pprint() + gitlab = _init_pipeline_schedule_task() + result = gitlab.create_pipeline_schedule(description, ref, cron, cron_timezone, active) + pprint.pprint(result) @task @@ -627,14 +629,9 @@ def edit_schedule(_, schedule_id, description=None, ref=None, cron=None, cron_ti Edit an existing pipeline schedule on the repository. """ - repo = get_gitlab_repo(token=get_gitlab_bot_token()) - - data = {'description': description, 'ref': ref, 'cron': cron, 'cron_timezone': cron_timezone} - data = {key: value for (key, value) in data.items() if value is not None} - - sched = repo.pipelineschedules.update(schedule_id, data) - - pprint.pprint(sched) + gitlab = _init_pipeline_schedule_task() + result = gitlab.edit_pipeline_schedule(schedule_id, description, ref, cron, cron_timezone) + pprint.pprint(result) @task @@ -643,11 +640,9 @@ def activate_schedule(_, schedule_id): Activate an existing pipeline schedule on the repository. """ - repo = get_gitlab_repo(token=get_gitlab_bot_token()) - - sched = repo.pipelineschedules.update(schedule_id, {'active': True}) - - sched.pprint() + gitlab = _init_pipeline_schedule_task() + result = gitlab.edit_pipeline_schedule(schedule_id, active=True) + pprint.pprint(result) @task @@ -656,11 +651,9 @@ def deactivate_schedule(_, schedule_id): Deactivate an existing pipeline schedule on the repository. """ - repo = get_gitlab_repo(token=get_gitlab_bot_token()) - - sched = repo.pipelineschedules.update(schedule_id, {'active': False}) - - sched.pprint() + gitlab = _init_pipeline_schedule_task() + result = gitlab.edit_pipeline_schedule(schedule_id, active=False) + pprint.pprint(result) @task @@ -669,11 +662,9 @@ def delete_schedule(_, schedule_id): Delete an existing pipeline schedule on the repository. """ - repo = get_gitlab_repo(token=get_gitlab_bot_token()) - - repo.pipelineschedules.delete(schedule_id) - - print('Deleted schedule', schedule_id) + gitlab = _init_pipeline_schedule_task() + result = gitlab.delete_pipeline_schedule(schedule_id) + pprint.pprint(result) @task @@ -682,12 +673,9 @@ def create_schedule_variable(_, schedule_id, key, value): Create a variable for an existing schedule on the repository. """ - repo = get_gitlab_repo(token=get_gitlab_bot_token()) - - sched = repo.pipelineschedules.get(schedule_id) - sched.variables.create({'key': key, 'value': value}) - - sched.pprint() + gitlab = _init_pipeline_schedule_task() + result = gitlab.create_pipeline_schedule_variable(schedule_id, key, value) + pprint.pprint(result) @task @@ -696,12 +684,9 @@ def edit_schedule_variable(_, schedule_id, key, value): Edit an existing variable for a schedule on the repository. """ - repo = get_gitlab_repo(token=get_gitlab_bot_token()) - - sched = repo.pipelineschedules.get(schedule_id) - sched.variables.update(key, {'value': value}) - - sched.pprint() + gitlab = _init_pipeline_schedule_task() + result = gitlab.edit_pipeline_schedule_variable(schedule_id, key, value) + pprint.pprint(result) @task @@ -710,12 +695,9 @@ def delete_schedule_variable(_, schedule_id, key): Delete an existing variable for a schedule on the repository. 
""" - repo = get_gitlab_repo(token=get_gitlab_bot_token()) - - sched = repo.pipelineschedules.get(schedule_id) - sched.variables.delete(key) - - sched.pprint() + gitlab = _init_pipeline_schedule_task() + result = gitlab.delete_pipeline_schedule_variable(schedule_id, key) + pprint.pprint(result) @task( @@ -926,28 +908,28 @@ def test_merge_queue(ctx): pr.create_issue_comment("/merge") # Search for the generated pipeline print(f"PR {pr.html_url} is waiting for MQ pipeline generation") - agent = get_gitlab_repo() + gitlab = Gitlab(api_token=get_gitlab_token()) max_attempts = 5 for attempt in range(max_attempts): time.sleep(30) - pipelines = agent.pipelines.list(per_page=100) + pipelines = gitlab.last_pipelines() try: - pipeline = next(p for p in pipelines if p.ref.startswith(f"mq-working-branch-{test_main}")) - print(f"Pipeline found: {pipeline.web_url}") + pipeline = next(p for p in pipelines if p["ref"].startswith(f"mq-working-branch-{test_main}")) + print(f"Pipeline found: {pipeline['web_url']}") break except StopIteration: if attempt == max_attempts - 1: raise RuntimeError("No pipeline found for the merge queue") continue - success = pipeline.status == "running" + success = pipeline["status"] == "running" if success: print("Pipeline correctly created, congrats") else: - print(f"[ERROR] Impossible to generate a pipeline for the merge queue, please check {pipeline.web_url}") + print(f"[ERROR] Impossible to generate a pipeline for the merge queue, please check {pipeline['web_url']}") # Clean up print("Cleaning up") if success: - pipeline.cancel() + gitlab.cancel_pipeline(pipeline["id"]) pr.edit(state="closed") ctx.run(f"git checkout {current_branch}", hide=True) ctx.run(f"git branch -D {test_main}", hide=True) diff --git a/tasks/release.py b/tasks/release.py index 27eb8813640da..9b882ccabdbb0 100644 --- a/tasks/release.py +++ b/tasks/release.py @@ -11,12 +11,11 @@ from datetime import date from time import sleep -from gitlab import GitlabError from invoke import Failure, task from invoke.exceptions import Exit from tasks.libs.ciproviders.github_api import GithubAPI -from tasks.libs.ciproviders.gitlab_api import get_gitlab_repo +from tasks.libs.ciproviders.gitlab import Gitlab, get_gitlab_token from tasks.libs.common.color import color_message from tasks.libs.common.user_interactions import yes_no_question from tasks.libs.common.utils import ( @@ -1333,7 +1332,7 @@ def build_rc(ctx, major_versions="6,7", patch_version=False, k8s_deployments=Fal if sys.version_info[0] < 3: return Exit(message="Must use Python 3 for this task", code=1) - datadog_agent = get_gitlab_repo() + gitlab = Gitlab(project_name=GITHUB_REPO_NAME, api_token=get_gitlab_token()) list_major_versions = parse_major_versions(major_versions) # Get the version of the highest major: needed for tag_version and to know @@ -1382,11 +1381,7 @@ def build_rc(ctx, major_versions="6,7", patch_version=False, k8s_deployments=Fal print(color_message(f"Waiting until the {new_version} tag appears in Gitlab", "bold")) gitlab_tag = None while not gitlab_tag: - try: - gitlab_tag = datadog_agent.tags.get(str(new_version)) - except GitlabError: - continue - + gitlab_tag = gitlab.find_tag(str(new_version)).get("name", None) sleep(5) print(color_message("Creating RC pipeline", "bold")) diff --git a/tasks/unit-tests/gitlab_api_tests.py b/tasks/unit-tests/gitlab_api_tests.py index 24399f816c8bd..ad618b0d380ff 100644 --- a/tasks/unit-tests/gitlab_api_tests.py +++ b/tasks/unit-tests/gitlab_api_tests.py @@ -1,6 +1,97 @@ import unittest +from itertools 
import cycle +from unittest import mock -from tasks.libs.ciproviders.gitlab_api import generate_gitlab_full_configuration, read_includes +from invoke.exceptions import Exit + +from tasks.libs.ciproviders.gitlab import Gitlab, generate_gitlab_full_configuration, get_gitlab_token, read_includes +from tasks.libs.common.remote_api import APIError + + +class MockResponse: + def __init__(self, content, status_code): + self.content = content + self.status_code = status_code + + def json(self): + return self.content + + +#################### FAIL REQUEST ##################### + + +def fail_not_found_request(*_args, **_kwargs): + return MockResponse([], 404) + + +##################### MOCKED GITLAB ##################### + + +def mocked_502_gitlab_requests(*_args, **_kwargs): + return MockResponse( + "\r\n502 Bad Gateway\r\n\r\n
<center><h1>502 Bad Gateway</h1></center>
\r\n\r\n\r\n", + 502, + ) + + +def mocked_gitlab_project_request(*_args, **_kwargs): + return MockResponse("name", 200) + + +class SideEffect: + def __init__(self, *fargs): + self.functions = cycle(fargs) + + def __call__(self, *args, **kwargs): + func = next(self.functions) + return func(*args, **kwargs) + + +class TestStatusCode5XX(unittest.TestCase): + @mock.patch('requests.get', side_effect=SideEffect(mocked_502_gitlab_requests, mocked_gitlab_project_request)) + def test_gitlab_one_fail_one_success(self, _): + gitlab = Gitlab(api_token=get_gitlab_token()) + gitlab.requests_sleep_time = 0 + gitlab.test_project_found() + + @mock.patch( + 'requests.get', + side_effect=SideEffect( + mocked_502_gitlab_requests, + mocked_502_gitlab_requests, + mocked_502_gitlab_requests, + mocked_502_gitlab_requests, + mocked_gitlab_project_request, + ), + ) + def test_gitlab_last_one_success(self, _): + gitlab = Gitlab(api_token=get_gitlab_token()) + gitlab.requests_sleep_time = 0 + gitlab.test_project_found() + + @mock.patch('requests.get', side_effect=SideEffect(mocked_502_gitlab_requests)) + def test_gitlab_full_fail(self, _): + failed = False + try: + gitlab = Gitlab(api_token=get_gitlab_token()) + gitlab.requests_sleep_time = 0 + gitlab.test_project_found() + except Exit: + failed = True + if not failed: + Exit("GitlabAPI was expected to fail") + + @mock.patch('requests.get', side_effect=SideEffect(fail_not_found_request, mocked_gitlab_project_request)) + def test_gitlab_real_fail(self, _): + failed = False + try: + gitlab = Gitlab(api_token=get_gitlab_token()) + gitlab.requests_sleep_time = 0 + gitlab.test_project_found() + except APIError: + failed = True + if not failed: + Exit("GitlabAPI was expected to fail") class TestReadIncludes(unittest.TestCase): diff --git a/tasks/unit-tests/notify_tests.py b/tasks/unit-tests/notify_tests.py index 8d3c5b7173d1a..01b54f89170c3 100644 --- a/tasks/unit-tests/notify_tests.py +++ b/tasks/unit-tests/notify_tests.py @@ -2,10 +2,8 @@ import os import pathlib import unittest -from typing import List from unittest.mock import MagicMock, patch -from gitlab.v4.objects import ProjectJob from invoke import MockContext, Result from invoke.exceptions import UnexpectedExit @@ -13,95 +11,67 @@ from tasks.libs.types.types import FailedJobs, FailedJobType -def get_fake_jobs() -> List[ProjectJob]: - with open("tasks/unit-tests/testdata/jobs.json") as f: - jobs = json.load(f) - - return [ProjectJob(MagicMock(), attrs=job) for job in jobs] - - class TestSendMessage(unittest.TestCase): - @patch('tasks.libs.ciproviders.gitlab_api.get_gitlab_api') - def test_merge(self, api_mock): - repo_mock = api_mock.return_value.projects.get.return_value - repo_mock.jobs.get.return_value.trace.return_value = b"Log trace" - list_mock = repo_mock.pipelines.get.return_value.jobs.list - list_mock.side_effect = [get_fake_jobs(), []] - notify.send_message(MockContext(), notification_type="merge", print_to_stdout=True) - list_mock.assert_called() - @patch("tasks.notify.get_failed_jobs") def test_merge_without_get_failed_call(self, get_failed_jobs_mock): failed = FailedJobs() failed.add_failed_job( - ProjectJob( - MagicMock(), - attrs={ - "name": "job1", - "stage": "stage1", - "retry_summary": [], - "web_url": "http://www.job.com", - "failure_type": FailedJobType.INFRA_FAILURE, - "allow_failure": False, - }, - ) + { + "name": "job1", + "stage": "stage1", + "retry_summary": [], + "url": "http://www.job.com", + "failure_type": FailedJobType.INFRA_FAILURE, + "allow_failure": False, + } ) 
failed.add_failed_job( - ProjectJob( - MagicMock(), - attrs={ - "name": "job2", - "stage": "stage2", - "retry_summary": [], - "web_url": "http://www.job.com", - "failure_type": FailedJobType.INFRA_FAILURE, - "allow_failure": True, - }, - ) + { + "name": "job2", + "stage": "stage2", + "retry_summary": [], + "url": "http://www.job.com", + "failure_type": FailedJobType.INFRA_FAILURE, + "allow_failure": True, + } ) failed.add_failed_job( - ProjectJob( - MagicMock(), - attrs={ - "name": "job3", - "stage": "stage3", - "retry_summary": [], - "web_url": "http://www.job.com", - "failure_type": FailedJobType.JOB_FAILURE, - "allow_failure": False, - }, - ) + { + "name": "job3", + "stage": "stage3", + "retry_summary": [], + "url": "http://www.job.com", + "failure_type": FailedJobType.JOB_FAILURE, + "allow_failure": False, + } ) failed.add_failed_job( - ProjectJob( - MagicMock(), - attrs={ - "name": "job4", - "stage": "stage4", - "retry_summary": [], - "web_url": "http://www.job.com", - "failure_type": FailedJobType.JOB_FAILURE, - "allow_failure": True, - }, - ) + { + "name": "job4", + "stage": "stage4", + "retry_summary": [], + "url": "http://www.job.com", + "failure_type": FailedJobType.JOB_FAILURE, + "allow_failure": True, + } ) get_failed_jobs_mock.return_value = failed notify.send_message(MockContext(), notification_type="merge", print_to_stdout=True) get_failed_jobs_mock.assert_called() - @patch('tasks.libs.ciproviders.gitlab_api.get_gitlab_api') - def test_merge_with_get_failed_call(self, api_mock): - repo_mock = api_mock.return_value.projects.get.return_value - trace_mock = repo_mock.jobs.get.return_value.trace - list_mock = repo_mock.pipelines.get.return_value.jobs.list - - trace_mock.return_value = b"no basic auth credentials" - list_mock.return_value = get_fake_jobs() - + @patch("requests.get") + def test_merge_with_get_failed_call(self, get_mock): + with open("tasks/unit-tests/testdata/jobs.json") as f: + jobs = json.load(f) + job_list = {"json.return_value": jobs} + no_jobs = {"json.return_value": ""} + get_mock.side_effect = [ + MagicMock(status_code=200, **job_list), + MagicMock(status_code=200, **no_jobs), + MagicMock(status_code=200, text="no basic auth credentials"), + ] notify.send_message(MockContext(), notification_type="merge", print_to_stdout=True) - - trace_mock.assert_called() - list_mock.assert_called() + get_mock.assert_called() def test_post_to_channel1(self): self.assertTrue(notify._should_send_message_to_channel('main', default_branch='main')) @@ -132,40 +102,39 @@ def test_post_to_author5(self): class TestSendStats(unittest.TestCase): - @patch('tasks.libs.ciproviders.gitlab_api.get_gitlab_api') + @patch("requests.get") @patch("tasks.notify.create_count", new=MagicMock()) - def test_nominal(self, api_mock): - repo_mock = api_mock.return_value.projects.get.return_value - trace_mock = repo_mock.jobs.get.return_value.trace - list_mock = repo_mock.pipelines.get.return_value.jobs.list - - trace_mock.return_value = b"E2E INTERNAL ERROR" - list_mock.return_value = get_fake_jobs() - + def test_nominal(self, get_mock): + with open("tasks/unit-tests/testdata/jobs.json") as f: + jobs = json.load(f) + job_list = {"json.return_value": jobs} + no_jobs = {"json.return_value": ""} + get_mock.side_effect = [ + MagicMock(status_code=200, **job_list), + MagicMock(status_code=200, **no_jobs), + MagicMock(status_code=200, text="E2E INTERNAL ERROR"), + ] notify.send_stats(MockContext(), print_to_stdout=True) - - trace_mock.assert_called() - list_mock.assert_called() + get_mock.assert_called() 
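Note: the rewritten tests above drop the python-gitlab client mocks and instead patch `requests.get` directly, using `unittest.mock.patch` with a `side_effect` sequence so each successive HTTP call made by the task under test receives the next canned response. A minimal, self-contained sketch of that pattern (the `fetch_status` helper and the URL are illustrative only, not part of this patch):

import unittest
from unittest import mock

import requests


def fetch_status(url):
    # Hypothetical helper standing in for the code under test: returns the JSON body of a GET.
    return requests.get(url).json()


class TestSideEffectPattern(unittest.TestCase):
    @mock.patch("requests.get")
    def test_successive_responses(self, get_mock):
        # Each call to requests.get consumes the next mock in the side_effect list.
        get_mock.side_effect = [
            mock.MagicMock(status_code=200, **{"json.return_value": {"status": "running"}}),
            mock.MagicMock(status_code=200, **{"json.return_value": {"status": "success"}}),
        ]
        self.assertEqual(fetch_status("https://example.invalid/api")["status"], "running")
        self.assertEqual(fetch_status("https://example.invalid/api")["status"], "success")
        get_mock.assert_called()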
class TestCheckConsistentFailures(unittest.TestCase): - @patch('tasks.libs.ciproviders.gitlab_api.get_gitlab_api') - def test_nominal(self, api_mock): + @patch("requests.get") + def test_nominal(self, get_mock): os.environ["CI_PIPELINE_ID"] = "456" - - repo_mock = api_mock.return_value.projects.get.return_value - trace_mock = repo_mock.jobs.get.return_value.trace - list_mock = repo_mock.pipelines.get.return_value.jobs.list - - trace_mock.return_value = b"net/http: TLS handshake timeout" - list_mock.return_value = get_fake_jobs() - + with open("tasks/unit-tests/testdata/jobs.json") as f: + jobs = json.load(f) + job_list = {"json.return_value": jobs} + no_jobs = {"json.return_value": ""} + get_mock.side_effect = [ + MagicMock(status_code=200, **job_list), + MagicMock(status_code=200, **no_jobs), + MagicMock(status_code=200, text="net/http: TLS handshake timeout"), + ] notify.check_consistent_failures( MockContext(run=Result("test")), "tasks/unit-tests/testdata/job_executions.json" ) - - trace_mock.assert_called() - list_mock.assert_called() + get_mock.assert_called() class TestRetrieveJobExecutionsCreated(unittest.TestCase): @@ -204,9 +173,7 @@ class TestUpdateStatistics(unittest.TestCase): @patch('tasks.notify.get_failed_jobs') def test_nominal(self, mock_get_failed): failed_jobs = mock_get_failed.return_value - failed_jobs.all_failures.return_value = [ - ProjectJob(MagicMock(), attrs=a) for a in [{"name": "nifnif"}, {"name": "nafnaf"}] - ] + failed_jobs.all_failures.return_value = [{"name": "nifnif"}, {"name": "nafnaf"}] j = { "jobs": { "nafnaf": {"consecutive_failures": 2, "cumulative_failures": [0, 0, 0, 0, 0, 0, 0, 0, 1, 1]}, @@ -228,9 +195,7 @@ def test_nominal(self, mock_get_failed): @patch('tasks.notify.get_failed_jobs') def test_multiple_failures(self, mock_get_failed): failed_jobs = mock_get_failed.return_value - failed_jobs.all_failures.return_value = [ - ProjectJob(MagicMock(), attrs=a) for a in [{"name": "poulidor"}, {"name": "virenque"}, {"name": "bardet"}] - ] + failed_jobs.all_failures.return_value = [{"name": "poulidor"}, {"name": "virenque"}, {"name": "bardet"}] j = { "jobs": { "poulidor": {"consecutive_failures": 8, "cumulative_failures": [0, 0, 1, 1, 1, 1, 1, 1, 1, 1]}, From 57f095ff9b1dc470e840a5b0471013cf7c60655e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20Beauz=C3=A9e-Luyssen?= Date: Fri, 12 Apr 2024 17:44:49 +0200 Subject: [PATCH 2/9] CI: kitchen_deploy: reduce contention when deploying debian packages (#24610) The kitchen_deploy jobs are publishing to a dedicated repo for each pipeline, meaning there's no need to protect against corruption coming from another pipeline --- .gitlab/kitchen_deploy/kitchen_deploy.yml | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/.gitlab/kitchen_deploy/kitchen_deploy.yml b/.gitlab/kitchen_deploy/kitchen_deploy.yml index 246701370d1a5..bd34481a63dde 100644 --- a/.gitlab/kitchen_deploy/kitchen_deploy.yml +++ b/.gitlab/kitchen_deploy/kitchen_deploy.yml @@ -33,21 +33,10 @@ - filename=$(ls datadog-signing-keys*.deb); mv $filename datadog-signing-keys_${DD_PIPELINE_ID}.deb - popd -# Avoid simultaneous writes on the repo metadata file that made kitchen tests fail before -.deploy_deb_resource_group-a6: &deploy_deb_resource_group-a6 - resource_group: deploy_deb_a6 - -.deploy_deb_resource_group-a7: &deploy_deb_resource_group-a7 - resource_group: deploy_deb_a7 - -.deploy_deb_resource_group-i7: &deploy_deb_resource_group-i7 - resource_group: deploy_deb_i7 - .deploy_deb_testing-a6: stage: kitchen_deploy image: 
486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/datadog-agent-builders/gitlab_agent_deploy:$DATADOG_AGENT_BUILDERS tags: ["arch:amd64"] - <<: *deploy_deb_resource_group-a6 variables: DD_PIPELINE_ID: $CI_PIPELINE_ID-a6 before_script: @@ -58,7 +47,6 @@ stage: kitchen_deploy image: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/datadog-agent-builders/gitlab_agent_deploy:$DATADOG_AGENT_BUILDERS tags: ["arch:amd64"] - <<: *deploy_deb_resource_group-i7 variables: DD_PIPELINE_ID: $CI_PIPELINE_ID-i7 before_script: @@ -104,7 +92,6 @@ deploy_deb_testing-a6_arm64: stage: kitchen_deploy image: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/datadog-agent-builders/gitlab_agent_deploy:$DATADOG_AGENT_BUILDERS tags: ["arch:amd64"] - <<: *deploy_deb_resource_group-a7 variables: DD_PIPELINE_ID: $CI_PIPELINE_ID-a7 before_script: From 1359713c94a521e71a95b3193cf5fc2b6c4fe363 Mon Sep 17 00:00:00 2001 From: Paul Cacheux Date: Fri, 12 Apr 2024 17:54:29 +0200 Subject: [PATCH 3/9] fix `inv -e security-agent.sync-secl-win-pkg` on macOS (#24646) --- tasks/security_agent.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tasks/security_agent.py b/tasks/security_agent.py index a938b55323fa8..0af5fc538de7e 100644 --- a/tasks/security_agent.py +++ b/tasks/security_agent.py @@ -945,5 +945,8 @@ def sync_secl_win_pkg(ctx): fto = ffrom ctx.run(f"cp pkg/security/secl/model/{ffrom} pkg/security/seclwin/model/{fto}") - ctx.run(f"sed -i '/^\\/\\/go:build/d' pkg/security/seclwin/model/{fto}") + if sys.platform == "darwin": + ctx.run(f"sed -i '' '/^\\/\\/go:build/d' pkg/security/seclwin/model/{fto}") + else: + ctx.run(f"sed -i '/^\\/\\/go:build/d' pkg/security/seclwin/model/{fto}") ctx.run(f"gofmt -s -w pkg/security/seclwin/model/{fto}") From 911bd7d0c8574dcc5fc206c70d1bdf08afabe918 Mon Sep 17 00:00:00 2001 From: Nicolas Schweitzer Date: Fri, 12 Apr 2024 17:57:39 +0200 Subject: [PATCH 4/9] feat(ci): Remove usage of unused image (#24639) * feat(ci): Remove usage of unused image * Remove reference on unused variables --- .gitlab-ci.yml | 6 +----- .gitlab/check_merge/do_not_merge.yml | 2 -- .gitlab/source_test/linux.yml | 4 ++-- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index baac10e636852..748a75f3fa7f5 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -170,10 +170,6 @@ variables: DATADOG_AGENT_ARMBUILDIMAGES: v31802788-2dee8fe9 DATADOG_AGENT_SYSPROBE_BUILDIMAGES_SUFFIX: "" DATADOG_AGENT_SYSPROBE_BUILDIMAGES: v31802788-2dee8fe9 - DATADOG_AGENT_KERNEL_MATRIX_TESTING_BUILDIMAGES_SUFFIX: "" - DATADOG_AGENT_KERNEL_MATRIX_TESTING_BUILDIMAGES: v31802788-2dee8fe9 - DATADOG_AGENT_NIKOS_BUILDIMAGES_SUFFIX: "" - DATADOG_AGENT_NIKOS_BUILDIMAGES: v31802788-2dee8fe9 DATADOG_AGENT_BTF_GEN_BUILDIMAGES_SUFFIX: "" DATADOG_AGENT_BTF_GEN_BUILDIMAGES: v31802788-2dee8fe9 DATADOG_AGENT_BUILDERS: v28719426-b6a4fd9 @@ -367,7 +363,7 @@ variables: - .gitlab/container_build/fakeintake.yml - .gitlab/dev_container_deploy/fakeintake.yml compare_to: main # TODO: use a variable, when this is supported https://gitlab.com/gitlab-org/gitlab/-/issues/369916 - + # # Workflow rules # Rules used to define whether a pipeline should run, and with which variables diff --git a/.gitlab/check_merge/do_not_merge.yml b/.gitlab/check_merge/do_not_merge.yml index 877698f1d2881..b0405b10dd467 100644 --- a/.gitlab/check_merge/do_not_merge.yml +++ b/.gitlab/check_merge/do_not_merge.yml @@ -16,8 +16,6 @@ do-not-merge: [ ! -z "$DATADOG_AGENT_WINBUILDIMAGES_SUFFIX" ] || [ ! 
-z "$DATADOG_AGENT_ARMBUILDIMAGES_SUFFIX" ] || [ ! -z "$DATADOG_AGENT_SYSPROBE_BUILDIMAGES_SUFFIX" ] || - [ ! -z "$DATADOG_AGENT_KERNEL_MATRIX_TESTING_BUILDIMAGES_SUFFIX" ] || - [ ! -z "$DATADOG_AGENT_NIKOS_BUILDIMAGES_SUFFIX" ] || [ ! -z "$DATADOG_AGENT_BTF_GEN_BUILDIMAGES_SUFFIX" ] || [ ! -z "$TEST_INFRA_DEFINITIONS_BUILDIMAGES_SUFFIX" ]; then echo "Pull request uses non-empty BUILDIMAGES_SUFFIX variable" diff --git a/.gitlab/source_test/linux.yml b/.gitlab/source_test/linux.yml index 5304729be7918..561eb1a201077 100644 --- a/.gitlab/source_test/linux.yml +++ b/.gitlab/source_test/linux.yml @@ -157,7 +157,7 @@ tests_rpm-x64-py2: - !reference [.except_disable_unit_tests] - !reference [.except_mergequeue] - when: on_success - image: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/datadog-agent-buildimages/rpm_x64_testing$DATADOG_AGENT_BUILDIMAGES_SUFFIX:$DATADOG_AGENT_BUILDIMAGES + image: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/datadog-agent-buildimages/rpm_x64$DATADOG_AGENT_BUILDIMAGES_SUFFIX:$DATADOG_AGENT_BUILDIMAGES tags: ["arch:amd64"] variables: PYTHON_RUNTIMES: '2' @@ -168,7 +168,7 @@ tests_rpm-x64-py3: extends: - .rtloader_tests - .linux_tests_with_upload - image: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/datadog-agent-buildimages/rpm_x64_testing$DATADOG_AGENT_BUILDIMAGES_SUFFIX:$DATADOG_AGENT_BUILDIMAGES + image: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/datadog-agent-buildimages/rpm_x64$DATADOG_AGENT_BUILDIMAGES_SUFFIX:$DATADOG_AGENT_BUILDIMAGES tags: ["arch:amd64"] variables: PYTHON_RUNTIMES: '3' From c0e41ecacc17ae029f928f1051962f9cbd42bf5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9lian=20Raimbault?= <161456554+CelianR@users.noreply.github.com> Date: Fri, 12 Apr 2024 18:27:39 +0200 Subject: [PATCH 5/9] [fix] Benchmark gitlab import error (#24647) * [fix-benchmark-gitlab-import] Test * [fix-benchmark-gitlab-import] Test * [fix-benchmark-gitlab-import] Cleaned code --- .gitlab/benchmarks/benchmarks.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab/benchmarks/benchmarks.yml b/.gitlab/benchmarks/benchmarks.yml index 7152e9c0e2d86..b38fe9b56d894 100644 --- a/.gitlab/benchmarks/benchmarks.yml +++ b/.gitlab/benchmarks/benchmarks.yml @@ -10,6 +10,7 @@ benchmark: tags: ["team:apm-k8s-tweaked-metal-datadog-agent", "specific:true"] script: - export ARTIFACTS_DIR="$(pwd)/artifacts" && mkdir -p $ARTIFACTS_DIR + - pip install -r requirements.txt - ./test/benchmarks/apm_scripts/capture-hardware-software-info.sh - ./test/benchmarks/apm_scripts/run-benchmarks.sh - ./test/benchmarks/apm_scripts/analyze-results.sh From d4a7be27b1ca8e9fbad533d590682f793fbd429d Mon Sep 17 00:00:00 2001 From: Gustavo Caso Date: Fri, 12 Apr 2024 18:33:53 +0200 Subject: [PATCH 6/9] fix jmx and check command to have a valid settings component (#24635) --- cmd/agent/subcommands/jmx/command.go | 6 +++++- pkg/cli/subcommands/check/command.go | 7 +++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/cmd/agent/subcommands/jmx/command.go b/cmd/agent/subcommands/jmx/command.go index 239e1ea7392d1..1f29b02025a64 100644 --- a/cmd/agent/subcommands/jmx/command.go +++ b/cmd/agent/subcommands/jmx/command.go @@ -43,6 +43,7 @@ import ( "github.com/DataDog/datadog-agent/comp/core/log/logimpl" "github.com/DataDog/datadog-agent/comp/core/secrets" "github.com/DataDog/datadog-agent/comp/core/settings" + "github.com/DataDog/datadog-agent/comp/core/settings/settingsimpl" "github.com/DataDog/datadog-agent/comp/core/status" "github.com/DataDog/datadog-agent/comp/core/tagger" 
"github.com/DataDog/datadog-agent/comp/core/workloadmeta" @@ -141,6 +142,10 @@ func Commands(globalParams *command.GlobalParams) []*cobra.Command { workloadmeta.Module(), apiimpl.Module(), authtokenimpl.Module(), + // The jmx command do not have settings that change are runtime + // still, we need to pass it to ensure the API server is proprely initialized + settingsimpl.Module(), + fx.Supply(settings.Settings{}), // TODO(components): this is a temporary hack as the StartServer() method of the API package was previously called with nil arguments // This highlights the fact that the API Server created by JMX (through ExecJmx... function) should be different from the ones created // in others commands such as run. @@ -153,7 +158,6 @@ func Commands(globalParams *command.GlobalParams) []*cobra.Command { fx.Provide(func() inventoryagent.Component { return nil }), fx.Provide(func() inventoryhost.Component { return nil }), fx.Provide(func() demultiplexer.Component { return nil }), - fx.Provide(func() settings.Component { return nil }), fx.Provide(func() inventorychecks.Component { return nil }), fx.Provide(func() packagesigning.Component { return nil }), fx.Provide(func() optional.Option[rcservice.Component] { return optional.NewNoneOption[rcservice.Component]() }), diff --git a/pkg/cli/subcommands/check/command.go b/pkg/cli/subcommands/check/command.go index 79870a8ad1bae..55dfdfb18b07e 100644 --- a/pkg/cli/subcommands/check/command.go +++ b/pkg/cli/subcommands/check/command.go @@ -44,6 +44,7 @@ import ( "github.com/DataDog/datadog-agent/comp/core/log/logimpl" "github.com/DataDog/datadog-agent/comp/core/secrets" "github.com/DataDog/datadog-agent/comp/core/settings" + "github.com/DataDog/datadog-agent/comp/core/settings/settingsimpl" "github.com/DataDog/datadog-agent/comp/core/status" "github.com/DataDog/datadog-agent/comp/core/status/statusimpl" "github.com/DataDog/datadog-agent/comp/core/sysprobeconfig/sysprobeconfigimpl" @@ -208,7 +209,10 @@ func MakeCommand(globalParamsGetter func() GlobalParams) *cobra.Command { }, ), statusimpl.Module(), - + // The check command do not have settings that change are runtime + // still, we need to pass it to ensure the API server is proprely initialized + settingsimpl.Module(), + fx.Supply(settings.Settings{}), // TODO(components): this is a temporary hack as the StartServer() method of the API package was previously called with nil arguments // This highlights the fact that the API Server created by JMX (through ExecJmx... function) should be different from the ones created // in others commands such as run. 
@@ -217,7 +221,6 @@ func MakeCommand(globalParamsGetter func() GlobalParams) *cobra.Command { fx.Provide(func() replay.Component { return nil }), fx.Provide(func() pidmap.Component { return nil }), fx.Provide(func() serverdebug.Component { return nil }), - fx.Provide(func() settings.Component { return nil }), fx.Provide(func() host.Component { return nil }), fx.Provide(func() inventoryagent.Component { return nil }), fx.Provide(func() inventoryhost.Component { return nil }), From 44fb1ec7e8c4b0009ddc549b78af4c9217e8ddc6 Mon Sep 17 00:00:00 2001 From: Paul Cacheux Date: Fri, 12 Apr 2024 19:10:47 +0200 Subject: [PATCH 7/9] [CWS] make MacroEvaluator use the cached fields (#24652) --- pkg/security/secl/compiler/eval/macro.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pkg/security/secl/compiler/eval/macro.go b/pkg/security/secl/compiler/eval/macro.go index b01e5e4e218d2..2135c8f178fba 100644 --- a/pkg/security/secl/compiler/eval/macro.go +++ b/pkg/security/secl/compiler/eval/macro.go @@ -169,11 +169,5 @@ func (m *Macro) GetFields() []Field { // GetFields - Returns all the Field that the MacroEvaluator handles func (m *MacroEvaluator) GetFields() []Field { - fields := make([]Field, len(m.fieldValues)) - i := 0 - for key := range m.fieldValues { - fields[i] = key - i++ - } - return fields + return m.fields } From c4d753e27ea247e0cbb665b80cc9bf1ff29f9a25 Mon Sep 17 00:00:00 2001 From: Baptiste Foy Date: Fri, 12 Apr 2024 19:35:40 +0200 Subject: [PATCH 8/9] upgrade(installer): Add APM injector package installation support (#24372) * fix(errors): Clearer errors * upgrade(updater): Add injector support * fix(updater): Use privileged command to write * fix(updater): Update catalog and support ld.so.preload not existing * upgrade(updater): Add injector docker support * chore(updater): Make writing to ld.so.preload safer and remove experiment * remove catalog changes for less conflicts * fix(installer): Cleanup APM injector on setup failure * fix(updater): Remove APM injector on purge * test(installer): Add E2E test for injector installation * fix(installer): Add agent config support for apm injector & fix test * fix(installer): Add error message to helper commands and tentatively fix e2e * fix(installer): Fix e2e tests * refactor(installer): Refactor injector installation * refactor(installer): Manipulate files in go instead of string manipulation * feat(updater): Add lock to package installation commands * fix(tests): Version is not resolved anymore in docker's daemon.json * fix(installer): Allow installation of the injector before the agent * address part of the review * chore(installer): Add more tests * fix(tests): Skip some tests to be able to merge --- pkg/updater/install.go | 34 ++- pkg/updater/service/apm_inject.go | 356 ++++++++++++++++++++++ pkg/updater/service/apm_inject_test.go | 155 ++++++++++ pkg/updater/service/apm_inject_windows.go | 19 ++ pkg/updater/service/datadog_agent.go | 20 ++ pkg/updater/service/docker.go | 196 ++++++++++++ pkg/updater/service/docker_test.go | 137 +++++++++ pkg/updater/service/helper/main.go | 30 +- pkg/updater/service/systemd.go | 58 +++- pkg/updater/service/systemd_test.go | 13 +- pkg/updater/updater.go | 7 +- test/new-e2e/tests/updater/docker.go | 91 ++++++ test/new-e2e/tests/updater/linux_test.go | 141 ++++++++- 13 files changed, 1233 insertions(+), 24 deletions(-) create mode 100644 pkg/updater/service/apm_inject.go create mode 100644 pkg/updater/service/apm_inject_test.go create mode 100644 
pkg/updater/service/apm_inject_windows.go create mode 100644 pkg/updater/service/docker.go create mode 100644 pkg/updater/service/docker_test.go create mode 100644 test/new-e2e/tests/updater/docker.go diff --git a/pkg/updater/install.go b/pkg/updater/install.go index b18b000a4a354..092013a087f67 100644 --- a/pkg/updater/install.go +++ b/pkg/updater/install.go @@ -13,6 +13,7 @@ import ( "os" "path/filepath" "strings" + "sync" oci "github.com/google/go-containerregistry/pkg/v1" "github.com/google/go-containerregistry/pkg/v1/types" @@ -27,11 +28,15 @@ const ( datadogPackageConfigLayerMediaType types.MediaType = "application/vnd.datadog.package.config.layer.v1.tar+zstd" datadogPackageMaxSize = 3 << 30 // 3GiB defaultConfigsDir = "/etc" + + packageDatadogAgent = "datadog-agent" + packageAPMInjector = "datadog-apm-inject" ) type installer struct { repositories *repository.Repositories configsDir string + installLock sync.Mutex } func newInstaller(repositories *repository.Repositories) *installer { @@ -56,10 +61,17 @@ func (i *installer) installStable(pkg string, version string, image oci.Image) e if err != nil { return fmt.Errorf("could not create repository: %w", err) } - if pkg == "datadog-agent" { + + i.installLock.Lock() + defer i.installLock.Unlock() + switch pkg { + case packageDatadogAgent: return service.SetupAgentUnits() + case packageAPMInjector: + return service.SetupAPMInjector() + default: + return nil } - return nil } func (i *installer) installExperiment(pkg string, version string, image oci.Image) error { @@ -100,19 +112,25 @@ func (i *installer) uninstallExperiment(pkg string) error { } func (i *installer) startExperiment(pkg string) error { - // TODO(arthur): currently we only support the datadog-agent package - if pkg != "datadog-agent" { + i.installLock.Lock() + defer i.installLock.Unlock() + switch pkg { + case packageDatadogAgent: + return service.StartAgentExperiment() + default: return nil } - return service.StartAgentExperiment() } func (i *installer) stopExperiment(pkg string) error { - // TODO(arthur): currently we only support the datadog-agent package - if pkg != "datadog-agent" { + i.installLock.Lock() + defer i.installLock.Unlock() + switch pkg { + case packageDatadogAgent: + return service.StopAgentExperiment() + default: return nil } - return service.StopAgentExperiment() } func extractPackageLayers(image oci.Image, configDir string, packageDir string) error { diff --git a/pkg/updater/service/apm_inject.go b/pkg/updater/service/apm_inject.go new file mode 100644 index 0000000000000..4982b61c2a826 --- /dev/null +++ b/pkg/updater/service/apm_inject.go @@ -0,0 +1,356 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +//go:build !windows + +// Package service provides a way to interact with os services +package service + +import ( + "bytes" + "fmt" + "os" + "path" + "strings" + + "github.com/DataDog/datadog-agent/pkg/util/log" +) + +var ( + injectorConfigPrefix = []byte("# BEGIN LD PRELOAD CONFIG") + injectorConfigSuffix = []byte("# END LD PRELOAD CONFIG") +) + +const ( + injectorConfigTemplate = ` +apm_config: + receiver_socket: %s +use_dogstatsd: true +dogstatsd_socket: %s +` + datadogConfigPath = "/etc/datadog-agent/datadog.yaml" + ldSoPreloadPath = "/etc/ld.so.preload" +) + +// SetupAPMInjector sets up the injector at bootstrap +func SetupAPMInjector() error { + // Enforce dd-installer is in the dd-agent group + if err := setInstallerAgentGroup(); err != nil { + return err + } + + installer := &apmInjectorInstaller{ + installPath: "/opt/datadog-packages/datadog-apm-inject/stable", + } + return installer.Setup() +} + +// RemoveAPMInjector removes the APM injector +func RemoveAPMInjector() error { + installer := &apmInjectorInstaller{ + installPath: "/opt/datadog-packages/datadog-apm-inject/stable", + } + return installer.Remove() +} + +type apmInjectorInstaller struct { + installPath string +} + +// Setup sets up the APM injector +func (a *apmInjectorInstaller) Setup() error { + var err error + defer func() { + if err != nil { + removeErr := a.Remove() + if removeErr != nil { + log.Warnf("Failed to remove APM injector: %v", removeErr) + } + } + }() + if err := a.setAgentConfig(); err != nil { + return err + } + if err := a.setRunPermissions(); err != nil { + return err + } + if err := a.setLDPreloadConfig(); err != nil { + return err + } + if err := a.setDockerConfig(); err != nil { + return err + } + return nil +} + +func (a *apmInjectorInstaller) Remove() error { + if err := a.deleteAgentConfig(); err != nil { + return err + } + if err := a.deleteLDPreloadConfig(); err != nil { + return err + } + if err := a.deleteDockerConfig(); err != nil { + return err + } + return nil +} + +func (a *apmInjectorInstaller) setRunPermissions() error { + return os.Chmod(path.Join(a.installPath, "inject", "run"), 0777) +} + +// setLDPreloadConfig adds preload options on /etc/ld.so.preload, overriding existing ones +func (a *apmInjectorInstaller) setLDPreloadConfig() error { + var ldSoPreload []byte + stat, err := os.Stat(ldSoPreloadPath) + if err == nil { + ldSoPreload, err = os.ReadFile(ldSoPreloadPath) + if err != nil { + return err + } + } else if !os.IsNotExist(err) { + return err + } + + newLdSoPreload, err := a.setLDPreloadConfigContent(ldSoPreload) + if err != nil { + return err + } + if bytes.Equal(ldSoPreload, newLdSoPreload) { + // No changes needed + return nil + } + + perms := os.FileMode(0644) + if stat != nil { + perms = stat.Mode() + } + err = os.WriteFile("/tmp/ld.so.preload.tmp", newLdSoPreload, perms) + if err != nil { + return err + } + + return executeCommand(string(replaceLDPreloadCommand)) +} + +// setLDPreloadConfigContent sets the content of the LD preload configuration +func (a *apmInjectorInstaller) setLDPreloadConfigContent(ldSoPreload []byte) ([]byte, error) { + launcherPreloadPath := path.Join(a.installPath, "inject", "launcher.preload.so") + + if strings.Contains(string(ldSoPreload), launcherPreloadPath) { + // If the line of interest is already in /etc/ld.so.preload, return fast + return ldSoPreload, nil + } + + // Append the launcher preload path to the file + if len(ldSoPreload) > 0 && ldSoPreload[len(ldSoPreload)-1] != '\n' { + ldSoPreload = append(ldSoPreload, '\n') + } + 
ldSoPreload = append(ldSoPreload, []byte(launcherPreloadPath+"\n")...) + return ldSoPreload, nil +} + +// deleteLDPreloadConfig removes the preload options from /etc/ld.so.preload +func (a *apmInjectorInstaller) deleteLDPreloadConfig() error { + var ldSoPreload []byte + stat, err := os.Stat(ldSoPreloadPath) + if err == nil { + ldSoPreload, err = os.ReadFile(ldSoPreloadPath) + if err != nil { + return err + } + } else if !os.IsNotExist(err) { + return err + } else { + return nil + } + + newLdSoPreload, err := a.deleteLDPreloadConfigContent(ldSoPreload) + if err != nil { + return err + } + if bytes.Equal(ldSoPreload, newLdSoPreload) { + // No changes needed + return nil + } + + perms := os.FileMode(0644) + if stat != nil { + perms = stat.Mode() + } + err = os.WriteFile("/tmp/ld.so.preload.tmp", newLdSoPreload, perms) + if err != nil { + return err + } + + return executeCommand(string(replaceLDPreloadCommand)) +} + +// deleteLDPreloadConfigContent deletes the content of the LD preload configuration +func (a *apmInjectorInstaller) deleteLDPreloadConfigContent(ldSoPreload []byte) ([]byte, error) { + launcherPreloadPath := path.Join(a.installPath, "inject", "launcher.preload.so") + + if !strings.Contains(string(ldSoPreload), launcherPreloadPath) { + // If the line of interest isn't there, return fast + return ldSoPreload, nil + } + + // Possible configurations of the preload path, order matters + replacementsToTest := [][]byte{ + []byte(launcherPreloadPath + "\n"), + []byte("\n" + launcherPreloadPath), + []byte(launcherPreloadPath + " "), + []byte(" " + launcherPreloadPath), + } + for _, replacement := range replacementsToTest { + ldSoPreloadNew := bytes.Replace(ldSoPreload, replacement, []byte{}, 1) + if !bytes.Equal(ldSoPreloadNew, ldSoPreload) { + return ldSoPreloadNew, nil + } + } + if bytes.Equal(ldSoPreload, []byte(launcherPreloadPath)) { + // If the line is the only one in the file without newlines, return an empty file + return []byte{}, nil + } + + return nil, fmt.Errorf("failed to remove %s from %s", launcherPreloadPath, ldSoPreloadPath) +} + +// setAgentConfig adds the agent configuration for the APM injector if it is not there already +// We assume that the agent file has been created by the installer's postinst script +// +// Note: This is not safe, as it assumes there were no changes to the agent configuration made without +// restart by the user. This means that the agent can crash on restart. This is a limitation of the current +// installer system and this will be replaced by a proper experiment when available. 
This is a temporary +// solution to allow the APM injector to be installed, and if the agent crashes, we try to detect it and +// restore the previous configuration +func (a *apmInjectorInstaller) setAgentConfig() (err error) { + err = backupAgentConfig() + if err != nil { + return err + } + defer func() { + if err != nil { + restoreErr := restoreAgentConfig() + if restoreErr != nil { + log.Warnf("Failed to restore agent config: %v", restoreErr) + } + } + }() + + content, err := os.ReadFile(datadogConfigPath) + if err != nil { + return err + } + + newContent := a.setAgentConfigContent(content) + if bytes.Equal(content, newContent) { + // No changes needed + return nil + } + + err = os.WriteFile(datadogConfigPath, newContent, 0644) + if err != nil { + return err + } + + err = restartTraceAgent() + return +} + +func (a *apmInjectorInstaller) setAgentConfigContent(content []byte) []byte { + runPath := path.Join(a.installPath, "inject", "run") + apmSocketPath := path.Join(runPath, "apm.socket") + dsdSocketPath := path.Join(runPath, "dsd.socket") + + if !bytes.Contains(content, injectorConfigPrefix) { + content = append(content, []byte("\n")...) + content = append(content, injectorConfigPrefix...) + content = append(content, []byte( + fmt.Sprintf(injectorConfigTemplate, apmSocketPath, dsdSocketPath), + )...) + content = append(content, injectorConfigSuffix...) + content = append(content, []byte("\n")...) + } + return content +} + +// deleteAgentConfig removes the agent configuration for the APM injector +func (a *apmInjectorInstaller) deleteAgentConfig() (err error) { + err = backupAgentConfig() + if err != nil { + return err + } + defer func() { + if err != nil { + restoreErr := restoreAgentConfig() + if restoreErr != nil { + log.Warnf("Failed to restore agent config: %v", restoreErr) + } + } + }() + + content, err := os.ReadFile(datadogConfigPath) + if err != nil { + return err + } + + newContent := a.deleteAgentConfigContent(content) + if bytes.Equal(content, newContent) { + // No changes needed + return nil + } + + err = os.WriteFile(datadogConfigPath, content, 0644) + if err != nil { + return err + } + + return restartTraceAgent() +} + +// deleteAgentConfigContent deletes the agent configuration for the APM injector +func (a *apmInjectorInstaller) deleteAgentConfigContent(content []byte) []byte { + start := bytes.Index(content, injectorConfigPrefix) + end := bytes.Index(content, injectorConfigSuffix) + len(injectorConfigSuffix) + if start == -1 || end == -1 || start >= end { + // Config not found + return content + } + + return append(content[:start], content[end:]...) 
+} + +// backupAgentConfig backs up the agent configuration +func backupAgentConfig() error { + return executeCommandStruct(privilegeCommand{ + Command: string(backupCommand), + Path: datadogConfigPath, + }) +} + +// restoreAgentConfig restores the agent configuration & restarts the agent +func restoreAgentConfig() error { + err := executeCommandStruct(privilegeCommand{ + Command: string(restoreCommand), + Path: datadogConfigPath, + }) + if err != nil { + return err + } + return restartTraceAgent() +} + +// restartTraceAgent restarts the trace agent, both stable and experimental +func restartTraceAgent() error { + if err := restartUnit("datadog-agent-trace.service"); err != nil { + return err + } + if err := restartUnit("datadog-agent-trace-exp.service"); err != nil { + return err + } + return nil +} diff --git a/pkg/updater/service/apm_inject_test.go b/pkg/updater/service/apm_inject_test.go new file mode 100644 index 0000000000000..813f800a0ee74 --- /dev/null +++ b/pkg/updater/service/apm_inject_test.go @@ -0,0 +1,155 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build !windows + +// Package service provides a way to interact with os services +package service + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestSetLDPreloadConfig(t *testing.T) { + a := &apmInjectorInstaller{ + installPath: "/tmp/stable", + } + + for input, expected := range map[string]string{ + // File doesn't exist + "": "/tmp/stable/inject/launcher.preload.so\n", + // File contains unrelated entries + "/abc/def/preload.so\n": "/abc/def/preload.so\n/tmp/stable/inject/launcher.preload.so\n", + // File contains unrelated entries with no newline + "/abc/def/preload.so": "/abc/def/preload.so\n/tmp/stable/inject/launcher.preload.so\n", + } { + output, err := a.setLDPreloadConfigContent([]byte(input)) + assert.Nil(t, err) + assert.Equal(t, expected, string(output)) + } +} + +func TestRemoveLDPreloadConfig(t *testing.T) { + a := &apmInjectorInstaller{ + installPath: "/tmp/stable", + } + + for input, expected := range map[string]string{ + // File doesn't exist + "": "", + // File only contains the entry to remove + "/tmp/stable/inject/launcher.preload.so\n": "", + // File only contains the entry to remove without newline + "/tmp/stable/inject/launcher.preload.so": "", + // File contains unrelated entries + "/abc/def/preload.so\n/tmp/stable/inject/launcher.preload.so\n": "/abc/def/preload.so\n", + // File contains unrelated entries at the end + "/tmp/stable/inject/launcher.preload.so\n/def/abc/preload.so": "/def/abc/preload.so", + // File contains multiple unrelated entries + "/abc/def/preload.so\n/tmp/stable/inject/launcher.preload.so\n/def/abc/preload.so": "/abc/def/preload.so\n/def/abc/preload.so", + // File contains unrelated entries with no newline (reformatted by customer?) + "/abc/def/preload.so /tmp/stable/inject/launcher.preload.so": "/abc/def/preload.so", + // File contains unrelated entries with no newline (reformatted by customer?) + "/abc/def/preload.so /tmp/stable/inject/launcher.preload.so /def/abc/preload.so": "/abc/def/preload.so /def/abc/preload.so", + // File contains unrelated entries with no newline (reformatted by customer?) 
+ "/tmp/stable/inject/launcher.preload.so /def/abc/preload.so": "/def/abc/preload.so", + // File doesn't contain the entry to remove (removed by customer?) + "/abc/def/preload.so /def/abc/preload.so": "/abc/def/preload.so /def/abc/preload.so", + } { + output, err := a.deleteLDPreloadConfigContent([]byte(input)) + assert.Nil(t, err) + assert.Equal(t, expected, string(output)) + } + + // File is badly formatted (non-breaking space instead of space) + input := "/tmp/stable/inject/launcher.preload.so\u00a0/def/abc/preload.so" + output, err := a.deleteLDPreloadConfigContent([]byte(input)) + assert.NotNil(t, err) + assert.Equal(t, "", string(output)) +} + +func TestSetAgentConfig(t *testing.T) { + a := &apmInjectorInstaller{ + installPath: "/tmp/stable", + } + + for input, expected := range map[string]string{ + // File doesn't exist + "": ` +# BEGIN LD PRELOAD CONFIG +apm_config: + receiver_socket: /tmp/stable/inject/run/apm.socket +use_dogstatsd: true +dogstatsd_socket: /tmp/stable/inject/run/dsd.socket +# END LD PRELOAD CONFIG +`, + // File contains unrelated entries + `api_key: 000000000 +site: datad0g.com`: `api_key: 000000000 +site: datad0g.com +# BEGIN LD PRELOAD CONFIG +apm_config: + receiver_socket: /tmp/stable/inject/run/apm.socket +use_dogstatsd: true +dogstatsd_socket: /tmp/stable/inject/run/dsd.socket +# END LD PRELOAD CONFIG +`, + // File already contains the agent config + `# BEGIN LD PRELOAD CONFIG +apm_config: + receiver_socket: /tmp/stable/inject/run/apm.socket +use_dogstatsd: true +dogstatsd_socket: /tmp/stable/inject/run/dsd.socket +# END LD PRELOAD CONFIG`: `# BEGIN LD PRELOAD CONFIG +apm_config: + receiver_socket: /tmp/stable/inject/run/apm.socket +use_dogstatsd: true +dogstatsd_socket: /tmp/stable/inject/run/dsd.socket +# END LD PRELOAD CONFIG`, + } { + output := a.setAgentConfigContent([]byte(input)) + assert.Equal(t, expected, string(output)) + } +} + +func TestRemoveAgentConfig(t *testing.T) { + a := &apmInjectorInstaller{ + installPath: "/tmp/stable", + } + + for input, expected := range map[string]string{ + // File doesn't exist + "": "", + // File only contains the agent config + `# BEGIN LD PRELOAD CONFIG + apm_config: + receiver_socket: /tmp/stable/inject/run/apm.socket + use_dogstatsd: true + dogstatsd_socket: /tmp/stable/inject/run/dsd.socket + # END LD PRELOAD CONFIG`: "", + // File contains unrelated entries + `api_key: 000000000 +site: datad0g.com +# BEGIN LD PRELOAD CONFIG +apm_config: + receiver_socket: /tmp/stable/inject/run/apm.socket +use_dogstatsd: true +dogstatsd_socket: /tmp/stable/inject/run/dsd.socket +# END LD PRELOAD CONFIG +`: `api_key: 000000000 +site: datad0g.com + +`, + // File **only** contains unrelated entries somehow + `api_key: 000000000 +site: datad0g.com`: `api_key: 000000000 +site: datad0g.com`, + } { + output := a.deleteAgentConfigContent([]byte(input)) + assert.Equal(t, expected, string(output)) + } +} diff --git a/pkg/updater/service/apm_inject_windows.go b/pkg/updater/service/apm_inject_windows.go new file mode 100644 index 0000000000000..8bbb49c5c7095 --- /dev/null +++ b/pkg/updater/service/apm_inject_windows.go @@ -0,0 +1,19 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +//go:build windows + +// Package service provides a way to interact with os services +package service + +// SetupAPMInjector noop +func SetupAPMInjector() error { + return nil +} + +// RemoveAPMInjector noop +func RemoveAPMInjector() error { + return nil +} diff --git a/pkg/updater/service/datadog_agent.go b/pkg/updater/service/datadog_agent.go index e183f9f5f5229..8767e7a20d864 100644 --- a/pkg/updater/service/datadog_agent.go +++ b/pkg/updater/service/datadog_agent.go @@ -9,6 +9,9 @@ package service import ( + "os/exec" + "strings" + "github.com/DataDog/datadog-agent/pkg/util/installinfo" "github.com/DataDog/datadog-agent/pkg/util/log" ) @@ -52,6 +55,10 @@ func SetupAgentUnits() (err error) { } }() + if err = setInstallerAgentGroup(); err != nil { + return + } + for _, unit := range stableUnits { if err = loadUnit(unit); err != nil { return @@ -132,3 +139,16 @@ func StartAgentExperiment() error { func StopAgentExperiment() error { return startUnit(agentUnit) } + +// setInstallerAgentGroup adds the dd-installer to the dd-agent group if it's not already in it +func setInstallerAgentGroup() error { + // Get groups of dd-installer + out, err := exec.Command("id", "-Gn", "dd-installer").Output() + if err != nil { + return err + } + if strings.Contains(string(out), "dd-agent") { + return nil + } + return executeCommand(string(addInstallerToAgentGroup)) +} diff --git a/pkg/updater/service/docker.go b/pkg/updater/service/docker.go new file mode 100644 index 0000000000000..c4cdb3fc0de20 --- /dev/null +++ b/pkg/updater/service/docker.go @@ -0,0 +1,196 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
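+//
+// The functions in this file rewrite /etc/docker/daemon.json so that
+// "default-runtime" points at the injector's "dd-shim" runtime
+// (auto_inject_runc under installPath). Any pre-existing default runtime is
+// kept under "default-runtime-backup" so deleteDockerConfig can restore it
+// when the injector is removed.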
+ +//go:build !windows + +// Package service provides a way to interact with os services +package service + +import ( + "bytes" + "encoding/json" + "os" + "os/exec" + "path" + + "github.com/DataDog/datadog-agent/pkg/util/log" +) + +type dockerDaemonConfig map[string]interface{} + +const ( + tmpDockerDaemonPath = "/tmp/daemon.json.tmp" + dockerDaemonPath = "/etc/docker/daemon.json" +) + +// setDockerConfig sets up the docker daemon to use the APM injector +// even if docker isn't installed, to prepare for if it is installed +// later +func (a *apmInjectorInstaller) setDockerConfig() error { + // Create docker dir if it doesn't exist + err := executeCommand(createDockerDirCommand) + if err != nil { + return err + } + + var file []byte + stat, err := os.Stat(dockerDaemonPath) + if err == nil { + // Read the existing configuration + file, err = os.ReadFile(dockerDaemonPath) + if err != nil { + return err + } + } else if !os.IsNotExist(err) { + return err + } + + dockerConfigJSON, err := a.setDockerConfigContent(file) + if err != nil { + return err + } + + // Write the new configuration to a temporary file + perms := os.FileMode(0644) + if stat != nil { + perms = stat.Mode() + } + err = os.WriteFile(tmpDockerDaemonPath, dockerConfigJSON, perms) + if err != nil { + return err + } + + // Move the temporary file to the final location + err = executeCommand(string(replaceDockerCommand)) + if err != nil { + return err + } + + return restartDocker() +} + +// setDockerConfigContent sets the content of the docker daemon configuration +func (a *apmInjectorInstaller) setDockerConfigContent(previousContent []byte) ([]byte, error) { + dockerConfig := dockerDaemonConfig{} + + if len(previousContent) > 0 { + err := json.Unmarshal(previousContent, &dockerConfig) + if err != nil { + return nil, err + } + } + + if _, ok := dockerConfig["default-runtime"]; ok { + dockerConfig["default-runtime-backup"] = dockerConfig["default-runtime"] + } + dockerConfig["default-runtime"] = "dd-shim" + runtimes, ok := dockerConfig["runtimes"].(map[string]interface{}) + if !ok { + runtimes = map[string]interface{}{} + } + runtimes["dd-shim"] = map[string]interface{}{ + "path": path.Join(a.installPath, "inject", "auto_inject_runc"), + } + dockerConfig["runtimes"] = runtimes + + dockerConfigJSON, err := json.MarshalIndent(dockerConfig, "", " ") + if err != nil { + return nil, err + } + + return dockerConfigJSON, nil +} + +// deleteDockerConfig restores the docker daemon configuration +func (a *apmInjectorInstaller) deleteDockerConfig() error { + var file []byte + stat, err := os.Stat(dockerDaemonPath) + if err == nil { + // Read the existing configuration + file, err = os.ReadFile(dockerDaemonPath) + if err != nil { + return err + } + } else if os.IsNotExist(err) { + // If the file doesn't exist, there's nothing to do + return nil + } + + dockerConfigJSON, err := a.deleteDockerConfigContent(file) + if err != nil { + return err + } + + // Write the new configuration to a temporary file + perms := os.FileMode(0644) + if stat != nil { + perms = stat.Mode() + } + err = os.WriteFile(tmpDockerDaemonPath, dockerConfigJSON, perms) + if err != nil { + return err + } + + // Move the temporary file to the final location + err = executeCommand(string(replaceDockerCommand)) + if err != nil { + return err + } + return restartDocker() +} + +// deleteDockerConfigContent restores the content of the docker daemon configuration +func (a *apmInjectorInstaller) deleteDockerConfigContent(previousContent []byte) ([]byte, error) { + dockerConfig := 
dockerDaemonConfig{} + + if len(previousContent) > 0 { + err := json.Unmarshal(previousContent, &dockerConfig) + if err != nil { + return nil, err + } + } + + if _, ok := dockerConfig["default-runtime-backup"]; ok { + dockerConfig["default-runtime"] = dockerConfig["default-runtime-backup"] + delete(dockerConfig, "default-runtime-backup") + } else { + dockerConfig["default-runtime"] = "runc" + } + runtimes, ok := dockerConfig["runtimes"].(map[string]interface{}) + if !ok { + runtimes = map[string]interface{}{} + } + delete(runtimes, "dd-shim") + dockerConfig["runtimes"] = runtimes + + dockerConfigJSON, err := json.MarshalIndent(dockerConfig, "", " ") + if err != nil { + return nil, err + } + + return dockerConfigJSON, nil +} + +// restartDocker reloads the docker daemon if it exists +func restartDocker() error { + if !isDockerInstalled() { + log.Info("updater: docker is not installed, skipping reload") + return nil + } + return executeCommand(restartDockerCommand) +} + +// isDockerInstalled checks if docker is installed on the system +func isDockerInstalled() bool { + cmd := exec.Command("which", "docker") + var outb bytes.Buffer + cmd.Stdout = &outb + err := cmd.Run() + if err != nil { + log.Warn("updater: failed to check if docker is installed, assuming it isn't: ", err) + return false + } + return len(outb.String()) != 0 +} diff --git a/pkg/updater/service/docker_test.go b/pkg/updater/service/docker_test.go new file mode 100644 index 0000000000000..912a4d680a606 --- /dev/null +++ b/pkg/updater/service/docker_test.go @@ -0,0 +1,137 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +//go:build !windows + +// Package service provides a way to interact with os services +package service + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestSetDockerConfig(t *testing.T) { + a := &apmInjectorInstaller{ + installPath: "/tmp/stable", + } + + for input, expected := range map[string]string{ + // File doesn't exist + "": `{ + "default-runtime": "dd-shim", + "runtimes": { + "dd-shim": { + "path": "/tmp/stable/inject/auto_inject_runc" + } + } +}`, + // File contains unrelated entries + `{ + "cgroup-parent": "abc", + "raw-logs": false +}`: `{ + "cgroup-parent": "abc", + "default-runtime": "dd-shim", + "raw-logs": false, + "runtimes": { + "dd-shim": { + "path": "/tmp/stable/inject/auto_inject_runc" + } + } +}`, + // File has already overridden the default runtime + `{ + "default-runtime": "containerd", + "runtimes": { + "containerd": { + "path": "/usr/bin/containerd" + } + } +}`: `{ + "default-runtime": "dd-shim", + "default-runtime-backup": "containerd", + "runtimes": { + "containerd": { + "path": "/usr/bin/containerd" + }, + "dd-shim": { + "path": "/tmp/stable/inject/auto_inject_runc" + } + } +}`, + } { + output, err := a.setDockerConfigContent([]byte(input)) + assert.Nil(t, err) + assert.Equal(t, expected, string(output)) + } +} + +func TestRemoveDockerConfig(t *testing.T) { + a := &apmInjectorInstaller{ + installPath: "/tmp/stable", + } + + for input, expected := range map[string]string{ + // Empty file, shouldn't happen but still tested + "": `{ + "default-runtime": "runc", + "runtimes": {} +}`, + // File only contains the injected content + `{ + "default-runtime": "dd-shim", + "runtimes": { + "dd-shim": { + "path": "/tmp/stable/inject/auto_inject_runc" + } + } + }`: `{ + "default-runtime": "runc", + "runtimes": {} +}`, + // File contained unrelated entries + `{ + "cgroup-parent": "abc", + "default-runtime": "dd-shim", + "raw-logs": false, + "runtimes": { + "dd-shim": { + "path": "/tmp/stable/inject/auto_inject_runc" + } + } +}`: `{ + "cgroup-parent": "abc", + "default-runtime": "runc", + "raw-logs": false, + "runtimes": {} +}`, + // File had already overridden the default runtime + `{ + "default-runtime": "dd-shim", + "default-runtime-backup": "containerd", + "runtimes": { + "containerd": { + "path": "/usr/bin/containerd" + }, + "dd-shim": { + "path": "/tmp/stable/inject/auto_inject_runc" + } + } +}`: `{ + "default-runtime": "containerd", + "runtimes": { + "containerd": { + "path": "/usr/bin/containerd" + } + } +}`, + } { + output, err := a.deleteDockerConfigContent([]byte(input)) + assert.Nil(t, err) + assert.Equal(t, expected, string(output)) + } +} diff --git a/pkg/updater/service/helper/main.go b/pkg/updater/service/helper/main.go index 37f9ac13a06c6..a20a1de7a99c1 100644 --- a/pkg/updater/service/helper/main.go +++ b/pkg/updater/service/helper/main.go @@ -8,6 +8,7 @@ package main import ( + "bytes" "encoding/json" "fmt" "log" @@ -25,6 +26,8 @@ var ( installPath string systemdPath = "/lib/systemd/system" // todo load it at build time from omnibus pkgDir = "/opt/datadog-packages" + agentDir = "/etc/datadog-agent" + dockerDir = "/etc/docker" testSkipUID = "" ) @@ -36,6 +39,7 @@ type privilegeCommand struct { Command string `json:"command,omitempty"` Unit string `json:"unit,omitempty"` Path string `json:"path,omitempty"` + Content string `json:"content,omitempty"` } func isValidUnitChar(c rune) bool { @@ -66,6 +70,16 @@ func buildCommand(inputCommand privilegeCommand) (*exec.Cmd, error) { return exec.Command("ln", "-sf", 
"/opt/datadog-packages/datadog-agent/stable/bin/agent/agent", "/usr/bin/datadog-agent"), nil case "rm-agent-symlink": return exec.Command("rm", "-f", "/usr/bin/datadog-agent"), nil + case "create-docker-dir": + return exec.Command("mkdir", "-p", "/etc/docker"), nil + case "replace-docker": + return exec.Command("mv", "/tmp/daemon.json.tmp", "/etc/docker/daemon.json"), nil + case "restart-docker": + return exec.Command("systemctl", "restart", "docker"), nil + case "replace-ld-preload": + return exec.Command("mv", "/tmp/ld.so.preload.tmp", "/etc/ld.so.preload"), nil + case "add-installer-to-agent-group": + return exec.Command("usermod", "-aG", "dd-agent", "dd-installer"), nil default: return nil, fmt.Errorf("invalid command") } @@ -99,7 +113,7 @@ func buildPathCommand(inputCommand privilegeCommand) (*exec.Cmd, error) { if absPath != path || err != nil { return nil, fmt.Errorf("invalid path") } - if !strings.HasPrefix(path, pkgDir) { + if !strings.HasPrefix(path, pkgDir) && !strings.HasPrefix(path, agentDir) { return nil, fmt.Errorf("invalid path") } switch inputCommand.Command { @@ -107,6 +121,10 @@ func buildPathCommand(inputCommand privilegeCommand) (*exec.Cmd, error) { return exec.Command("chown", "-R", "dd-agent:dd-agent", path), nil case "rm": return exec.Command("rm", "-rf", path), nil + case "backup-file": + return exec.Command("cp", "-f", path, path+".bak"), nil + case "restore-file": + return exec.Command("mv", path+".bak", path), nil default: return nil, fmt.Errorf("invalid command") } @@ -121,7 +139,7 @@ func executeCommand() error { var pc privilegeCommand err := json.Unmarshal([]byte(inputCommand), &pc) if err != nil { - return fmt.Errorf("decoding command") + return fmt.Errorf("decoding command %s", inputCommand) } currentUser := syscall.Getuid() @@ -150,8 +168,14 @@ func executeCommand() error { }() } + commandErr := new(bytes.Buffer) + command.Stderr = commandErr log.Printf("Running command: %s", command.String()) - return command.Run() + err = command.Run() + if err != nil { + return fmt.Errorf("running command (%s): %s", err.Error(), commandErr.String()) + } + return nil } func main() { diff --git a/pkg/updater/service/systemd.go b/pkg/updater/service/systemd.go index 21f70d94b0fef..2f384b010ad2e 100644 --- a/pkg/updater/service/systemd.go +++ b/pkg/updater/service/systemd.go @@ -10,25 +10,58 @@ package service import ( "encoding/json" + "os" + "path" + + "github.com/DataDog/datadog-agent/pkg/util/log" ) type unitCommand string +var ( + systemdPath = "/lib/systemd/system" // todo load it at build time from omnibus +) + const ( - startCommand unitCommand = "start" - stopCommand unitCommand = "stop" - enableCommand unitCommand = "enable" - disableCommand unitCommand = "disable" - loadCommand unitCommand = "load-unit" - removeCommand unitCommand = "remove-unit" - systemdReloadCommand = `{"command":"systemd-reload"}` - adminExecutor = "datadog-updater-admin.service" + startCommand unitCommand = "start" + stopCommand unitCommand = "stop" + enableCommand unitCommand = "enable" + disableCommand unitCommand = "disable" + loadCommand unitCommand = "load-unit" + removeCommand unitCommand = "remove-unit" + addInstallerToAgentGroup unitCommand = "add-installer-to-agent-group" + backupCommand unitCommand = `backup-file` + restoreCommand unitCommand = `restore-file` + replaceDockerCommand = `{"command":"replace-docker"}` + restartDockerCommand = `{"command":"restart-docker"}` + createDockerDirCommand = `{"command":"create-docker-dir"}` + replaceLDPreloadCommand = 
`{"command":"replace-ld-preload"}` + systemdReloadCommand = `{"command":"systemd-reload"}` + adminExecutor = "datadog-updater-admin.service" ) type privilegeCommand struct { Command string `json:"command,omitempty"` Unit string `json:"unit,omitempty"` Path string `json:"path,omitempty"` + Content string `json:"content,omitempty"` +} + +// restartUnit restarts a systemd unit +func restartUnit(unit string) error { + // check that the unit exists first + if _, err := os.Stat(path.Join(systemdPath, unit)); os.IsNotExist(err) { + log.Infof("Unit %s does not exist, skipping restart", unit) + return nil + } + + if err := stopUnit(unit); err != nil { + return err + } + if err := startUnit(unit); err != nil { + return err + } + return nil } func stopUnit(unit string) error { @@ -68,3 +101,12 @@ func wrapUnitCommand(command unitCommand, unit string) string { } return string(rawJSON) } + +func executeCommandStruct(command privilegeCommand) error { + rawJSON, err := json.Marshal(command) + if err != nil { + return err + } + privilegeCommandJSON := string(rawJSON) + return executeCommand(privilegeCommandJSON) +} diff --git a/pkg/updater/service/systemd_test.go b/pkg/updater/service/systemd_test.go index 85f48151561e4..51212f6caa015 100644 --- a/pkg/updater/service/systemd_test.go +++ b/pkg/updater/service/systemd_test.go @@ -26,8 +26,8 @@ func TestInvalidCommands(t *testing.T) { // assert wrong commands for input, expected := range map[string]string{ // fail assert_command characters assertion - ";": "error: decoding command\n", - "&": "error: decoding command\n", + ";": "error: decoding command ;\n", + "&": "error: decoding command &\n", `{"command":"start", "unit":"does-not-exist"}`: "error: invalid unit\n", `{"command":"start", "unit":"datadog-//"}`: "error: invalid unit\n", `{"command":"does-not-exist", "unit":"datadog-"}`: "error: invalid command\n", @@ -55,4 +55,13 @@ func TestAssertWorkingCommands(t *testing.T) { assert.Equal(t, successErr, removeUnit("datadog-agent").Error()) assert.Equal(t, successErr, createAgentSymlink().Error()) assert.Equal(t, successErr, rmAgentSymlink().Error()) + assert.Equal(t, successErr, backupAgentConfig().Error()) + assert.Equal(t, successErr, restoreAgentConfig().Error()) + + a := &apmInjectorInstaller{ + installPath: "/tmp/stable", + } + assert.Equal(t, successErr, a.setLDPreloadConfig().Error()) + assert.Equal(t, successErr, a.setAgentConfig().Error()) + assert.Equal(t, successErr, a.setDockerConfig().Error()) } diff --git a/pkg/updater/updater.go b/pkg/updater/updater.go index 76ca92b255d31..766952a8ab57b 100644 --- a/pkg/updater/updater.go +++ b/pkg/updater/updater.go @@ -109,6 +109,9 @@ func Purge() { func purge(locksPath, repositoryPath string) { service.RemoveAgentUnits() + if err := service.RemoveAPMInjector(); err != nil { + log.Warnf("updater: could not remove APM injector: %v", err) + } cleanDir(locksPath, os.RemoveAll) cleanDir(repositoryPath, service.RemoveAll) } @@ -220,7 +223,7 @@ func (u *updaterImpl) BootstrapDefault(ctx context.Context, pkg string) (err err stablePackage, ok := u.catalog.getDefaultPackage(u.bootstrapVersions, pkg, runtime.GOARCH, runtime.GOOS) if !ok { - return fmt.Errorf("could not get default package %s for %s, %s", pkg, runtime.GOARCH, runtime.GOOS) + return fmt.Errorf("could not get default package '%s' for arch '%s' and platform '%s'", pkg, runtime.GOARCH, runtime.GOOS) } return u.boostrapPackage(ctx, stablePackage.URL, stablePackage.Name, stablePackage.Version) } @@ -236,7 +239,7 @@ func (u *updaterImpl) 
BootstrapVersion(ctx context.Context, pkg string, version stablePackage, ok := u.catalog.getPackage(pkg, version, runtime.GOARCH, runtime.GOOS) if !ok { - return fmt.Errorf("could not get package %s version %s for %s, %s", pkg, version, runtime.GOARCH, runtime.GOOS) + return fmt.Errorf("could not get package '%s' version '%s' for arch '%s' and platform '%s'", pkg, version, runtime.GOARCH, runtime.GOOS) } return u.boostrapPackage(ctx, stablePackage.URL, stablePackage.Name, stablePackage.Version) } diff --git a/test/new-e2e/tests/updater/docker.go b/test/new-e2e/tests/updater/docker.go new file mode 100644 index 0000000000000..3762f53f589d4 --- /dev/null +++ b/test/new-e2e/tests/updater/docker.go @@ -0,0 +1,91 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package updater contains tests for the updater package +package updater + +import ( + "testing" + "time" + + "github.com/DataDog/datadog-agent/test/new-e2e/pkg/components" + "github.com/DataDog/test-infra-definitions/components/os" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// installDocker installs docker on the host +func installDocker(distro os.Descriptor, t *testing.T, host *components.RemoteHost) { + switch distro { + case os.UbuntuDefault: + _, err := host.WriteFile("/tmp/install-docker.sh", []byte(` +sudo apt-get update +sudo apt-get install ca-certificates curl +sudo install -m 0755 -d /etc/apt/keyrings +sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc +sudo chmod a+r /etc/apt/keyrings/docker.asc +echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ + $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null +sudo apt-get update +sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + `)) + require.Nil(t, err) + host.MustExecute(`sudo chmod +x /tmp/install-docker.sh`) + host.MustExecute(`sudo /tmp/install-docker.sh`) + err = host.Remove("/tmp/install-docker.sh") + require.Nil(t, err) + case os.DebianDefault: + _, err := host.WriteFile("/tmp/install-docker.sh", []byte(` +sudo apt-get update +sudo apt-get install ca-certificates curl +sudo install -m 0755 -d /etc/apt/keyrings +sudo curl -fsSL https://download.docker.com/linux/debian/gpg -o /etc/apt/keyrings/docker.asc +sudo chmod a+r /etc/apt/keyrings/docker.asc + +# Add the repository to Apt sources: +echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/debian \ + $(. 
/etc/os-release && echo "$VERSION_CODENAME") stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null +sudo apt-get update +sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + `)) + require.Nil(t, err) + host.MustExecute(`sudo chmod +x /tmp/install-docker.sh`) + host.MustExecute(`sudo /tmp/install-docker.sh`) + err = host.Remove("/tmp/install-docker.sh") + require.Nil(t, err) + case os.CentOSDefault, os.RedHatDefault: + _, err := host.WriteFile("/tmp/install-docker.sh", []byte(` +sudo yum install -y yum-utils +sudo yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo +sudo yum install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin +sudo systemctl start docker + `)) + require.Nil(t, err) + host.MustExecute(`sudo chmod +x /tmp/install-docker.sh`) + host.MustExecute(`sudo /tmp/install-docker.sh`) + err = host.Remove("/tmp/install-docker.sh") + require.Nil(t, err) + default: + t.Fatalf("unsupported distro: %s", distro.String()) + } +} + +// launchJavaDockerContainer launches a small Java HTTP server in a docker container +// and make a call to it +func launchJavaDockerContainer(t *testing.T, host *components.RemoteHost) { + host.MustExecute(`sudo docker run -d -p8887:8888 baptistefoy702/message-server:latest`) + // for i := 0; i < 10; i++ { + assert.Eventually(t, + func() bool { + _, err := host.Execute(`curl -m 1 localhost:8887/messages`) + return err == nil + }, 10*time.Second, 100*time.Millisecond, + ) + // } +} diff --git a/test/new-e2e/tests/updater/linux_test.go b/test/new-e2e/tests/updater/linux_test.go index 4d49c168ef955..59280eaf1f0f7 100644 --- a/test/new-e2e/tests/updater/linux_test.go +++ b/test/new-e2e/tests/updater/linux_test.go @@ -12,6 +12,7 @@ import ( "regexp" "strings" "testing" + "time" "github.com/DataDog/test-infra-definitions/components/os" "github.com/DataDog/test-infra-definitions/scenarios/aws/ec2" @@ -37,12 +38,14 @@ const ( type vmUpdaterSuite struct { e2e.BaseSuite[environments.Host] packageManager string + distro os.Descriptor + arch os.Architecture } func runTest(t *testing.T, pkgManager string, arch os.Architecture, distro os.Descriptor) { reg := regexp.MustCompile(`[^a-zA-Z0-9_\-.]`) testName := reg.ReplaceAllString(distro.String()+"-"+string(arch), "_") - e2e.Run(t, &vmUpdaterSuite{packageManager: pkgManager}, e2e.WithProvisioner(awshost.ProvisionerNoFakeIntake( + e2e.Run(t, &vmUpdaterSuite{packageManager: pkgManager, distro: distro, arch: arch}, e2e.WithProvisioner(awshost.ProvisionerNoFakeIntake( awshost.WithUpdater(), awshost.WithEC2InstanceOptions(ec2.WithOSArch(distro, arch)), )), @@ -202,6 +205,142 @@ func (v *vmUpdaterSuite) TestPurgeAndInstallAgent() { } } +func (v *vmUpdaterSuite) TestPurgeAndInstallAPMInjector() { + // Temporarily disable CentOS & Redhat, as there is a bug in the APM injector + if v.distro == os.CentOSDefault || v.distro == os.RedHatDefault { + v.T().Skip("APM injector not available for CentOS or RedHat yet") + } + if v.distro == os.DebianDefault || v.distro == os.UbuntuDefault && v.arch == os.AMD64Arch { + // TODO (baptiste): Fix test + v.T().Skip("Test has been temporarily disabled") + } + + host := v.Env().RemoteHost + + /////////////////// + // Setup machine // + /////////////////// + + host.MustExecute(fmt.Sprintf("sudo %v/bin/installer/installer purge", bootUpdaterDir)) + // Install docker + installDocker(v.distro, v.T(), host) + defer func() { + // Best effort to stop any running container 
at the end of the test + host.Execute(`sudo docker ps -aq | xargs sudo docker stop | xargs sudo docker rm`) + }() + + ///////////////////////// + // Check initial state // + ///////////////////////// + + // packages dir exists; but there are no packages installed + host.MustExecute(`test -d /opt/datadog-packages`) + _, err := host.Execute(`test -d /opt/datadog-packages/datadog-apm-inject`) + require.NotNil(v.T(), err) + _, err = host.Execute(`test -d /opt/datadog-packages/datadog-agent`) + require.NotNil(v.T(), err) + _, err = host.Execute(`test -d /opt/datadog-packages/datadog-apm-library-java`) + require.NotNil(v.T(), err) + + // /etc/ld.so.preload does not contain the injector + _, err = host.Execute(`grep "/opt/datadog-packages/datadog-apm-inject" /etc/ld.so.preload`) + require.NotNil(v.T(), err) + + // docker daemon does not contain the injector + _, err = host.Execute(`grep "/opt/datadog-packages/datadog-apm-inject" /etc/docker/daemon.json`) + require.NotNil(v.T(), err) + + //////////////////////// + // Bootstrap packages // + //////////////////////// + + host.MustExecute(fmt.Sprintf(`sudo %v/bin/installer/installer bootstrap --url "oci://docker.io/datadog/agent-package-dev:7.54.0-devel.git.247.f92fbc1.pipeline.31778392-1"`, bootUpdaterDir)) + host.MustExecute(fmt.Sprintf(`sudo %v/bin/installer/installer bootstrap --url "oci://docker.io/datadog/apm-library-java-package-dev:1.32.0-SNAPSHOT-8708864e8e-pipeline.30373268.beta.8708864e-1"`, bootUpdaterDir)) + host.MustExecute(fmt.Sprintf(`sudo %v/bin/installer/installer bootstrap --url "oci://docker.io/datadog/apm-inject-package-dev:0.12.3-dev.bddec85.glci481808135.g8acdc698-1"`, bootUpdaterDir)) + + //////////////////////////////// + // Check post-bootstrap state // + //////////////////////////////// + + // assert packages dir exist + host.MustExecute(`test -L /opt/datadog-packages/datadog-agent/stable`) + host.MustExecute(`test -L /opt/datadog-packages/datadog-apm-library-java/stable`) + host.MustExecute(`test -L /opt/datadog-packages/datadog-apm-inject/stable`) + + // assert /etc/ld.so.preload contains the injector + res, err := host.Execute(`grep "/opt/datadog-packages/datadog-apm-inject" /etc/ld.so.preload`) + require.Nil(v.T(), err) + require.Equal(v.T(), "/opt/datadog-packages/datadog-apm-inject/stable/inject/launcher.preload.so\n", res) + + // assert docker daemon contains the injector (removing blank spaces for easier comparison) + res, err = host.Execute(`grep "/opt/datadog-packages/datadog-apm-inject" /etc/docker/daemon.json | sed -re 's/^[[:blank:]]+|[[:blank:]]+$//g' -e 's/[[:blank:]]+/ /g'`) + require.Nil(v.T(), err) + require.Equal(v.T(), "\"path\": \"/opt/datadog-packages/datadog-apm-inject/stable/inject/auto_inject_runc\"\n", res) + + // assert agent config has been changed + raw, err := host.ReadFile("/etc/datadog-agent/datadog.yaml") + require.Nil(v.T(), err) + require.True(v.T(), strings.Contains(string(raw), "# BEGIN LD PRELOAD CONFIG"), "missing LD_PRELOAD config, config:\n%s", string(raw)) + + // assert agent is running + host.MustExecute("sudo systemctl status datadog-agent.service") + + _, err = host.Execute("sudo systemctl status datadog-agent-trace.service") + require.Nil(v.T(), err) + + // assert required files exist + requiredFiles := []string{ + "auto_inject_runc", + "launcher.preload.so", + "ld.so.preload", + "musl-launcher.preload.so", + "process", + } + for _, file := range requiredFiles { + host.MustExecute(fmt.Sprintf("test -f /opt/datadog-packages/datadog-apm-inject/stable/inject/%s", file)) + } + + // 
assert file ownerships + injectorDir := "/opt/datadog-packages/datadog-apm-inject" + require.Equal(v.T(), "dd-installer\n", host.MustExecute(`stat -c "%U" `+injectorDir)) + require.Equal(v.T(), "dd-installer\n", host.MustExecute(`stat -c "%G" `+injectorDir)) + require.Equal(v.T(), "drwxr-xr-x\n", host.MustExecute(`stat -c "%A" `+injectorDir)) + require.Equal(v.T(), "1\n", host.MustExecute(`sudo ls -l /opt/datadog-packages/datadog-apm-inject | awk '$9 != "stable" && $3 == "dd-installer" && $4 == "dd-installer"' | wc -l`)) + + ///////////////////////////////////// + // Check injection with a real app // + ///////////////////////////////////// + + launchJavaDockerContainer(v.T(), host) + + // check "Dropping Payload due to non-retryable error" in trace agent logs + // as we don't have an API key the payloads can't be flushed successfully, + // but this log indicates that the trace agent managed to receive the payload + require.Eventually(v.T(), func() bool { + _, err := host.Execute(`cat /var/log/datadog/trace-agent.log | grep "Dropping Payload due to non-retryable error"`) + return err == nil + }, 30*time.Second, 100*time.Millisecond) + + /////////////////////// + // Check purge state // + /////////////////////// + + host.MustExecute(fmt.Sprintf("sudo %v/bin/installer/installer purge", bootUpdaterDir)) + + _, err = host.Execute(`test -d /opt/datadog-packages/datadog-apm-inject`) + require.NotNil(v.T(), err) + _, err = host.Execute(`test -d /opt/datadog-packages/datadog-agent`) + require.NotNil(v.T(), err) + _, err = host.Execute(`test -d /opt/datadog-packages/datadog-apm-library-java`) + require.NotNil(v.T(), err) + _, err = host.Execute(`grep "/opt/datadog-packages/datadog-apm-inject" /etc/ld.so.preload`) + require.NotNil(v.T(), err) + _, err = host.Execute(`grep "/opt/datadog-packages/datadog-apm-inject" /etc/docker/daemon.json`) + require.NotNil(v.T(), err) + _, err = host.Execute(`test -f /etc/docker/daemon.json.bak`) + require.NotNil(v.T(), err) +} + func assertInstallMethod(v *vmUpdaterSuite, t *testing.T, host *components.RemoteHost) { rawYaml, err := host.ReadFile(filepath.Join(confDir, "install_info")) assert.Nil(t, err) From 1f33b617699c27397ea2b6f668c686f85d83f9c6 Mon Sep 17 00:00:00 2001 From: Dylan Yang Date: Fri, 12 Apr 2024 16:03:46 -0400 Subject: [PATCH 9/9] [SVLS-4142] Create a Lambda span on timeouts (#21481) * create a Lambda span on timeouts * don't create a cold start span when the runtime restarts during timeouts * fix linting * fix test * lint: rename name variables * lint again * small fixes * refactor timeout span logic * add mutexes * fix span completed check * revert refactor * remove cold start span changes * use mutex over rwmutex * test routes * add comment + update tests * test endExecutionSpan * add serverless.go test * add test /hello for route * only set span incomplete when /startInvocation has been hit * time out -> timeout Co-authored-by: Duncan Harvey <35278470+duncanpharvey@users.noreply.github.com> --------- Co-authored-by: Duncan Harvey <35278470+duncanpharvey@users.noreply.github.com> --- cmd/serverless/main.go | 2 +- pkg/serverless/daemon/daemon.go | 30 +++++ pkg/serverless/daemon/routes.go | 4 + pkg/serverless/daemon/routes_test.go | 54 ++++++++ .../invocationlifecycle/invocation_details.go | 1 + .../invocationlifecycle/lifecycle.go | 53 ++++---- .../invocationlifecycle/lifecycle_test.go | 117 ++++++++++++++++++ pkg/serverless/invocationlifecycle/trace.go | 43 +++++-- .../invocationlifecycle/trace_test.go | 48 +++++++ pkg/serverless/serverless.go | 
22 ++++ pkg/serverless/serverless_test.go | 40 ++++++ 11 files changed, 379 insertions(+), 35 deletions(-) diff --git a/cmd/serverless/main.go b/cmd/serverless/main.go index b974114acbe6e..dd13654bc35be 100644 --- a/cmd/serverless/main.go +++ b/cmd/serverless/main.go @@ -287,7 +287,7 @@ func runAgent() { ExtraTags: serverlessDaemon.ExtraTags, Demux: serverlessDaemon.MetricAgent.Demux, ProcessTrace: ta.Process, - DetectLambdaLibrary: func() bool { return serverlessDaemon.LambdaLibraryDetected }, + DetectLambdaLibrary: serverlessDaemon.IsLambdaLibraryDetected, InferredSpansEnabled: inferredspan.IsInferredSpansEnabled(), } diff --git a/pkg/serverless/daemon/daemon.go b/pkg/serverless/daemon/daemon.go index 21386b9653449..58bc1ac85190e 100644 --- a/pkg/serverless/daemon/daemon.go +++ b/pkg/serverless/daemon/daemon.go @@ -66,6 +66,15 @@ type Daemon struct { // LambdaLibraryDetected represents whether the Datadog Lambda Library was detected in the environment LambdaLibraryDetected bool + // LambdaLibraryStateLock keeps track of whether the Datadog Lambda Library was detected in the environment + LambdaLibraryStateLock sync.Mutex + + // executionSpanIncomplete indicates whether the Lambda span has been completed by the Extension + executionSpanIncomplete bool + + // ExecutionSpanStateLock keeps track of whether the serverless Invocation routes have been hit to complete the execution span + ExecutionSpanStateLock sync.Mutex + // runtimeStateMutex is used to ensure that modifying the state of the runtime is thread-safe runtimeStateMutex sync.Mutex @@ -435,3 +444,24 @@ func (d *Daemon) setTraceTags(tagMap map[string]string) bool { } return false } + +// IsLambdaLibraryDetected returns if the Lambda Library is in use +func (d *Daemon) IsLambdaLibraryDetected() bool { + d.LambdaLibraryStateLock.Lock() + defer d.LambdaLibraryStateLock.Unlock() + return d.LambdaLibraryDetected +} + +// IsExecutionSpanIncomplete checks if the Lambda execution span was finished +func (d *Daemon) IsExecutionSpanIncomplete() bool { + d.ExecutionSpanStateLock.Lock() + defer d.ExecutionSpanStateLock.Unlock() + return d.executionSpanIncomplete +} + +// SetExecutionSpanIncomplete keeps track of whether the Extension completed the Lambda execution span +func (d *Daemon) SetExecutionSpanIncomplete(spanIncomplete bool) { + d.ExecutionSpanStateLock.Lock() + defer d.ExecutionSpanStateLock.Unlock() + d.executionSpanIncomplete = spanIncomplete +} diff --git a/pkg/serverless/daemon/routes.go b/pkg/serverless/daemon/routes.go index 1b2379d8e1822..93e113782dbb8 100644 --- a/pkg/serverless/daemon/routes.go +++ b/pkg/serverless/daemon/routes.go @@ -26,6 +26,8 @@ type Hello struct { //nolint:revive // TODO(SERV) Fix revive linter func (h *Hello) ServeHTTP(w http.ResponseWriter, r *http.Request) { log.Debug("Hit on the serverless.Hello route.") + h.daemon.LambdaLibraryStateLock.Lock() + defer h.daemon.LambdaLibraryStateLock.Unlock() h.daemon.LambdaLibraryDetected = true } @@ -53,6 +55,7 @@ type StartInvocation struct { func (s *StartInvocation) ServeHTTP(w http.ResponseWriter, r *http.Request) { log.Debug("Hit on the serverless.StartInvocation route.") + s.daemon.SetExecutionSpanIncomplete(true) startTime := time.Now() reqBody, err := io.ReadAll(r.Body) if err != nil { @@ -86,6 +89,7 @@ type EndInvocation struct { func (e *EndInvocation) ServeHTTP(w http.ResponseWriter, r *http.Request) { log.Debug("Hit on the serverless.EndInvocation route.") + e.daemon.SetExecutionSpanIncomplete(false) endTime := time.Now() ecs := 
e.daemon.ExecutionContext.GetCurrentState() coldStartTags := e.daemon.ExecutionContext.GetColdStartTagsForRequestID(ecs.LastRequestID) diff --git a/pkg/serverless/daemon/routes_test.go b/pkg/serverless/daemon/routes_test.go index eab3e09e6be02..0cdae0c594057 100644 --- a/pkg/serverless/daemon/routes_test.go +++ b/pkg/serverless/daemon/routes_test.go @@ -161,6 +161,30 @@ func TestTraceContext(t *testing.T) { } } +func TestHello(t *testing.T) { + assert := assert.New(t) + + port := testutil.FreeTCPPort(t) + d := StartDaemon(fmt.Sprintf("127.0.0.1:%d", port)) + time.Sleep(100 * time.Millisecond) + defer d.Stop() + d.InvocationProcessor = &invocationlifecycle.LifecycleProcessor{ + ExtraTags: d.ExtraTags, + Demux: nil, + ProcessTrace: nil, + DetectLambdaLibrary: d.IsLambdaLibraryDetected, + } + client := &http.Client{} + body := bytes.NewBuffer([]byte(`{}`)) + request, err := http.NewRequest(http.MethodPost, fmt.Sprintf("http://127.0.0.1:%d/lambda/hello", port), body) + assert.Nil(err) + assert.False(d.IsLambdaLibraryDetected()) + response, err := client.Do(request) + assert.Nil(err) + response.Body.Close() + assert.True(d.IsLambdaLibraryDetected()) +} + func TestStartEndInvocationSpanParenting(t *testing.T) { port := testutil.FreeTCPPort(t) d := StartDaemon(fmt.Sprintf("127.0.0.1:%d", port)) @@ -332,6 +356,36 @@ func TestStartEndInvocationSpanParenting(t *testing.T) { } } +func TestStartEndInvocationIsExecutionSpanIncomplete(t *testing.T) { + assert := assert.New(t) + port := testutil.FreeTCPPort(t) + d := StartDaemon(fmt.Sprintf("127.0.0.1:%d", port)) + time.Sleep(100 * time.Millisecond) + defer d.Stop() + + m := &mockLifecycleProcessor{} + d.InvocationProcessor = m + + client := &http.Client{} + body := bytes.NewBuffer([]byte(`{"key": "value"}`)) + startReq, err := http.NewRequest(http.MethodPost, fmt.Sprintf("http://127.0.0.1:%d/lambda/start-invocation", port), body) + assert.Nil(err) + startResp, err := client.Do(startReq) + assert.Nil(err) + startResp.Body.Close() + assert.True(m.OnInvokeStartCalled) + assert.True(d.IsExecutionSpanIncomplete()) + + body = bytes.NewBuffer([]byte(`{}`)) + endReq, err := http.NewRequest(http.MethodPost, fmt.Sprintf("http://127.0.0.1:%d/lambda/end-invocation", port), body) + assert.Nil(err) + endResp, err := client.Do(endReq) + assert.Nil(err) + endResp.Body.Close() + assert.True(m.OnInvokeEndCalled) + assert.False(d.IsExecutionSpanIncomplete()) +} + // Helper function for reading test file func getEventFromFile(filename string) string { event, err := os.ReadFile("../trace/testdata/event_samples/" + filename) diff --git a/pkg/serverless/invocationlifecycle/invocation_details.go b/pkg/serverless/invocationlifecycle/invocation_details.go index bd0e285f8d377..0ad7d0a98b8ea 100644 --- a/pkg/serverless/invocationlifecycle/invocation_details.go +++ b/pkg/serverless/invocationlifecycle/invocation_details.go @@ -27,6 +27,7 @@ type InvocationStartDetails struct { type InvocationEndDetails struct { EndTime time.Time IsError bool + IsTimeout bool RequestID string ResponseRawPayload []byte ColdStart bool diff --git a/pkg/serverless/invocationlifecycle/lifecycle.go b/pkg/serverless/invocationlifecycle/lifecycle.go index d8c470b187db5..90e931767cef1 100644 --- a/pkg/serverless/invocationlifecycle/lifecycle.go +++ b/pkg/serverless/invocationlifecycle/lifecycle.go @@ -281,32 +281,14 @@ func (lp *LifecycleProcessor) OnInvokeEnd(endDetails *InvocationEndDetails) { spans = append(spans, span) if lp.InferredSpansEnabled { - log.Debug("[lifecycle] Attempting to complete the 
inferred span") - log.Debugf("[lifecycle] Inferred span context: %+v", lp.GetInferredSpan().Span) - if lp.GetInferredSpan().Span.Start != 0 { - span0, span1 := lp.requestHandler.inferredSpans[0], lp.requestHandler.inferredSpans[1] - if span1 != nil { - log.Debug("[lifecycle] Completing a secondary inferred span") - lp.setParentIDForMultipleInferredSpans() - span1.AddTagToInferredSpan("http.status_code", statusCode) - span1.AddTagToInferredSpan("peer.service", lp.GetServiceName()) - span := lp.completeInferredSpan(span1, lp.getInferredSpanStart(), endDetails.IsError) - spans = append(spans, span) - log.Debug("[lifecycle] The secondary inferred span attributes are %v", lp.requestHandler.inferredSpans[1]) - } - span0.AddTagToInferredSpan("http.status_code", statusCode) - span0.AddTagToInferredSpan("peer.service", lp.GetServiceName()) - span := lp.completeInferredSpan(span0, endDetails.EndTime, endDetails.IsError) - spans = append(spans, span) - log.Debugf("[lifecycle] The inferred span attributes are: %v", lp.GetInferredSpan()) - } else { - log.Debug("[lifecyle] Failed to complete inferred span due to a missing start time. Please check that the event payload was received with the appropriate data") - } + inferredSpans := lp.endInferredSpan(statusCode, endDetails.EndTime, endDetails.IsError) + spans = append(spans, inferredSpans...) } lp.processTrace(spans) } - if endDetails.IsError { + // We don't submit an error metric on timeouts since it should have already been submitted when the Extension receives a SHUTDOWN event + if endDetails.IsError && !endDetails.IsTimeout { serverlessMetrics.SendErrorsEnhancedMetric( lp.ExtraTags.Tags, endDetails.EndTime, lp.Demux, ) @@ -385,3 +367,30 @@ func (lp *LifecycleProcessor) setParentIDForMultipleInferredSpans() { lp.requestHandler.inferredSpans[1].Span.ParentID = lp.requestHandler.inferredSpans[0].Span.ParentID lp.requestHandler.inferredSpans[0].Span.ParentID = lp.requestHandler.inferredSpans[1].Span.SpanID } + +// endInferredSpan attempts to complete any inferred spans and send them to intake +func (lp *LifecycleProcessor) endInferredSpan(statusCode string, endTime time.Time, isError bool) []*pb.Span { + spans := make([]*pb.Span, 0, 2) + log.Debug("[lifecycle] Attempting to complete the inferred span") + log.Debugf("[lifecycle] Inferred span context: %+v", lp.GetInferredSpan().Span) + if lp.GetInferredSpan().Span.Start != 0 { + span0, span1 := lp.requestHandler.inferredSpans[0], lp.requestHandler.inferredSpans[1] + if span1 != nil { + log.Debug("[lifecycle] Completing a secondary inferred span") + lp.setParentIDForMultipleInferredSpans() + span1.AddTagToInferredSpan("http.status_code", statusCode) + span1.AddTagToInferredSpan("peer.service", lp.GetServiceName()) + span := lp.completeInferredSpan(span1, lp.getInferredSpanStart(), isError) + spans = append(spans, span) + log.Debug("[lifecycle] The secondary inferred span attributes are %v", lp.requestHandler.inferredSpans[1]) + } + span0.AddTagToInferredSpan("http.status_code", statusCode) + span0.AddTagToInferredSpan("peer.service", lp.GetServiceName()) + span := lp.completeInferredSpan(span0, endTime, isError) + spans = append(spans, span) + log.Debugf("[lifecycle] The inferred span attributes are: %v", lp.GetInferredSpan()) + } else { + log.Debug("[lifecyle] Failed to complete inferred span due to a missing start time. 
Please check that the event payload was received with the appropriate data") + } + return spans +} diff --git a/pkg/serverless/invocationlifecycle/lifecycle_test.go b/pkg/serverless/invocationlifecycle/lifecycle_test.go index e33d574035dd7..b7ee5aaa3057d 100644 --- a/pkg/serverless/invocationlifecycle/lifecycle_test.go +++ b/pkg/serverless/invocationlifecycle/lifecycle_test.go @@ -379,6 +379,123 @@ func TestCompleteInferredSpanWithOutStartTime(t *testing.T) { completedInferredSpan := tracePayload.TracerPayload.Chunks[0].Spans[0] assert.Equal(t, startInvocationTime.UnixNano(), completedInferredSpan.Start) } + +func TestTimeoutExecutionSpan(t *testing.T) { + t.Setenv(functionNameEnvVar, "my-function") + t.Setenv("DD_SERVICE", "mock-lambda-service") + + extraTags := &logs.Tags{ + Tags: []string{"functionname:test-function"}, + } + demux := createDemultiplexer(t) + defer demux.Stop(false) + mockDetectLambdaLibrary := func() bool { return false } + + var tracePayload *api.Payload + mockProcessTrace := func(payload *api.Payload) { + tracePayload = payload + } + + testProcessor := LifecycleProcessor{ + ExtraTags: extraTags, + ProcessTrace: mockProcessTrace, + DetectLambdaLibrary: mockDetectLambdaLibrary, + Demux: demux, + InferredSpansEnabled: true, + } + startTime := time.Now() + duration := 1 * time.Second + endTime := startTime.Add(duration) + startDetails := InvocationStartDetails{ + StartTime: time.Now(), + InvokeEventRawPayload: []byte(`{}`), + InvokedFunctionARN: "arn:aws:lambda:us-east-1:123456789012:function:my-function", + } + testProcessor.OnInvokeStart(&startDetails) + + timeoutCtx := &InvocationEndDetails{ + RequestID: "test-request-id", + Runtime: "java11", + ColdStart: false, + ProactiveInit: false, + EndTime: endTime, + IsError: true, + IsTimeout: true, + ResponseRawPayload: nil, + } + testProcessor.OnInvokeEnd(timeoutCtx) + + spans := tracePayload.TracerPayload.Chunks[0].Spans + assert.Equal(t, 1, len(spans)) + // No trace context passed + assert.NotZero(t, testProcessor.GetExecutionInfo().TraceID) + assert.Equal(t, uint64(0), testProcessor.GetExecutionInfo().SpanID) + assert.Equal(t, int32(-128), tracePayload.TracerPayload.Chunks[0].Priority) + // New trace ID and span ID has been created + assert.NotEqual(t, uint64(0), spans[0].TraceID) + assert.NotEqual(t, uint64(0), spans[0].SpanID) + assert.Equal(t, spans[0].TraceID, testProcessor.GetExecutionInfo().TraceID) + assert.Equal(t, spans[0].Error, int32(1)) + assert.Equal(t, spans[0].GetMeta()["request_id"], "test-request-id") + assert.Equal(t, spans[0].GetMeta()["language"], "java") +} + +func TestTimeoutExecutionSpanWithTraceContext(t *testing.T) { + t.Setenv(functionNameEnvVar, "my-function") + t.Setenv("DD_SERVICE", "mock-lambda-service") + + extraTags := &logs.Tags{ + Tags: []string{"functionname:test-function"}, + } + demux := createDemultiplexer(t) + defer demux.Stop(false) + mockDetectLambdaLibrary := func() bool { return false } + + var tracePayload *api.Payload + mockProcessTrace := func(payload *api.Payload) { + tracePayload = payload + } + + testProcessor := LifecycleProcessor{ + ExtraTags: extraTags, + ProcessTrace: mockProcessTrace, + DetectLambdaLibrary: mockDetectLambdaLibrary, + Demux: demux, + InferredSpansEnabled: true, + } + eventPayload := `a5a{"resource":"/users/create","path":"/users/create","httpMethod":"GET","headers":{"Accept":"*/*","Accept-Encoding":"gzip","x-datadog-parent-id":"1480558859903409531","x-datadog-sampling-priority":"1","x-datadog-trace-id":"5736943178450432258"}}0` + startTime := time.Now() 
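+	// The eventPayload above is wrapped in junk bytes ("a5a" ... "0") resembling raw
+	// chunked-transfer framing; the payload parser is expected to strip everything
+	// outside the outermost JSON braces so the embedded x-datadog-* headers can be
+	// read as the incoming trace context. The duration/endTime below model a
+	// one-second invocation that ends in a timeout.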
+ duration := 1 * time.Second + endTime := startTime.Add(duration) + startDetails := InvocationStartDetails{ + StartTime: startTime, + InvokeEventRawPayload: []byte(eventPayload), + InvokedFunctionARN: "arn:aws:lambda:us-east-1:123456789012:function:my-function", + } + testProcessor.OnInvokeStart(&startDetails) + timeoutCtx := &InvocationEndDetails{ + RequestID: "test-request-id", + Runtime: "java11", + ColdStart: false, + ProactiveInit: false, + EndTime: endTime, + IsError: true, + IsTimeout: true, + ResponseRawPayload: nil, + } + testProcessor.OnInvokeEnd(timeoutCtx) + + spans := tracePayload.TracerPayload.Chunks[0].Spans + assert.Equal(t, 1, len(spans)) + // Trace context received + assert.Equal(t, spans[0].GetTraceID(), testProcessor.GetExecutionInfo().TraceID) + assert.Equal(t, spans[0].GetParentID(), testProcessor.GetExecutionInfo().parentID) + assert.Equal(t, tracePayload.TracerPayload.Chunks[0].Priority, int32(testProcessor.GetExecutionInfo().SamplingPriority)) + assert.Equal(t, spans[0].Error, int32(1)) + assert.Equal(t, spans[0].GetMeta()["request_id"], "test-request-id") + assert.Equal(t, spans[0].GetMeta()["language"], "java") +} + func TestTriggerTypesLifecycleEventForAPIGatewayRest(t *testing.T) { startDetails := &InvocationStartDetails{ InvokeEventRawPayload: getEventFromFile("api-gateway.json"), diff --git a/pkg/serverless/invocationlifecycle/trace.go b/pkg/serverless/invocationlifecycle/trace.go index 6ed2344b1014f..cfd545ed144f4 100644 --- a/pkg/serverless/invocationlifecycle/trace.go +++ b/pkg/serverless/invocationlifecycle/trace.go @@ -18,6 +18,7 @@ import ( "github.com/DataDog/datadog-agent/pkg/config" pb "github.com/DataDog/datadog-agent/pkg/proto/pbgo/trace" + "github.com/DataDog/datadog-agent/pkg/serverless/random" "github.com/DataDog/datadog-agent/pkg/serverless/trace/inferredspan" "github.com/DataDog/datadog-agent/pkg/trace/api" "github.com/DataDog/datadog-agent/pkg/trace/info" @@ -76,18 +77,29 @@ func (lp *LifecycleProcessor) startExecutionSpan(event interface{}, rawPayload [ // It should be called at the end of the invocation. 
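+// On a timeout the tracer may never have reported trace/span IDs for the
+// invocation; in that case endExecutionSpan falls back to generating fresh
+// random IDs (see the IsTimeout branches below) so a root execution span can
+// still be emitted.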
func (lp *LifecycleProcessor) endExecutionSpan(endDetails *InvocationEndDetails) *pb.Span { executionContext := lp.GetExecutionInfo() - duration := endDetails.EndTime.UnixNano() - executionContext.startTime.UnixNano() + start := executionContext.startTime.UnixNano() + + traceID := executionContext.TraceID + spanID := executionContext.SpanID + // If we fail to receive the trace and span IDs from the tracer during a timeout we create it ourselves + if endDetails.IsTimeout && traceID == 0 { + traceID = random.Random.Uint64() + lp.requestHandler.executionInfo.TraceID = traceID + } + if endDetails.IsTimeout && spanID == 0 { + spanID = random.Random.Uint64() + } executionSpan := &pb.Span{ Service: "aws.lambda", // will be replaced by the span processor Name: "aws.lambda", Resource: os.Getenv(functionNameEnvVar), Type: "serverless", - TraceID: executionContext.TraceID, - SpanID: executionContext.SpanID, + TraceID: traceID, + SpanID: spanID, ParentID: executionContext.parentID, - Start: executionContext.startTime.UnixNano(), - Duration: duration, + Start: start, + Duration: endDetails.EndTime.UnixNano() - start, Meta: lp.requestHandler.triggerTags, Metrics: lp.requestHandler.triggerMetrics, } @@ -110,17 +122,19 @@ func (lp *LifecycleProcessor) endExecutionSpan(endDetails *InvocationEndDetails) } else { capturePayloadAsTags(requestPayloadJSON, executionSpan, "function.request", 0, capturePayloadMaxDepth) } - responsePayloadJSON := make(map[string]interface{}) - if err := json.Unmarshal(endDetails.ResponseRawPayload, &responsePayloadJSON); err != nil { - log.Debugf("[lifecycle] Failed to parse response payload: %v", err) - executionSpan.Meta["function.response"] = string(endDetails.ResponseRawPayload) - } else { - capturePayloadAsTags(responsePayloadJSON, executionSpan, "function.response", 0, capturePayloadMaxDepth) + if endDetails.ResponseRawPayload != nil { + responsePayloadJSON := make(map[string]interface{}) + if err := json.Unmarshal(endDetails.ResponseRawPayload, &responsePayloadJSON); err != nil { + log.Debugf("[lifecycle] Failed to parse response payload: %v", err) + executionSpan.Meta["function.response"] = string(endDetails.ResponseRawPayload) + } else { + capturePayloadAsTags(responsePayloadJSON, executionSpan, "function.response", 0, capturePayloadMaxDepth) + } } } - if endDetails.IsError { executionSpan.Error = 1 + if len(endDetails.ErrorMsg) > 0 { executionSpan.Meta["error.msg"] = endDetails.ErrorMsg } @@ -130,6 +144,11 @@ func (lp *LifecycleProcessor) endExecutionSpan(endDetails *InvocationEndDetails) if len(endDetails.ErrorStack) > 0 { executionSpan.Meta["error.stack"] = endDetails.ErrorStack } + + if endDetails.IsTimeout { + executionSpan.Meta["error.type"] = "Impending Timeout" + executionSpan.Meta["error.msg"] = "Datadog detected an Impending Timeout" + } } return executionSpan diff --git a/pkg/serverless/invocationlifecycle/trace_test.go b/pkg/serverless/invocationlifecycle/trace_test.go index 0b925f9a25be6..6b45d32755165 100644 --- a/pkg/serverless/invocationlifecycle/trace_test.go +++ b/pkg/serverless/invocationlifecycle/trace_test.go @@ -649,6 +649,54 @@ func TestEndExecutionSpanWithError(t *testing.T) { assert.Equal(t, executionSpan.Error, int32(1)) } +func TestEndExecutionSpanWithTimeout(t *testing.T) { + t.Setenv(functionNameEnvVar, "TestFunction") + currentExecutionInfo := &ExecutionStartInfo{} + lp := &LifecycleProcessor{ + requestHandler: &RequestHandler{ + executionInfo: currentExecutionInfo, + triggerTags: make(map[string]string), + }, + } + + startTime := time.Now() 
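+	// startExecutionSpan is called with an empty "[]" payload and empty headers, so
+	// no incoming trace context is extracted (TraceID/SpanID stay zero); the
+	// assertions after endExecutionSpan verify that fresh non-zero IDs are generated
+	// for the timed-out invocation.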
+ startDetails := &InvocationStartDetails{ + StartTime: startTime, + InvokeEventHeaders: http.Header{}, + } + lp.startExecutionSpan(nil, []byte("[]"), startDetails) + + assert.Zero(t, currentExecutionInfo.TraceID) + assert.Zero(t, currentExecutionInfo.SpanID) + + duration := 1 * time.Second + endTime := startTime.Add(duration) + + endDetails := &InvocationEndDetails{ + EndTime: endTime, + IsError: true, + IsTimeout: true, + RequestID: "test-request-id", + ResponseRawPayload: nil, + ColdStart: true, + ProactiveInit: false, + Runtime: "dotnet6", + } + executionSpan := lp.endExecutionSpan(endDetails) + assert.Equal(t, "aws.lambda", executionSpan.Name) + assert.Equal(t, "aws.lambda", executionSpan.Service) + assert.Equal(t, "TestFunction", executionSpan.Resource) + assert.Equal(t, "serverless", executionSpan.Type) + assert.Equal(t, "dotnet", executionSpan.Meta["language"]) + assert.Equal(t, lp.requestHandler.executionInfo.TraceID, executionSpan.TraceID) + assert.NotZero(t, executionSpan.TraceID) + assert.NotZero(t, executionSpan.SpanID) + assert.Equal(t, startTime.UnixNano(), executionSpan.Start) + assert.Equal(t, duration.Nanoseconds(), executionSpan.Duration) + assert.Equal(t, "Impending Timeout", executionSpan.Meta["error.type"]) + assert.Equal(t, "Datadog detected an Impending Timeout", executionSpan.Meta["error.msg"]) +} + func TestParseLambdaPayload(t *testing.T) { assert.Equal(t, []byte(""), ParseLambdaPayload([]byte(""))) assert.Equal(t, []byte("{}"), ParseLambdaPayload([]byte("{}"))) diff --git a/pkg/serverless/serverless.go b/pkg/serverless/serverless.go index 091494b15afce..24c04e22a08ad 100644 --- a/pkg/serverless/serverless.go +++ b/pkg/serverless/serverless.go @@ -18,6 +18,7 @@ import ( "github.com/DataDog/datadog-agent/pkg/serverless/daemon" "github.com/DataDog/datadog-agent/pkg/serverless/flush" + "github.com/DataDog/datadog-agent/pkg/serverless/invocationlifecycle" "github.com/DataDog/datadog-agent/pkg/serverless/metrics" "github.com/DataDog/datadog-agent/pkg/serverless/registration" "github.com/DataDog/datadog-agent/pkg/serverless/tags" @@ -139,6 +140,10 @@ func WaitForNextInvocation(stopCh chan struct{}, daemon *daemon.Daemon, id regis metricTags = tags.AddInitTypeTag(metricTags) metrics.SendTimeoutEnhancedMetric(metricTags, daemon.MetricAgent.Demux) metrics.SendErrorsEnhancedMetric(metricTags, time.Now(), daemon.MetricAgent.Demux) + + if daemon.IsExecutionSpanIncomplete() { + finishTimeoutExecutionSpan(daemon, coldStartTags.IsColdStart, coldStartTags.IsProactiveInit) + } } err := daemon.ExecutionContext.SaveCurrentExecutionContext() if err != nil { @@ -214,3 +219,20 @@ func removeQualifierFromArn(functionArn string) string { } return functionArn } + +func finishTimeoutExecutionSpan(daemon *daemon.Daemon, isColdStart bool, isProactiveInit bool) { + ecs := daemon.ExecutionContext.GetCurrentState() + timeoutDetails := &invocationlifecycle.InvocationEndDetails{ + RequestID: ecs.LastRequestID, + Runtime: ecs.Runtime, + ColdStart: isColdStart, + ProactiveInit: isProactiveInit, + EndTime: time.Now(), + IsError: true, + IsTimeout: true, + ResponseRawPayload: nil, + } + log.Debug("Could not complete the execution span due to a timeout. 
Attempting to finish the span without details from the tracer.") + daemon.InvocationProcessor.OnInvokeEnd(timeoutDetails) + daemon.SetExecutionSpanIncomplete(false) +} diff --git a/pkg/serverless/serverless_test.go b/pkg/serverless/serverless_test.go index ccd144ea939bd..14bd868ab6548 100644 --- a/pkg/serverless/serverless_test.go +++ b/pkg/serverless/serverless_test.go @@ -15,6 +15,9 @@ import ( "github.com/stretchr/testify/assert" "github.com/DataDog/datadog-agent/pkg/serverless/daemon" + "github.com/DataDog/datadog-agent/pkg/serverless/invocationlifecycle" + "github.com/DataDog/datadog-agent/pkg/serverless/trace" + "github.com/DataDog/datadog-agent/pkg/trace/testutil" ) func TestMain(m *testing.M) { @@ -69,3 +72,40 @@ func TestRemoveQualifierFromArnWithoutAlias(t *testing.T) { functionArn := removeQualifierFromArn(invokedFunctionArn) assert.Equal(t, functionArn, invokedFunctionArn) } + +type mockLifecycleProcessor struct { + isError bool + isTimeout bool + isColdStart bool + isProactiveInit bool +} + +func (m *mockLifecycleProcessor) GetExecutionInfo() *invocationlifecycle.ExecutionStartInfo { + return &invocationlifecycle.ExecutionStartInfo{} +} +func (m *mockLifecycleProcessor) OnInvokeStart(*invocationlifecycle.InvocationStartDetails) {} +func (m *mockLifecycleProcessor) OnInvokeEnd(endDetails *invocationlifecycle.InvocationEndDetails) { + m.isError = endDetails.IsError + m.isTimeout = endDetails.IsTimeout + m.isColdStart = endDetails.ColdStart + m.isProactiveInit = endDetails.ProactiveInit +} + +func TestFinishTimeoutExecutionSpan(t *testing.T) { + port := testutil.FreeTCPPort(t) + d := daemon.StartDaemon(fmt.Sprintf("127.0.0.1:%d", port)) + d.TraceAgent = &trace.ServerlessTraceAgent{} + mock := &mockLifecycleProcessor{} + d.InvocationProcessor = mock + defer d.Stop() + + assert.False(t, d.IsExecutionSpanIncomplete()) + d.SetExecutionSpanIncomplete(true) + assert.True(t, d.IsExecutionSpanIncomplete()) + finishTimeoutExecutionSpan(d, true, true) + assert.False(t, d.IsExecutionSpanIncomplete()) + assert.True(t, mock.isError) + assert.True(t, mock.isTimeout) + assert.True(t, mock.isColdStart) + assert.True(t, mock.isProactiveInit) +}
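
Note on the privileged-helper protocol used by the updater changes in this patch: the new backup-file/restore-file, docker, and ld.so.preload operations reuse the same JSON command channel as the existing systemd commands (executeCommandStruct in systemd.go marshals a privilegeCommand, helper/main.go validates and executes it). The sketch below shows that round trip; it is an illustration rather than code from the patch, and it assumes the agent configuration lives at /etc/datadog-agent/datadog.yaml (the datadogConfigPath constant is defined outside the hunks shown above).

package main

import (
	"encoding/json"
	"fmt"
)

// privilegeCommand mirrors the struct shared by systemd.go and helper/main.go above.
type privilegeCommand struct {
	Command string `json:"command,omitempty"`
	Unit    string `json:"unit,omitempty"`
	Path    string `json:"path,omitempty"`
}

func main() {
	// Roughly what backupAgentConfig() hands to the privileged helper.
	raw, err := json.Marshal(privilegeCommand{
		Command: "backup-file",
		Path:    "/etc/datadog-agent/datadog.yaml", // assumed value of datadogConfigPath
	})
	if err != nil {
		panic(err)
	}
	fmt.Println(string(raw))
	// Output: {"command":"backup-file","path":"/etc/datadog-agent/datadog.yaml"}
	//
	// helper/main.go then checks that the path is absolute and lives under
	// /opt/datadog-packages or /etc/datadog-agent before running:
	//   cp -f /etc/datadog-agent/datadog.yaml /etc/datadog-agent/datadog.yaml.bak
}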