diff --git a/.dockerignore b/.dockerignore index 0b953d87e1..314930074a 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,4 +1,3 @@ build/ *.egg-info/ venv/ -.git/ diff --git a/.env b/.env new file mode 100644 index 0000000000..a305313ea8 --- /dev/null +++ b/.env @@ -0,0 +1,3 @@ +# This file is for use with docker compose so that mounting Neo4j volumes doesn't fail with perms errs +GID=10001 +UID=10001 diff --git a/.github/workflows/publish-to-ghcr.yml b/.github/workflows/publish-to-ghcr.yml index 7590440d00..fa48b8555a 100644 --- a/.github/workflows/publish-to-ghcr.yml +++ b/.github/workflows/publish-to-ghcr.yml @@ -44,7 +44,7 @@ jobs: - name: Build and push uses: docker/build-push-action@v3 with: - file: dist.Dockerfile + file: Dockerfile context: . push: true # push the image to ghcr tags: ${{ steps.meta.outputs.tags }} diff --git a/.github/workflows/test_suite.yml b/.github/workflows/test_suite.yml index 586e3b7823..e1013fa6e3 100644 --- a/.github/workflows/test_suite.yml +++ b/.github/workflows/test_suite.yml @@ -90,7 +90,7 @@ jobs: - name: make test_integration run: make test_integration - build-dist-docker-image: + build-docker-image: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 @@ -106,7 +106,7 @@ jobs: - name: Build uses: docker/build-push-action@v3 with: - file: dist.Dockerfile + file: Dockerfile push: false # only build the image, don't push it anywhere context: . tags: ${{ steps.meta.outputs.tags }} diff --git a/.gitignore b/.gitignore index ab612d3a5a..03e5e394a0 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ build/ .compose generated dist/ +.local diff --git a/Dockerfile b/Dockerfile index b82c7fffa4..620ccd2410 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,33 +1,20 @@ -FROM ubuntu:focal +# This is a thin distribution of the cartography software. +FROM python:3.10-slim -WORKDIR /srv/cartography +# the UID and GID to run cartography as +# (https://github.com/hexops/dockerfile#do-not-use-a-uid-below-10000). 
+ARG uid=10001 +ARG gid=10001 -ENV PATH=/venv/bin:$PATH -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3.10-dev python3-pip python3-setuptools openssl libssl-dev gcc pkg-config libffi-dev libxml2-dev libxmlsec1-dev curl make git && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* +WORKDIR /var/cartography +ENV HOME=/var/cartography -# Installs pip supported by python3.10 -RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && python3.10 get-pip.py +RUN pip install cartography -# Create cartography user so that we can give it ownership of the directory later for unit&integ tests -RUN groupadd cartography && \ - useradd -s /bin/bash -d /home/cartography -m -g cartography cartography +USER ${uid}:${gid} -# Installs python dependencies -COPY setup.py test-requirements.txt ./ -RUN pip install -e . && \ - pip install -r test-requirements.txt && \ - # Grant write access to the directory for unit and integration test coverage files - chmod -R a+w /srv/cartography +# verify that the binary at least runs +RUN cartography -h -# Install cartography, setting the owner so that tests work -COPY --chown=cartography:cartography . /srv/cartography - -USER cartography - -# Sets the directory as safe due to a mismatch in the user that cloned the repo -# and the user that is going to run the unit&integ tests. -RUN git config --global --add safe.directory /srv/cartography -RUN /usr/bin/git config --local user.name "cartography" +ENTRYPOINT ["cartography"] +CMD ["-h"] diff --git a/dev.Dockerfile b/dev.Dockerfile new file mode 100644 index 0000000000..eef8fea6ad --- /dev/null +++ b/dev.Dockerfile @@ -0,0 +1,28 @@ +# Builds cartography container for development by performing a Python editable install of the current source code. +FROM python:3.10-slim + +# the UID and GID to run cartography as +# (https://github.com/hexops/dockerfile#do-not-use-a-uid-below-10000). 
+ARG uid=10001 +ARG gid=10001 + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends make git && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Assumption: current working directory is the cartography source tree from github. +COPY . /var/cartography +WORKDIR /var/cartography +ENV HOME=/var/cartography + +RUN pip install -U -e . && \ + pip install -r test-requirements.txt && \ + # Grant write access to the directory for unit and integration test coverage files + chmod -R a+w /var/cartography && \ + # Sets the directory as safe due to a mismatch in the user that cloned the repo + # and the user that is going to run the unit&integ tests. This lets pre-commit work. + git config --global --add safe.directory /var/cartography && \ + git config --local user.name "cartography" + +USER ${uid}:${gid} diff --git a/dist.Dockerfile b/dist.Dockerfile deleted file mode 100644 index bbb377a476..0000000000 --- a/dist.Dockerfile +++ /dev/null @@ -1,19 +0,0 @@ -FROM python:3.10-slim - -# the UID and GID to run cartography as -# (https://github.com/hexops/dockerfile#do-not-use-a-uid-below-10000). -ARG uid=10001 -ARG gid=10001 - -COPY . /var/cartography -WORKDIR /var/cartography - -RUN pip install -U -e . - -USER ${uid}:${gid} - -# verify that the binary at least runs -RUN cartography -h - -ENTRYPOINT ["cartography"] -CMD ["-h"] diff --git a/docker-compose.yml b/docker-compose.yml index 8beacae4e2..105fbb0c8b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,7 +1,11 @@ -version: "3.7" +# This docker-compose is intended to help you quick-start or develop cartography. +# It is also a good starting point for your own customizations. +# If you want to modify this and contribute your change upstream, please file a GitHub issue first. It's hard to make +# this so that it will support as many users as possible, so we've tried to make this with a minimum set of +# functionality that you can extend on your own. 
services: neo4j: - image: neo4j:4.4.5-community + image: neo4j:4.4-community restart: unless-stopped ports: - 7474:7474 @@ -12,6 +16,7 @@ services: - ./.compose/neo4j/import:/import - ./.compose/neo4j/logs:/logs - ./.compose/neo4j/plugins:/plugins + user: "${UID}:${GID}" environment: # Raise memory limits: - NEO4J_dbms_memory_pagecache_size=1G @@ -33,19 +38,39 @@ services: interval: 10s timeout: 10s retries: 10 + +# Runs the standard cartography image available at ghcr.io. cartography: - # As seen in docs, we build with `cd /path/to/cartography && docker build -t lyft/cartography .` - # and then run with `docker-compose up -d`. - image: lyft/cartography + image: ghcr.io/lyft/cartography:latest # EXAMPLE: Our ENTRYPOINT is cartography, running specific command to sync AWS # command: ["-v", "--neo4j-uri=bolt://neo4j:7687", "--aws-sync-all-profiles"] - user: cartography init: true restart: on-failure depends_on: - neo4j volumes: - - ~/.aws:/cartography/.aws/ + # Provide AWS creds to the container + - ~/.aws:/var/cartography/.aws/ + environment: + # Point to the neo4j service defined in this docker-compose file. + - NEO4J_URL=bolt://cartography-neo4j-1:7687 + +# Intended to run local automated tests, custom sync scripts, and local changes. + cartography-dev: + # See dev instructions: we assume that you have built this with + # `docker build -t lyft/cartography-dev . -f dev.Dockerfile`. + # Do not push this image remotely! + image: lyft/cartography-dev + init: true + restart: on-failure + depends_on: + - neo4j + volumes: + # Provide AWS creds to the container + - ~/.aws:/var/cartography/.aws/ + # For pre-commit to work + - .:/var/cartography + - ./.cache/pre-commit:/var/cartography/.cache/pre-commit environment: # Point to the neo4j service defined in this docker-compose file. 
- NEO4J_URL=bolt://cartography-neo4j-1:7687 diff --git a/docs/root/dev/developer-guide.md b/docs/root/dev/developer-guide.md index 279016567e..b927d292e7 100644 --- a/docs/root/dev/developer-guide.md +++ b/docs/root/dev/developer-guide.md @@ -1,72 +1,57 @@ # Cartography Developer Guide -## Testing +## Running the source code -_If you'd like to test using Docker and Docker Compose, see [here](testing-with-docker.html)_ +This document assumes familiarity with Python dev practices such as using [virtualenvs](https://packaging.python.org/guides/installing-using-pip-and-virtualenv/). -### Running from source +1. **Run Neo4j** -1. **Install** + Follow the [Install Steps](../install.html) so that you get Neo4j running locally. It's up to you if you want to use Docker or a native install. - Follow steps 1 and 2 in [Installation](../install.html#cartography-installation). Ensure that you have JVM 11 installed and Neo4j Community Edition 4.4 is running on your local machine. +1. **Install Python 3.10** -2. **Clone the source code** +1. **Clone the source code** Run `cd {path-where-you-want-your-source-code}`. Get the source code with `git clone git://github.com/lyft/cartography.git` -3. **Install from source** +1. **Perform an editable install of the cartography source code** - Run `cd cartography` and then `pip install -e .` (yes, actually type the period into the command line) to install Cartography from source. - - ℹ️You may find it beneficial to use Python [virtualenvs](https://packaging.python.org/guides/installing-using-pip-and-virtualenv/) (or the [virutalenvwrapper](https://virtualenvwrapper.readthedocs.io/en/latest/command_ref.html#managing-environments)) so that packages installed via `pip` are easier to manage. + Run `cd cartography` and then `pip install -e .` (yes, actually type the period into the command line) to install Cartography from source to the current venv. 4. 
**Run from source** - After this finishes you should be able to run Cartography from source with `cartography --neo4j-uri `. Any changes to the source code in `{path-where-you-want-your-source-code}/cartography` are now locally testable by running `cartography` from the command line. - -### Manually testing individual intel modules - -After completing the section above, you are now able to manually test intel modules. - -1. **If needed, comment out unnecessary lines** - - See `cartography.intel.aws._sync_one_account()`[here](https://github.com/lyft/cartography/blob/master/cartography/intel/aws/__init__.py). This function syncs different AWS objects with your Neo4j instance. Comment out the lines that you don't want to test for. - - For example, IAM can take a long time to ingest so if you're testing an intel module that doesn't require IAM nodes to already exist in the graph, then you can comment out all of the `iam.sync_*` lines. - -2. Save your changes and run `cartography` from a terminal as you normally would. + After this finishes you should be able to run Cartography from source with `cartography --neo4j-uri bolt://localhost:7687`. Any changes to the source code in `{path-where-you-want-your-source-code}/cartography` are now locally testable by running `cartography` from the command line. -### Automated testing +## Automated testing 1. **Install test requirements** `pip install -r test-requirements.txt` -2. **(OPTIONAL) Setup environment variables for integration tests** +1. **(OPTIONAL) Setup environment variables for integration tests** - The integration tests expect Neo4j to be running locally, listening on default ports, with auth disabled: - - To disable auth, edit your `neo4j.conf` file with `dbms.security.auth_enabled=false`. Additional details on [neo4j.com]( https://neo4j.com/docs/operations-manual/current/authentication-authorization/enable/). 
+ The integration tests expect Neo4j to be running locally, listening on default ports, and with auth disabled. To run the integration tests on a specific Neo4j instance, add the following environment variable: `export "NEO4J_URL="` -3. **Run tests using `make`** - - `make test_lint` can be used to run [pre-commit](https://pre-commit.com) linting against the codebase. We use [pre-commit](https://pre-commit.com) to standardize our linting across our code-base at Lyft. - - `make test_unit` can be used to run the unit test suite. +1. **Run tests using `make`** + - `make test_lint` runs [pre-commit](https://pre-commit.com) linting against the codebase. + - `make test_unit` runs the unit test suite. ⚠️ Important! The below commands will **DELETE ALL NODES** on your local Neo4j instance as part of our testing procedure. Only run any of the below commands if you are ok with this. ⚠️ - - `make test_integration` can be used to run the integration test suite. + - `make test_integration` runs the integration test suite. For more granular testing, you can invoke `pytest` directly: - `pytest ./tests/integration/cartography/intel/aws/test_iam.py` - `pytest ./tests/integration/cartography/intel/aws/test_iam.py::test_load_groups` + - `pytest -k test_load_groups` - `make test` can be used to run all of the above. ## Implementing custom sync commands -By default, cartography will try to sync every intel module included as part of the default sync. If you're not using certain intel modules you can create a custom sync script and invoke it using the cartography CLI. For example, if you're only interested in the AWS intel module you can create a sync script, `custom_sync.py`, that looks like this: +By default, cartography will try to sync every intel module included as part of the default sync. If you're not using certain intel modules, you can create a custom sync script and invoke it using the cartography CLI. 
For example, if you're only interested in the AWS intel module you can create a sync script, `custom_sync.py`, that looks like this: ```python from cartography import cli @@ -103,5 +88,133 @@ INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/c ... ``` +## dev.Dockerfile + +We include a dev.Dockerfile that can help streamline common dev tasks. It is different from the main Dockerfile in that + +1. It is strictly intended for dev purposes. +1. It performs an editable install of the cartography source code and test requirements. +1. It does not define a docker entrypoint. This is to allow you to run a custom sync script instead of just the main `cartography` command. + +To use it, build dev.Dockerfile with +```bash +cd /path/to/cartography/repo +docker build -t lyft/cartography-dev -f dev.Dockerfile . +docker-compose --profile dev up -d +``` + +With that, there are some interesting things you can do with it. + +### Dev with docker-compose + +#### Run the full test suite + +```bash +docker-compose run cartography-dev make test_lint +docker-compose run cartography-dev make test_unit +docker-compose run cartography-dev make test_integration + +# for all the above +docker-compose run cartography-dev make test +``` + +#### Run a [custom sync script](#implementing-custom-sync-commands) + +```bash +docker-compose run cartography-dev python custom_sync.py +``` + +#### Run the cartography CLI + +```bash +docker-compose run cartography-dev cartography --help +``` + +### Equivalent manual docker commands + +If you don't like docker-compose or if it doesn't work for you for any reason, here are the equivalent manual docker commands for the previous scenarios: + +#### Run unit tests with dev.Dockerfile + +```bash +docker run --rm lyft/cartography-dev make test_unit +``` + +This is a simple command because it doesn't require any volume mounts or docker networking. 
+ +#### Run the linter with dev.Dockerfile + +```bash +docker run --rm \ + -v $(pwd):/var/cartography \ + -v $(pwd)/.cache/pre-commit:/var/cartography/.cache/pre-commit \ + lyft/cartography-dev \ + make test_lint +``` + +The volume mounts are necessary to let pre-commit from within the container edit source files on the host machine, and for pre-commit's cached state to save on your host machine without needing to update itself every time you run it. + +#### Run integration tests with dev.Dockerfile + +First run a Neo4j container: +```bash +docker run \ + --publish=7474:7474 \ + --publish=7687:7687 \ + --network cartography-network \ + -v data:/data \ + --name cartography-neo4j \ + --env=NEO4J_AUTH=none \ + neo4j:4.4-community +``` + +and then call the integration test suite like this: +```bash +docker run --rm \ + --network cartography-network \ + -e NEO4J_URL=bolt://cartography-neo4j:7687 \ + lyft/cartography-dev \ + make test_integration +``` + +Note that we needed to specify the `NEO4J_URL` env var so that the integration test would be able to reach the Neo4j container. + +#### Run the full test suite with dev.Dockerfile + +Bring up a neo4j container +```bash +docker run \ + --publish=7474:7474 \ + --publish=7687:7687 \ + --network cartography-network \ + -v data:/data \ + --name cartography-neo4j \ + --env=NEO4J_AUTH=none \ + neo4j:4.4-community +``` + +and then run the full test suite by specifying all the necessary volumes, network, and env vars. 
+```bash +docker run --rm \ + -v $(pwd):/var/cartography \ + -v $(pwd)/.cache/pre-commit:/var/cartography/.cache/pre-commit \ + --network cartography-network \ + -e NEO4J_URL=bolt://cartography-neo4j:7687 \ + lyft/cartography-dev \ + make test +``` + +#### Run a [custom sync script](#implementing-custom-sync-commands) with dev.Dockerfile + +```bash +docker run --rm lyft/cartography-dev python custom_sync.py +``` + +#### Run cartography CLI with dev.Dockerfile + +```bash +docker run --rm lyft/cartography-dev cartography --help +``` + ## How to write a new intel module See [here](writing-intel-modules.html). diff --git a/docs/root/dev/index.rst b/docs/root/dev/index.rst index 05e11df566..4c6b9dc209 100644 --- a/docs/root/dev/index.rst +++ b/docs/root/dev/index.rst @@ -3,4 +3,3 @@ developer-guide writing-analysis-jobs writing-intel-modules - testing-with-docker diff --git a/docs/root/dev/testing-with-docker.md b/docs/root/dev/testing-with-docker.md deleted file mode 100644 index 82a64ebd3e..0000000000 --- a/docs/root/dev/testing-with-docker.md +++ /dev/null @@ -1,71 +0,0 @@ -# Testing with docker - -## Using the included docker-compose support -docker-compose lets you run cartography (and its unit and integration tests) -without needing to install Python and without needing to install Neo4j. - -### Usage - -1. Build the cartography Dockerfile. This creates a Docker image with all the -Python dependencies needed by cartography and installs cartography itself to -the image. - - ```bash - # Make sure you don't forget the '.' (represents current directory) - docker build -t lyft/cartography . - ``` - -1. Start up the docker-compose dev environment - - ```bash - docker-compose up -d - ``` - -1. 
Run the tests - ```bash - docker-compose run cartography make test - # Alternatively replace `make test` with `make test_lint`, - # `make test_unit`, or `make test_integration` - ``` - -Now when you make changes to the code, you can follow the steps above again to -rebuild the container and re-run the automated tests. You can also run the full -cartography sync by following the ["Notes"](#notes) section of this document. - -### Configuration - -Configuration is possible via the `.compose` directory, which is -git ignored. neo4j config, logs, etc is located at `.compose/neo4j/...` - -Configuration for cartography itself should be passed in through -environment variables, using the docker-compose format `-e VARIABLE -e VARIABLE` - -AWS credentials can be bind mapped in using volumes. TODO: document correct -bind mount format for docker-compose run. - -### Notes - -* On initial start of the compose stack, it's necessary to -change the neo4j user's password through the neo4j UI. -* Neither the docker image, nor the docker-compose file define an -entrypoint, so it's necessary to pass in the command being run. This -also makes it possible to run a custom sync script, rather than only -cartography. - -### Example - -```bash -# Temporarily disable bash command history -set +o history -# See the cartography github configuration intel module docs -export GITHUB_KEY=BASE64ENCODEDKEY -# You need to set this after starting neo4j once, and resetting -# the default neo4j password, which is neo4j -export NEO4j_PASSWORD=... 
-# Reenable bash command history -set -o history -# Start cartography dependencies -docker-compose up -d -# Run cartography -docker-compose run -e GITHUB_KEY -e NEO4j_PASSWORD cartography cartography --github-config-env-var GITHUB_KEY --neo4j-uri bolt://neo4j:7687 --neo4j-password-env-var NEO4j_PASSWORD --neo4j-user neo4j -``` diff --git a/docs/root/images/docker-compose-run.png b/docs/root/images/docker-compose-run.png new file mode 100644 index 0000000000..aaf0daba46 Binary files /dev/null and b/docs/root/images/docker-compose-run.png differ diff --git a/docs/root/images/dockercompose-flow.png b/docs/root/images/dockercompose-flow.png new file mode 100644 index 0000000000..d1e30f9d84 Binary files /dev/null and b/docs/root/images/dockercompose-flow.png differ diff --git a/docs/root/images/dockercompose-result.png b/docs/root/images/dockercompose-result.png new file mode 100644 index 0000000000..ef063bbe38 Binary files /dev/null and b/docs/root/images/dockercompose-result.png differ diff --git a/docs/root/images/nativeinstall-run.png b/docs/root/images/nativeinstall-run.png new file mode 100644 index 0000000000..93aa57b488 Binary files /dev/null and b/docs/root/images/nativeinstall-run.png differ diff --git a/docs/root/images/yourowntestmachine.png b/docs/root/images/yourowntestmachine.png new file mode 100644 index 0000000000..55a90a19cb Binary files /dev/null and b/docs/root/images/yourowntestmachine.png differ diff --git a/docs/root/install.md b/docs/root/install.md index 5703f52ca0..7ada5be2d9 100644 --- a/docs/root/install.md +++ b/docs/root/install.md @@ -1,28 +1,181 @@ -# Cartography Installation On Test Machine +# Install and Run Cartography On Test Machine .. _cartography-installation: -Time to set up a test machine to run Cartography. Cartography _should_ work on both Linux and Windows, but bear in mind we've only tested it on Linux so far. +Time to set up a test machine to run Cartography. -1. Ensure that you have Python 3.10 set up on your machine. 
+## Option 1: Run docker-compose (preferred) - - Older or newer versions of Python may work but are not explicitly supported. You will probably have more luck with newer versions. +This is the quickest way to get started (assuming docker does what it's supposed to do). -1. **Run the Neo4j graph database version 4.4.\*** or higher on your server. 4.3 and lower will _not_ work. +![dockercompose-flow.png](images/dockercompose-flow.png) - ⚠️ Neo4j 5.x will probably work since it's included in our test suite, but we do not explicitly support it yet. +1. **Start up the Neo4j graph database.** + + ```bash + docker-compose up -d + ``` + + If this command errors out with permission problems, you may need to configure your Docker Desktop/Rancher Desktop/etc to use a different virtual machine volume setting: try virtiofs on Mac. + + It may take a minute for the Neo4j container to spin up. + +1. **Configure and run Cartography.** + + In this example we will run Cartography on [AWS](https://lyft.github.io/cartography/modules/aws/config.html) with a profile called "1234_testprofile" and default region set to "us-east-1". + + ```bash + docker-compose run \ + -e AWS_PROFILE=1234_testprofile \ + -e AWS_DEFAULT_REGION=us-east-1 \ + cartography --neo4j-uri bolt://cartography-neo4j-1:7687 + ``` + + If you get a connection error like `ValueError: Cannot resolve address cartography-neo4j-1:7687`, you may need to wait a bit for the Neo4j container to be ready. Run `docker ps` periodically to check on it and then retry the `docker-compose run ..` command. + + You will know it works when your terminal shows log messages displaying how many assets are being loaded to the graph: + + ![docker-compose-run.png](images/docker-compose-run.png) + + **Notes:** + - You can view a full list of Cartography's CLI arguments by running `docker-compose run cartography --help`. 
+ + - Also see the configuration section of [each relevant intel module](https://lyft.github.io/cartography/modules) to set up each data source. This generally involves specifying environment variables to cartography, or making a config/credential file on the host available to the container. + + - You can pass in environment variables to the cartography container using the docker-compose format like this: `-e VARIABLE1 -e VARIABLE2=value2`. + - You can make files available to the cartography container by editing the volumes in the docker-compose.yml file. See docker-compose documentation on how to do that. + + - `cartography-neo4j-1` is how the Cartography docker container knows how to reach the Neo4j container in docker-compose. + + - AWS things + + - `AWS_DEFAULT_REGION` must be specified. + - The docker-compose.yml maps in `~/.aws/` on your host machine to `/var/cartography/.aws` in the cartography container so that the container has access to AWS profile and credential files. + +1. **View the graph.** + + You can view the graph while it is still syncing by visiting http://localhost:7474. Try a query like + + ```cypher + match (i:AWSRole)--(c:AWSAccount) return * + ``` + + It should look like this: + + ![dockercompose-result.png](images/dockercompose-result.png) + +1. **Optional**: If you want to configure the Neo4j container itself, you can do this via the `.compose` directory, which is +git ignored. neo4j config, logs, etc are all located at `.compose/neo4j/...` + +1. **Optional**: You can supply additional environment variables via `docker-compose` like this: + ```bash + # Temporarily disable bash command history + set +o history + # See the cartography github configuration intel module docs + export GITHUB_KEY=BASE64ENCODEDKEY + # You need to set this after starting neo4j once, and resetting + # the default neo4j password, which is neo4j + export NEO4j_PASSWORD=... 
+ # Reenable bash command history + set -o history + # Start cartography dependencies + docker-compose up -d + # Run cartography + docker-compose run -e GITHUB_KEY -e NEO4j_PASSWORD cartography --github-config-env-var GITHUB_KEY --neo4j-uri bolt://neo4j:7687 --neo4j-password-env-var NEO4j_PASSWORD --neo4j-user neo4j + ``` + + +Read on to see [other things you can do with Cartography](#things-to-do-next). + +## Option 2: manually run 2 containers + +1. **Run the Neo4j graph database container.** + + ```bash + # Create a docker network so that cartography can talk to neo4j + docker network create cartography-network + + # run the Neo4j graph database + docker run \ + --publish=7474:7474 \ + --publish=7687:7687 \ + --network cartography-network \ + -v data:/data \ + --name cartography-neo4j \ + --env=NEO4J_AUTH=none \ + neo4j:4.4-community + ``` - 1. If you prefer **Docker** (recommended), run `docker run --publish=7474:7474 --publish=7687:7687 -v data:/data --env=NEO4J_AUTH=none neo4j:4.4-community` to spin up a Neo4j container. Refer to the Neo4j Docker [official docs](https://github.com/neo4j/docker-neo4j) for more information. + - Refer to the Neo4j Docker [official docs](https://github.com/neo4j/docker-neo4j) for more information. - - Note that we are just playing around here on a test instance and have specified `--env=NEO4J_AUTH=none` to turn off authentication. + - Note that we are just playing around here on a test instance and have specified `--env=NEO4J_AUTH=none` to turn off authentication. - - If you experience very slow write performance using an ARM-based machine like an M1 Mac, you should use an ARM image. Neo4j keeps ARM builds [here](https://hub.docker.com/r/arm64v8/neo4j/). + - If you experience very slow write performance using an ARM-based machine like an M1 Mac, see if using an ARM image helps. Neo4j keeps ARM builds [here](https://hub.docker.com/r/arm64v8/neo4j/). - 1. Else if you prefer a **manual install**, +1. 
**Configure and run Cartography.** - 1. Neo4j requires a JVM (JDK/JRE 11 or higher) to be installed. One option is to install [Amazon Coretto 11](https://docs.aws.amazon.com/corretto/latest/corretto-11-ug/what-is-corretto-11.html). + See the configuration section of [each relevant intel module](https://lyft.github.io/cartography/modules) to set up each data source. In this example we will use [AWS](https://lyft.github.io/cartography/modules/aws/config.html). - ⚠️ Make sure you have `JAVA_HOME` environment variable set. The following works for Mac OS: `export JAVA_HOME=$(/usr/libexec/java_home)` + This command runs cartography on an AWS profile called "1234_testprofile" on region us-east-1. We also expose the host machine's ~/.aws directory to /var/cartography/.aws in the container so that AWS configs work. + + ```bash + docker run --rm \ + --network cartography-network \ + -v ~/.aws:/var/cartography/.aws/ \ + -e AWS_PROFILE=1234_testprofile \ + -e AWS_DEFAULT_REGION=us-east-1 \ + lyft/cartography --neo4j-uri bolt://cartography-neo4j:7687 + ``` + + If things work, your terminal will look like this where you see log messages displaying how many assets are being loaded to the graph: + + ![docker-compose-run.png](images/docker-compose-run.png) + + ### Notes: + + - You pass in environment variables to the cartography container using the docker format like this: `-e VARIABLE1 -e VARIABLE2=value2`. + + - AWS things + + - `AWS_DEFAULT_REGION` must be specified. + - The `-v ~/.aws:/var/cartography/.aws/` flag in the command above maps `~/.aws/` on your host machine to `/var/cartography/.aws` in the cartography container, so the container has access to AWS profile and credential files. + - You can view a full list of Cartography's CLI arguments by running `docker run lyft/cartography --help`. + +1. **View the graph.** + + You can view the graph while it is still syncing by visiting http://localhost:7474. 
Try a query like + + ```cypher + match (i:AWSRole)--(c:AWSAccount) return * + ``` + + It should look like this: + + ![dockercompose-result.png](images/dockercompose-result.png) + +Read on to see [other things you can do with Cartography](#things-to-do-next). + +## Option 3: Native install + +Do this if you prefer to install and manage all the dependencies yourself. Cartography _should_ work on Linux, Mac, and Windows, but bear in mind we haven't tested much on Windows so far. + +![yourowntestmachine.png](images/yourowntestmachine.png) + +1. **Ensure that you have Python 3.10 set up on your machine.** + + Older or newer versions of Python may work but are not explicitly supported. You will probably have more luck with newer versions. + +1. **Run Neo4j graph database version 4.4 or higher. 4.3 and lower will _not_ work.** + + ⚠️ Neo4j 5.x will probably work since it's included in our test suite, but we do not explicitly support it yet. + + 1. We recommend running Neo4j as a Docker container so that you save time and don't need to install Java. Run `docker run --publish=7474:7474 --publish=7687:7687 -v data:/data --env=NEO4J_AUTH=none neo4j:4.4-community`. + + 1. Otherwise, if you prefer to **install Neo4j from scratch**, + + 1. Neo4j requires a JVM (JDK/JRE 11 or higher). One option is [Amazon Corretto 11](https://docs.aws.amazon.com/corretto/latest/corretto-11-ug/what-is-corretto-11.html). + + ⚠️ Make sure you have the `JAVA_HOME` environment variable set. The following works for Mac OS: `export JAVA_HOME=$(/usr/libexec/java_home)` 1. Go to the [Neo4j download page](https://neo4j.com/download-center/#community), and download Neo4j Community Edition 4.4.\*. @@ -30,41 +183,54 @@ Time to set up a test machine to run Cartography. Cartography _should_ work on b ⚠️ For local testing, you might want to turn off authentication via property `dbms.security.auth_enabled` in file NEO4J_PATH/conf/neo4j.conf -1. Configure your data sources. 
See the configuration section of [each relevant intel module](https://lyft.github.io/cartography/modules) for more details. +1. **Install cartography to the current Python virtual environment with `pip install cartography`.** + + We recommend creating a separate venv for just Cartography and its dependencies. You can read about venvs [here](https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/#create-and-use-virtual-environments), and searching on how to use tools like pyenv and pyenv-virtualenv. + +1. **Configure your data sources.** + + See the configuration section of [each relevant intel module](https://lyft.github.io/cartography/modules) for more details. In this example we will use [AWS](https://lyft.github.io/cartography/modules/aws/config.html). -1. **Get and run Cartography** +1. **Run cartography.** - 1. Run `pip install cartography` + - For a specific AWS account defined as a separate profile in your AWS config file, set the `AWS_PROFILE` environment variable, for example this command runs cartography on an AWS profile called "1234_testprofile" on region us-east-1. - - This will install cartography in the current Python virtual environment. We recommend creating a separate virtual environment for just Cartography and its dependencies. + ```bash + AWS_PROFILE=1234_testprofile AWS_DEFAULT_REGION=us-east-1 cartography --neo4j-uri bolt://localhost:7687 + ``` - 1. Finally, let's sync some data into the test graph. In this example we will use AWS. Refer to each module's [specific configuration section](https://lyft.github.io/cartography/modules) on how to set them up. 
+ - For one account using the `default` profile defined in your AWS config file, run - - For one account using the `default` profile defined in your AWS config file, run + ```bash + cartography --neo4j-uri bolt://localhost:7687 + ``` - ``` - cartography --neo4j-uri bolt://localhost:7687 - ``` + - For more than one AWS account, run - - Or for a specific account defined as a separate profile in your AWS config file, set the `AWS_PROFILE` environment variable, for example + ```bash + AWS_CONFIG_FILE=/path/to/your/aws/config cartography --neo4j-uri bolt://localhost:7687 --aws-sync-all-profiles + ``` - ``` - AWS_PROFILE=other-profile cartography --neo4j-uri - ``` + You can view a full list of Cartography's CLI arguments by running `cartography --help`. - - For more than one AWS account, run + If everything worked, the sync will pull data from your configured accounts and ingest data to Neo4j! This process might take a long time if your account has a lot of assets. + ![nativeinstall-run.png](images/nativeinstall-run.png) - ``` - AWS_CONFIG_FILE=/path/to/your/aws/config cartography --neo4j-uri --aws-sync-all-profiles - ``` +1. **View the graph.** + You can view the graph while it is still syncing by visiting http://localhost:7474. Try a query like - You can view a full list of Cartography's CLI arguments by running `cartography --help` + ```cypher + match (i:AWSRole)--(c:AWSAccount) return * + ``` - If everything worked, the sync will pull data from your configured accounts and ingest data to Neo4j! This process might take a long time if your account has a lot of assets. 
+ It should look like this: - If you encounter errors, review these references: - - Ensure your ~/.aws/credentials and ~/.aws/config files are set up correctly: https://docs.aws.amazon.com/cli/v1/userguide/cli-configure-files.html - - Review the various AWS environment variables: https://docs.aws.amazon.com/cli/v1/userguide/cli-configure-envvars.html - - Cartography uses the boto3 Python library to access AWS, so remember that boto3's standard order of precedence when retrieving credentials applies: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html#configuring-credentials + ![dockercompose-result.png](images/dockercompose-result.png) - 1. Enjoy! Next set up other data providers, see our [Operations Guide](ops.html) for tips on running Cartography in production, view our [usage instructions](https://lyft.github.io/cartography/usage/tutorial.html) and [schema](https://lyft.github.io/cartography/usage/schema.html) for querying help, and think of [applications](https://lyft.github.io/cartography/usage/applications.html) to build around it. +## Things to do next +Here are some ideas to get the most out of Cartography: +- [Set up other data providers](https://lyft.github.io/cartography/modules) +- View our [Operations Guide](ops.html) for tips on running Cartography in production +- Read our [usage instructions](https://lyft.github.io/cartography/usage/tutorial.html) and [schema](https://lyft.github.io/cartography/usage/schema.html) to learn how to query the graph +- Think of [applications](https://lyft.github.io/cartography/usage/applications.html) to build around it +- Consider [writing your own Cartography custom modules](dev/writing-intel-modules.md) diff --git a/docs/root/modules/aws/config.md b/docs/root/modules/aws/config.md index 7536be1617..5ad1573170 100644 --- a/docs/root/modules/aws/config.md +++ b/docs/root/modules/aws/config.md @@ -4,6 +4,13 @@ Follow these steps to analyze AWS assets with Cartography. 
+In a nutshell, Cartography uses the [boto3](https://github.com/boto/boto3) library to retrieve assets from AWS and respects all settings and credentials passed to boto3. If you've used boto3 before, then you're already very familiar with setting up Cartography for AWS. + +### Very helpful references +- Ensure your ~/.aws/credentials and ~/.aws/config files are set up correctly: https://docs.aws.amazon.com/cli/v1/userguide/cli-configure-files.html +- Review the various AWS environment variables: https://docs.aws.amazon.com/cli/v1/userguide/cli-configure-envvars.html +- Refer to boto3's standard order of precedence when retrieving credentials: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html#configuring-credentials + ### Single AWS Account Setup 1. Set up an AWS identity (user, group, or role) for Cartography to use. Ensure that this identity has the built-in AWS [SecurityAudit policy](https://docs.aws.amazon.com/IAM/latest/UserGuide/access_policies_job-functions.html#jf_security-auditor) (arn:aws:iam::aws:policy/SecurityAudit) attached. This policy grants access to read security config metadata.