diff --git a/clayground.py b/clayground.py
deleted file mode 100644
index a73fec94..00000000
--- a/clayground.py
+++ /dev/null
@@ -1,132 +0,0 @@
-import lancedb
-import matplotlib.pyplot as plt
-import rasterio as rio
-import streamlit as st
-from rasterio.plot import show
-
-st.set_page_config(layout="wide")
-
-
-# Get preferrred chips
-def get_unique_chips(tbl):
-    chips = [
-        {"tile": "17MNP", "idx": "0271", "year": 2023},
-        {"tile": "19HGU", "idx": "0033", "year": 2018},
-        {"tile": "33NVB", "idx": "0393", "year": 2020},
-        {"tile": "21JVJ", "idx": "0100", "year": 2020},
-        {"tile": "34KHD", "idx": "0080", "year": 2018},
-        {"tile": "19JCF", "idx": "0215", "year": 2023},
-        {"tile": "20HMK", "idx": "0100", "year": 2020},
-        {"tile": "37MFT", "idx": "0313", "year": 2023},
-        {"tile": "49KHR", "idx": "0020", "year": 2017},
-        {"tile": "55LBC", "idx": "0075", "year": 2022},
-    ]
-
-    tile_filter = " OR ".join(
-        [
-            f"(tile == '{chip['tile']}' "
-            f"AND idx == '{chip['idx']}') "
-            f"AND year == {chip['year']}"
-            for chip in chips
-        ]
-    )
-    result = tbl.search().where(tile_filter, prefilter=True).to_pandas()
-    return result
-
-
-# Load embeddings
-@st.cache_resource()
-def connect_to_database():
-    db = lancedb.connect("nbs/embeddings")
-    tbl = db.open_table("clay-v001")
-    return tbl
-
-
-@st.cache_resource()
-def show_samples(_tbl):
-    df = get_unique_chips(_tbl)
-    # df = _tbl.head(10).to_pandas()
-    # sample 100 random rows
-    # samples = df.sample(100).to_dict("records")
-    samples = df.to_dict("records")
-
-    cols = st.columns(10)
-    options = {}
-    for idx, sample in enumerate(samples):
-        path = sample["path"]
-        rgb_chip = rio.open(path).read(indexes=[3, 2, 1])
-        rgb_chip = (rgb_chip - rgb_chip.min()) / (rgb_chip.max() - rgb_chip.min())
-        with cols[idx % 10]:
-            st.caption(f"{sample['tile']}-{sample['date']}-{sample['idx']}")
-            show(rgb_chip)
-            plt.axis("off")
-            st.pyplot(plt)
-
-        options[f"{sample['tile']}-{sample['idx']}"] = {
-            "vector": sample["vector"],
-            "tile": sample["tile"],
-            "year": sample["year"],
-        }
-
-    return options
-
-
-# Function to find similar vectors
-@st.cache_data()
-def find_similar_vectors(_tbl, query):
-    # tile, year = query["tile"], query["year"]
-    # filter = f"tile != '{tile}'"
-    result = (
-        _tbl.search(query=query["vector"], vector_column_name="vector")
-        .metric("cosine")
-        # .where(filter, prefilter=True)
-        .limit(10)
-        .to_pandas()
-    )
-    # st.dataframe(result)
-    cols = st.columns(10)
-    for idx, row in result.iterrows():
-        path = row["path"]
-        rgb_chip = rio.open(path).read(indexes=[3, 2, 1])
-        rgb_chip = (rgb_chip - rgb_chip.min()) / (rgb_chip.max() - rgb_chip.min())
-        with cols[idx % 10]:
-            st.caption(f"{row['tile']}-{row['date']}-{row['idx']}")
-            show(rgb_chip)
-            plt.axis("off")
-            st.pyplot(plt)
-
-
-# Main app
-def main():
-    st.title("Clayground")
-
-    tbl = connect_to_database()
-    options = show_samples(tbl)
-
-    # UI to select an embedding
-    with st.sidebar:
-        selection = st.selectbox("Select a chip", options=options.keys())
-
-        arithmetic = st.toggle("Arithmetic", False)
-        if arithmetic:
-            multiselect = st.multiselect(
-                "Select multiple chips", options=options.keys(), default=[]
-            )
-
-        submit = st.button("Submit")
-
-    if submit and not arithmetic:
-        query = options[selection]
-        find_similar_vectors(tbl, query)
-
-    if submit and arithmetic and len(multiselect) > 1:
-        st.write("Selected:", multiselect)
-        v1 = options[multiselect[0]]
-        v2 = options[multiselect[1]]
-        v3 = (v1["vector"] + v2["vector"]) / 2
-
-        find_similar_vectors(tbl, {"vector": v3})
-
-
-if __name__ == "__main__":
-    main()
diff --git a/scripts/pipeline/batch/Dockerfile b/scripts/pipeline/batch/Dockerfile
deleted file mode 100644
index 24e4a755..00000000
--- a/scripts/pipeline/batch/Dockerfile
+++ /dev/null
@@ -1,8 +0,0 @@
-# List of aws images https://github.com/aws/deep-learning-containers/blob/master/available_images.md
-FROM mcr.microsoft.com/planetary-computer/python
-
-# Fetch-and-run setup.
-# https://aws.amazon.com/blogs/compute/creating-a-simple-fetch-and-run-aws-batch-job/
-ADD fetch_and_run.sh /usr/local/bin/fetch_and_run.sh
-WORKDIR /tmp
-ENTRYPOINT ["/usr/local/bin/fetch_and_run.sh"]
diff --git a/scripts/pipeline/batch/compute-environment.json b/scripts/pipeline/batch/compute-environment.json
deleted file mode 100644
index 4e40e4c0..00000000
--- a/scripts/pipeline/batch/compute-environment.json
+++ /dev/null
@@ -1,42 +0,0 @@
-{
-    "computeEnvironmentName": "fetch-and-run",
-    "computeEnvironmentArn": "arn:aws:batch:us-east-1:939659358055:compute-environment/fetch-and-run",
-    "ecsClusterArn": "arn:aws:ecs:us-east-1:939659358055:cluster/AWSBatch-fetch-and-run-6c59b338-7c7a-3d17-9cae-48fa8cb01fd1",
-    "tags": {},
-    "type": "MANAGED",
-    "state": "ENABLED",
-    "status": "VALID",
-    "statusReason": "ComputeEnvironment Healthy",
-    "computeResources": {
-        "type": "SPOT",
-        "allocationStrategy": "SPOT_PRICE_CAPACITY_OPTIMIZED",
-        "minvCpus": 0,
-        "maxvCpus": 256,
-        "desiredvCpus": 0,
-        "instanceTypes": [
-            "optimal"
-        ],
-        "subnets": [
-            "subnet-065db57a9bbe9db39",
-            "subnet-071e09a3c54797241",
-            "subnet-02b3216f099a85184",
-            "subnet-082a6aa9a80b134ce",
-            "subnet-021cbf7ad41e521ff",
-            "subnet-0ea444ed7a76a64ee"
-        ],
-        "securityGroupIds": [
-            "sg-076376c48cdb15a88"
-        ],
-        "instanceRole": "arn:aws:iam::939659358055:instance-profile/ecsInstanceRole",
-        "tags": {},
-        "bidPercentage": 100,
-        "ec2Configuration": [
-            {
-                "imageType": "ECS_AL2"
-            }
-        ]
-    },
-    "serviceRole": "arn:aws:iam::939659358055:role/aws-service-role/batch.amazonaws.com/AWSServiceRoleForBatch",
-    "containerOrchestrationType": "ECS",
-    "uuid": "b105b281-f736-36de-af70-450cafda628a"
-}
diff --git a/scripts/pipeline/batch/fetch_and_run.sh b/scripts/pipeline/batch/fetch_and_run.sh
deleted file mode 100755
index c34e5bde..00000000
--- a/scripts/pipeline/batch/fetch_and_run.sh
+++ /dev/null
@@ -1,128 +0,0 @@
-#!/bin/bash
-
-# Copyright 2013-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the
-# License. A copy of the License is located at
-#
-#  http://aws.amazon.com/apache2.0/
-#
-# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
-# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and
-# limitations under the License.
-
-# This script can help you download and run a script from S3 using aws-cli.
-# It can also download a zip file from S3 and run a script from inside.
-# See below for usage instructions.
-
-# https://raw.githubusercontent.com/awslabs/aws-batch-helpers/master/fetch-and-run/fetch_and_run.sh
-
-PATH="/srv/conda/envs/notebook/bin:/opt/conda/bin:/bin:/usr/bin:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin"
-BASENAME="${0##*/}"
-
-usage () {
-  if [ "${#@}" -ne 0 ]; then
-    echo "* ${*}"
-    echo
-  fi
-  cat <<ENDUSAGE
-Usage:
-
-export BATCH_FILE_TYPE="script"
-export BATCH_FILE_S3_URL="s3://my-bucket/my-script"
-${BASENAME} script-from-s3 [ <script arguments> ]
-
-  - or -
-
-export BATCH_FILE_TYPE="zip"
-export BATCH_FILE_S3_URL="s3://my-bucket/my-zip"
-${BASENAME} script-from-zip [