-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
43 changed files
with
1,362 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
Empty file.
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# the codecommit repo where the pipeline should pull its source from | ||
repoName: "aiops-to-greengrass-cs" | ||
# the branch to use | ||
branchName: "main" | ||
# S3 prefix where pipeline assets will be stored | ||
pipelineAssetsPrefix: "pipeline/labeling" | ||
# whether to use a private workteam for labeling | ||
usePrivateWorkteamForLabeling: false | ||
# whether to use a private workteam for verification | ||
usePrivateWorkteamForVerification: false | ||
# maximum number of labels per labeling job | ||
maxLabelsPerLabelingJob: 200 | ||
# the arn of the private workteam for labeling (only used if usePrivateWorkteamForLabeling is true) | ||
labelingJobPrivateWorkteamArn: "arn:aws:sagemaker:eu-west-1:0000000000000:workteam/private-crowd/GT1" | ||
# the arn of the private workteam for verification (only used if usePrivateWorkteamForVerification is true) | ||
verificationJobPrivateWorkteamArn: "arn:aws:sagemaker:eu-west-1:0000000000000:workteam/private-crowd/GT1" | ||
# labeling pipeline schedule, triggering once a month on the 1st to keep cost to a minimum; feel free to change this | ||
labelingPipelineSchedule: "cron(0 12 1 * ? *)" | ||
# featureGroupName in SageMaker Feature Store, where features should be saved | ||
featureGroupName: "tag-quality-inspection" | ||
# modelPackageGroupName in model Registry | ||
modelPackageGroupName: "TagQualityInspectionPackageGroup" | ||
# description of the model package group in the Model Registry | ||
modelPackageGroupDescription: "Contains models for quality inspection of metal tags" |
Empty file.
1 change: 1 addition & 0 deletions
1
...gemaker/sagemaker-groundtruth/tests/lib/assets/groundtruth/labeling_job/class_labels.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"labels": [{"label": "scratch"}]} |
28 changes: 28 additions & 0 deletions
28
...ker/sagemaker-groundtruth/tests/lib/assets/groundtruth/labeling_job/instructions.template
Large diffs are not rendered by default.
Oops, something went wrong.
1 change: 1 addition & 0 deletions
1
...s/sagemaker/sagemaker-groundtruth/tests/lib/assets/groundtruth/verification_job/data.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"document-version":"2021-05-13","labels":[{"label":"Label correct"},{"label":"Incorrect label - missed object"},{"label":"Incorrect label - bounding box not accurate enough"}]} |
38 changes: 38 additions & 0 deletions
38
...maker/sagemaker-groundtruth/tests/lib/assets/groundtruth/verification_job/template.liquid
Large diffs are not rendered by default.
Oops, something went wrong.
Binary file added
BIN
+29 KB
...mages/2021-01-28-09-50-34/bad/image-2020-04-22-11-09-36-69-cropped-left-top.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+30.6 KB
...s/2021-01-28-09-50-34/bad/image-2020-04-22-11-09-36-69-cropped-right-bottom.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+31 KB
...ages/2021-01-28-09-50-34/bad/image-2020-04-22-11-09-36-69-cropped-right-top.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+25.5 KB
...s/2021-01-28-09-50-34/good/image-2020-04-22-10-15-03-04-cropped-left-bottom.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+24.8 KB
...ages/2021-01-28-09-50-34/good/image-2020-04-22-10-15-03-04-cropped-left-top.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+28.6 KB
.../2021-01-28-09-50-34/good/image-2020-04-22-10-15-03-04-cropped-right-bottom.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+28.6 KB
...ges/2021-01-28-09-50-34/good/image-2020-04-22-10-15-03-04-cropped-right-top.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+25.4 KB
...s/2021-01-28-09-50-34/good/image-2020-04-22-10-15-16-00-cropped-left-bottom.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+24.6 KB
...ages/2021-01-28-09-50-34/good/image-2020-04-22-10-15-16-00-cropped-left-top.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+28.7 KB
.../2021-01-28-09-50-34/good/image-2020-04-22-10-15-16-00-cropped-right-bottom.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
237 changes: 237 additions & 0 deletions
237
modules/sagemaker/sagemaker-groundtruth/tests/lib/assets/labels/labels.csv
Large diffs are not rendered by default.
Oops, something went wrong.
Empty file.
13 changes: 13 additions & 0 deletions
13
modules/sagemaker/sagemaker-groundtruth/tests/lib/lambda/check_missing_labels/Dockerfile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Base image: the public ECR Lambda image tagged python:3.11-x86_64.
FROM public.ecr.aws/lambda/python:3.11-x86_64

# Install the function's dependencies using file requirements.txt
# from your project folder.

COPY requirements.txt .
RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}"
# NOTE(review): ensures /opt/extensions exists; the reason is not visible
# in this file - confirm it is still required.
RUN mkdir -p /opt/extensions
# Copy function code
COPY app.py ${LAMBDA_TASK_ROOT}

# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile)
CMD [ "app.handler" ]
Empty file.
160 changes: 160 additions & 0 deletions
160
modules/sagemaker/sagemaker-groundtruth/tests/lib/lambda/check_missing_labels/app.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
from typing import List, Dict, Any, Tuple, Optional | ||
|
||
from collections import namedtuple | ||
from botocore.exceptions import ClientError | ||
import sagemaker | ||
from urllib.parse import urlparse | ||
import boto3 | ||
import logging | ||
from sagemaker.feature_store.feature_group import FeatureGroup | ||
import os | ||
|
||
# Query template: ranks the records of each source_ref by event_time,
# Api_Invocation_Time and write_time (newest first), keeps only the newest
# record per source_ref, and returns it only when its status is 'APPROVED'
# and it is not flagged as deleted. "{table}" is substituted with
# str.format() before the query is run.
APPROVED_LABELS_QUERY = """
SELECT *
FROM
(SELECT *, row_number()
OVER (PARTITION BY source_ref
ORDER BY event_time desc, Api_Invocation_Time DESC, write_time DESC) AS row_number
FROM "{table}")
WHERE row_number = 1 AND status = 'APPROVED' AND NOT is_deleted
"""

# Use the root logger and let INFO-level messages (and above) through.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# initialize clients
# These are created once at import time and shared by all functions below.
s3 = boto3.resource("s3")
s3_client = boto3.client("s3")
sagemaker_session = sagemaker.Session()
sagemaker_client = boto3.client("sagemaker")
|
||
# Immutable record of the settings this Lambda takes from env variables:
#   feature_group_name   - name of the feature group to query
#   feature_name_s3uri   - column of the query result holding the image URI
#   input_images_s3uri   - S3 URI under which the input images are listed
#   query_results_s3uri  - S3 URI passed as output location of the query
LambdaConfig = namedtuple(
    "LambdaConfig",
    "feature_group_name feature_name_s3uri input_images_s3uri query_results_s3uri",
)
|
||
|
||
def initialize_lambda_config() -> LambdaConfig:
    """Build the Lambda configuration from environment variables.

    Each setting is read from its environment variable; when the variable is
    not set, the hard-coded default given here is used instead.

    Returns:
        A ``LambdaConfig`` holding the resolved settings.
    """
    env = os.environ
    return LambdaConfig(
        env.get("FEATURE_GROUP_NAME", "tag-quality-inspection"),
        env.get("FEATURE_NAME_S3URI", "source_ref"),
        env.get("INPUT_IMAGES_S3URI", "s3://aiopsbucket/pipeline/assets/images/"),
        env.get(
            "QUERY_RESULTS_S3URI",
            "s3://aiopsbucket/tmp/feature_store_query_results",
        ),
    )
|
||
|
||
# Resolved once when the module is imported; the functions below read this
# module-level value rather than re-reading the environment.
lambda_config = initialize_lambda_config()
|
||
|
||
def handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
    """Lambda entry point: report which input images have no approved label.

    Lists the ``.jpg``/``.png`` objects under the configured input-images S3
    URI, fetches the already-labelled image URIs from the feature group, and
    returns the images that are not among them.

    Args:
        event: Invocation payload; only logged.
        context: Lambda context object; not used.

    Returns:
        A dict with ``missing_labels`` (list of S3 URIs) and
        ``missing_labels_count`` (its length).
    """
    logger.info(
        f"check-missing-labels called with event {event} and lambda config {lambda_config}"
    )

    images_bucket, images_prefix = split_s3_url(lambda_config.input_images_s3uri)
    image_uris = get_list_of_files(
        bucket=images_bucket, prefix=images_prefix, file_types=[".jpg", ".png"]
    )
    labelled_uris = get_existing_labels(
        lambda_config.feature_group_name, lambda_config.query_results_s3uri
    )
    missing_labels = get_images_without_labels(
        images=image_uris, existing_labels=labelled_uris
    )

    logger.info(
        f"Finished check-missing-labels lambda with {len(missing_labels)} missing labels"
    )
    return {
        "missing_labels_count": len(missing_labels),
        "missing_labels": missing_labels,
    }
|
||
|
||
def split_s3_url(s3_url: str) -> Tuple[str, str]:
    """Split an S3 URL into its bucket name and object key.

    Args:
        s3_url: URL of the form ``s3://<bucket>/<key>``.

    Returns:
        ``(bucket, key)`` where ``bucket`` is the URL's network location and
        ``key`` is the URL path without its leading slash. ``#`` is kept as
        part of the key because fragments are not split off.
    """
    parsed = urlparse(s3_url, allow_fragments=False)
    return parsed.netloc, parsed.path[1:]
|
||
|
||
def get_list_of_files(
    bucket: str, prefix: str, file_types: Optional[List[str]] = None
) -> List[str]:
    """List the S3 objects under a prefix whose key has an allowed suffix.

    Args:
        bucket: Name of the S3 bucket to list.
        prefix: Key prefix used to filter the listing.
        file_types: Allowed key suffixes (e.g. ``[".jpg", ".png"]``). With
            ``None`` or an empty list nothing is accepted.

    Returns:
        ``s3://<bucket>/<key>`` URIs of the matching objects.
    """
    logger.info(f"Getting list of files for bucket {bucket} and prefix {prefix}")

    candidates = s3.Bucket(bucket).objects.filter(Prefix=prefix)
    filtered_files: List[str] = [
        f"s3://{file.bucket_name}/{file.key}"
        for file in candidates
        if is_allowed_file_type(file.key, file_types)
    ]

    logger.info(f"Found {len(filtered_files)} images")
    return filtered_files
|
||
|
||
def is_allowed_file_type(file: str, file_types: Optional[List[str]] = None) -> bool:
    """Tell whether a file name ends with one of the allowed suffixes.

    Args:
        file: File name or object key to check.
        file_types: Allowed suffixes; the comparison is case-sensitive.

    Returns:
        ``True`` if ``file`` ends with any entry of ``file_types``;
        ``False`` otherwise, including when ``file_types`` is ``None`` or
        empty.
    """
    if not file_types:
        return False
    return any(file.endswith(suffix) for suffix in file_types)
|
||
|
||
def feature_group_exists(feature_group_name: str) -> bool:
    """Check whether a SageMaker feature group with the given name exists.

    Args:
        feature_group_name: Name passed to ``describe_feature_group``.

    Returns:
        ``True`` if the describe call succeeds, ``False`` if it fails with
        error code ``ResourceNotFound``.

    Raises:
        ClientError: For any other error returned by the describe call.
    """
    try:
        sagemaker_client.describe_feature_group(FeatureGroupName=feature_group_name)
    except ClientError as error:
        if error.response["Error"]["Code"] == "ResourceNotFound":
            logger.info(f"No feature group found with name {feature_group_name}")
            return False
        # Any other failure says nothing about whether the group exists, so
        # it must not be reported as "exists"; let the caller see it.
        raise
    return True
|
||
|
||
def get_existing_labels(feature_group_name: str, query_results_s3uri: str) -> Any:
    """Fetch the image references that already have an approved label.

    Runs ``APPROVED_LABELS_QUERY`` against the feature group's Athena table
    and returns the values of the column named by
    ``lambda_config.feature_name_s3uri``.

    Args:
        feature_group_name: Name of the feature group to query.
        query_results_s3uri: S3 location passed as the query output location.

    Returns:
        The column values as a list, or an empty list when the feature group
        does not exist.
    """
    if not feature_group_exists(feature_group_name):
        return []

    feature_group = FeatureGroup(
        name=feature_group_name, sagemaker_session=sagemaker_session
    )
    query = feature_group.athena_query()
    query_string = APPROVED_LABELS_QUERY.format(table=query.table_name)
    logger.debug(f"Running query {query_string} against FeatureGroup {feature_group}")

    query.run(query_string=query_string, output_location=query_results_s3uri)
    query.wait()

    # Build the label list once and use it for both the log line and the result.
    labels = query.as_dataframe()[lambda_config.feature_name_s3uri].tolist()
    logger.info(f"Found {len(labels)} labels")
    return labels
|
||
|
||
def get_images_without_labels(
    images: List[str], existing_labels: List[str]
) -> List[str]:
    """Return the images that do not appear among the existing labels.

    Args:
        images: Image identifiers (S3 URIs) to check.
        existing_labels: Identifiers of images that already have a label.

    Returns:
        The entries of ``images`` not found in ``existing_labels``, in their
        original order (duplicates in ``images`` are kept).
    """
    # Build the lookup set once so each membership test is O(1) instead of
    # scanning the whole label list for every image.
    labelled = set(existing_labels)
    missing_labels = [image for image in images if image not in labelled]
    logger.info(
        f"images: {len(images)} , existing_labels: {len(existing_labels)}, missing_labels: {len(missing_labels)}"
    )
    return missing_labels
4 changes: 4 additions & 0 deletions
4
...es/sagemaker/sagemaker-groundtruth/tests/lib/lambda/check_missing_labels/requirements.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
pandas==1.5.2 | ||
scikit-learn==1.1.3 | ||
boto3==1.28.52 | ||
sagemaker==2.187.0 |
Empty file.
Oops, something went wrong.