diff --git a/00_Miscellaneous/tfx/tfx-pipelines/01-census-data-analysis.ipynb b/00_Miscellaneous/tfx/tfx-pipelines/01-census-data-analysis.ipynb index 2214170..fd1ba8c 100644 --- a/00_Miscellaneous/tfx/tfx-pipelines/01-census-data-analysis.ipynb +++ b/00_Miscellaneous/tfx/tfx-pipelines/01-census-data-analysis.ipynb @@ -25,20 +25,21 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", - "DATA_DIR='../data'\n", - "TRAIN_DATA_DIR = os.path.join(DATA_DIR, 'train')\n", - "RAW_SCHEMA_DIR = '../raw_schema'" + "ROOT_DIR = '..'\n", + "DATA_DIR = ROOT_DIR + '/data'\n", + "TRAIN_DATA_DIR = DATA_DIR + '/train'\n", + "RAW_SCHEMA_DIR = ROOT_DIR + '/raw_schema'" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -46,10 +47,10 @@ "output_type": "stream", "text": [ "Copying gs://cloud-samples-data/ml-engine/census/data/adult.data.csv...\n", - "/ [1 files][ 3.8 MiB/ 3.8 MiB] \n", + "- [1 files][ 3.8 MiB/ 3.8 MiB] \n", "Operation completed over 1 objects/3.8 MiB. \n", "Copying gs://cloud-samples-data/ml-engine/census/data/adult.test.csv...\n", - "/ [1 files][ 1.9 MiB/ 1.9 MiB] \n", + "- [1 files][ 1.9 MiB/ 1.9 MiB] \n", "Operation completed over 1 objects/1.9 MiB. \n" ] } @@ -70,7 +71,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -87,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -122,7 +123,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -141,9 +142,37 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:Couldn't find python-snappy so the implementation of _TFRecordUtil._masked_crc32c is not as fast as it could be.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From /Users/khalidsalama/Technology/GoogleCloud/source-code/tfx-playground/venv/lib/python3.6/site-packages/tensorflow_data_validation/utils/stats_gen_lib.py:366: tf_record_iterator (from tensorflow.python.lib.io.tf_record) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use eager execution and: \n", + "`tf.data.TFRecordDataset(path)`\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From /Users/khalidsalama/Technology/GoogleCloud/source-code/tfx-playground/venv/lib/python3.6/site-packages/tensorflow_data_validation/utils/stats_gen_lib.py:366: tf_record_iterator (from tensorflow.python.lib.io.tf_record) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use eager execution and: \n", + "`tf.data.TFRecordDataset(path)`\n" + ] + } + ], "source": [ "train_stats = tfdv.generate_statistics_from_csv(\n", " data_location=TRAIN_DATA_DIR+'/*.csv', \n", @@ -157,7 +186,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -166,7 +195,7 @@ "\n", " \n", - "
[HTML ExecutionResult widget output: the previous run's display (execution_id 2) is replaced by the Pusher run's display (execution_id 12); inputs: model_export from ../workspace/Trainer.Census_Classifier_Trainer/output/6/ and model_blessing from ../workspace/AccuracyModelValidator.Accuracy_Model_Validator/blessing/10/; output: model_push at ../workspace/Pusher/model_push/12/; exec_properties push_destination filesystem baseDirectory: ../model_registry]
" + ], + "text/plain": [ + "ExecutionResult(\n", + " component_id: Pusher\n", + " execution_id: 12\n", + " outputs:\n", + " model_push: Channel(\n", + " type_name: ModelPushPath\n", + " artifacts: [Artifact(type_name: ModelPushPath, uri: ../workspace/Pusher/model_push/12/, split: , id: 17)]\n", + " ))" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "push_destination=tfx.proto.pusher_pb2.PushDestination(\n", + " filesystem=tfx.proto.pusher_pb2.PushDestination.Filesystem(\n", + " base_directory=serving_models_location)\n", + ")\n", + "\n", + "pusher = tfx.components.Pusher(\n", + " model=trainer.outputs['model'],\n", + " model_blessing=accuracy_model_validator.outputs['blessing'],\n", + " push_destination=push_destination\n", + ")\n", + "\n", + "context.run(pusher)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 7.2. Test the pushed model" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "../model_registry/1573813725\n", + "\n", + "MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:\n", + "\n", + "signature_def['predict']:\n", + " The given SavedModel SignatureDef contains the following input(s):\n", + " inputs['age'] tensor_info:\n", + " dtype: DT_INT64\n", + " shape: (-1)\n", + " name: Placeholder:0\n", + " inputs['capital_gain'] tensor_info:\n", + " dtype: DT_INT64\n", + " shape: (-1)\n", + " name: Placeholder_1:0\n", + " inputs['capital_loss'] tensor_info:\n", + " dtype: DT_INT64\n", + " shape: (-1)\n", + " name: Placeholder_2:0\n", + " inputs['education'] tensor_info:\n", + " dtype: DT_STRING\n", + " shape: (-1)\n", + " name: Placeholder_3:0\n", + " inputs['education_num'] tensor_info:\n", + " dtype: DT_INT64\n", + " shape: (-1)\n", + " name: Placeholder_4:0\n", + " inputs['gender'] tensor_info:\n", + " dtype: DT_STRING\n", + " shape: (-1)\n", + " name: Placeholder_5:0\n", + " inputs['hours_per_week'] tensor_info:\n", + " dtype: DT_INT64\n", + " shape: (-1)\n", + " name: Placeholder_6:0\n", + " inputs['marital_status'] tensor_info:\n", + " dtype: DT_STRING\n", + " shape: (-1)\n", + " name: Placeholder_7:0\n", + " inputs['native_country'] tensor_info:\n", + " dtype: DT_STRING\n", + " shape: (-1)\n", + " name: Placeholder_8:0\n", + " inputs['occupation'] tensor_info:\n", + " dtype: DT_STRING\n", + " shape: (-1)\n", + " name: Placeholder_9:0\n", + " inputs['race'] tensor_info:\n", + " dtype: DT_STRING\n", + " shape: (-1)\n", + " name: Placeholder_10:0\n", + " inputs['relationship'] tensor_info:\n", + " dtype: DT_STRING\n", + " shape: (-1)\n", + " name: Placeholder_11:0\n", + " inputs['workclass'] tensor_info:\n", + " dtype: DT_STRING\n", + " shape: (-1)\n", + " name: Placeholder_12:0\n", + " The given SavedModel SignatureDef contains the following output(s):\n", + " outputs['all_class_ids'] tensor_info:\n", + " dtype: DT_INT32\n", + " shape: (-1, 2)\n", + " name: dnn/head/predictions/Tile:0\n", + " outputs['all_classes'] tensor_info:\n", + " dtype: DT_STRING\n", + " shape: (-1, 2)\n", + " name: dnn/head/predictions/Tile_1:0\n", + " outputs['class_ids'] tensor_info:\n", + " dtype: DT_INT64\n", + " shape: (-1, 1)\n", + " name: dnn/head/predictions/ExpandDims:0\n", + " outputs['classes'] tensor_info:\n", + " dtype: DT_STRING\n", + " shape: (-1, 1)\n", + " name: dnn/head/predictions/hash_table_Lookup/LookupTableFindV2:0\n", + " outputs['logistic'] tensor_info:\n", + " 
dtype: DT_FLOAT\n", + " shape: (-1, 1)\n", + " name: dnn/head/predictions/logistic:0\n", + " outputs['logits'] tensor_info:\n", + " dtype: DT_FLOAT\n", + " shape: (-1, 1)\n", + " name: dnn/logits/BiasAdd:0\n", + " outputs['probabilities'] tensor_info:\n", + " dtype: DT_FLOAT\n", + " shape: (-1, 2)\n", + " name: dnn/head/predictions/probabilities:0\n", + " Method name is: tensorflow/serving/predict\n" + ] + } + ], + "source": [ + "latest_serving_model_path = os.path.join(serving_models_location, max(os.listdir(serving_models_location)))\n", + "print(latest_serving_model_path)\n", + "!saved_model_cli show --dir={latest_serving_model_path} --all" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Restoring parameters from ../model_registry/1573813725/variables/variables\n", + "\n", + "{'classes': array([[b' <=50K']], dtype=object), 'all_class_ids': array([[0, 1]], dtype=int32), 'all_classes': array([[b' <=50K', b' >50K']], dtype=object), 'logistic': array([[0.19016084]], dtype=float32), 'probabilities': array([[0.8098392 , 0.19016084]], dtype=float32), 'logits': array([[-1.4489654]], dtype=float32), 'class_ids': array([[0]])}\n" + ] + } + ], + "source": [ + "predictor_fn = tf.contrib.predictor.from_saved_model(\n", + " export_dir = latest_serving_model_path,\n", + " signature_def_key=\"predict\"\n", + ")\n", + "print(\"\")\n", + "\n", + "output = predictor_fn(\n", + " {\n", + " 'age': [34.0],\n", + " 'workclass': ['Private'],\n", + " 'education': ['Doctorate'],\n", + " 'education_num': [10.0],\n", + " 'marital_status': ['Married-civ-spouse'],\n", + " 'occupation': ['Prof-specialty'],\n", + " 'relationship': ['Husband'],\n", + " 'race': ['White'],\n", + " 'gender': ['Male'],\n", + " 'capital_gain': [0.0], \n", + " 'capital_loss': [0.0], \n", + " 'hours_per_week': [40.0],\n", + " 'native_country':['Egyptian']\n", + " }\n", + ")\n", + "\n", + "print(output)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 8. 
Querying Metadata database" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "import sqlite3\n", + "connection = sqlite3.connect(os.path.join(OUTPUT_DIR, 'metadata.sqlite'))\n", + "cursor = connection.cursor()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### List tables" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[('Type',),\n", + " ('sqlite_sequence',),\n", + " ('TypeProperty',),\n", + " ('Artifact',),\n", + " ('ArtifactProperty',),\n", + " ('Execution',),\n", + " ('ExecutionProperty',),\n", + " ('Event',),\n", + " ('EventPath',),\n", + " ('MLMDEnv',),\n", + " ('Context',),\n", + " ('ContextProperty',),\n", + " ('Association',),\n", + " ('Attribution',)]\n" + ] + } + ], + "source": [ + "cursor.execute(\"SELECT name FROM sqlite_master WHERE type='table';\")\n", + "pprint(cursor.fetchall())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Query Artifact table" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[(1, 1, '../data/train'),\n", + " (2,\n", + " 4,\n", + " '../workspace/CsvExampleGen.Data_Extraction_Spliting/examples/1/train/'),\n", + " (3, 4, '../workspace/CsvExampleGen.Data_Extraction_Spliting/examples/1/eval/'),\n", + " (4, 6, '../workspace/StatisticsGen.Statistics_Generation/output/2/train/'),\n", + " (5, 6, '../workspace/StatisticsGen.Statistics_Generation/output/2/eval/'),\n", + " (6, 7, '../raw_schema'),\n", + " (7, 10, '../workspace/ExampleValidator.Data_Validation/output/4/'),\n", + " (8, 12, '../workspace/Transform.Data_Transformation/transform_output/5/'),\n", + " (9,\n", + " 4,\n", + " '../workspace/Transform.Data_Transformation/transformed_examples/5/train/'),\n", + " (10,\n", + " 4,\n", + " '../workspace/Transform.Data_Transformation/transformed_examples/5/eval/'),\n", + " (11, 14, '../workspace/Trainer.Census_Classifier_Trainer/output/6/'),\n", + " (12, 16, '../workspace/Evaluator.Occupation_based_Evaluator/output/7/'),\n", + " (13, 18, '../workspace/ModelValidator.Base_Model_Validator/blessing/8/'),\n", + " (14,\n", + " 18,\n", + " '../workspace/AccuracyModelValidator.Accuracy_Model_Validator/blessing/9/'),\n", + " (15,\n", " 18,\n", - " '../workspace/AccuracyModelValidator.Accuracy_Model_Validator/blessing/37/')]\n" + " '../workspace/AccuracyModelValidator.Accuracy_Model_Validator/blessing/10/'),\n", + " (16, 21, '../workspace/Pusher/model_push/11/'),\n", + " (17, 21, '../workspace/Pusher/model_push/12/')]\n" ] } ],
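Note: the metadata cells above read metadata.sqlite directly with sqlite3, which is handy for inspection but ties the queries to MLMD's internal table layout. Below is a minimal sketch of the same artifact listing done through the ml-metadata (MLMD) client API instead; it assumes the ml_metadata package bundled with TFX and the metadata.sqlite file under OUTPUT_DIR that the notebook connects to above.

import os
from ml_metadata.metadata_store import metadata_store
from ml_metadata.proto import metadata_store_pb2

# Point the MLMD client at the same SQLite file the notebook queried above.
connection_config = metadata_store_pb2.ConnectionConfig()
connection_config.sqlite.filename_uri = os.path.join(OUTPUT_DIR, 'metadata.sqlite')
connection_config.sqlite.connection_mode = metadata_store_pb2.SqliteMetadataSourceConfig.READONLY

store = metadata_store.MetadataStore(connection_config)

# Equivalent of the raw "SELECT id, type_id, uri FROM Artifact" query:
# list every artifact with its type name and URI, without depending on the table schema.
artifact_types = {t.id: t.name for t in store.get_artifact_types()}
for artifact in store.get_artifacts():
    print(artifact.id, artifact_types[artifact.type_id], artifact.uri)

This keeps the queries stable across MLMD schema versions; the raw SQL above is still useful when exploring tables such as Event or Attribution that the notebook does not cover.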